git.sesse.net Git - ffmpeg/blob - libavcodec/proresenc.c

   1 /*
   2  * Apple ProRes encoder
   3  *
   4  * Copyright (c) 2012 Konstantin Shishkov
   5  *
   6  * This file is part of Libav.
   7  *
   8  * Libav is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * Libav is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with Libav; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 #include "libavutil/opt.h"
  24 #include "libavutil/pixdesc.h"
  25 #include "avcodec.h"
  26 #include "fdctdsp.h"
  27 #include "put_bits.h"
  28 #include "bytestream.h"
  29 #include "internal.h"
  30 #include "proresdata.h"
  31
  32 #define CFACTOR_Y422 2
  33 #define CFACTOR_Y444 3
  34
  35 #define MAX_MBS_PER_SLICE 8
  36
  37 #define MAX_PLANES 4
  38
  39 enum {
  40     PRORES_PROFILE_PROXY = 0,
  41     PRORES_PROFILE_LT,
  42     PRORES_PROFILE_STANDARD,
  43     PRORES_PROFILE_HQ,
  44     PRORES_PROFILE_4444,
  45 };
  46
  47 enum {
  48     QUANT_MAT_PROXY = 0,
  49     QUANT_MAT_LT,
  50     QUANT_MAT_STANDARD,
  51     QUANT_MAT_HQ,
  52     QUANT_MAT_DEFAULT,
  53 };
  54
  55 static const uint8_t prores_quant_matrices[][64] = {
  56     { // proxy
  57          4,  7,  9, 11, 13, 14, 15, 63,
  58          7,  7, 11, 12, 14, 15, 63, 63,
  59          9, 11, 13, 14, 15, 63, 63, 63,
  60         11, 11, 13, 14, 63, 63, 63, 63,
  61         11, 13, 14, 63, 63, 63, 63, 63,
  62         13, 14, 63, 63, 63, 63, 63, 63,
  63         13, 63, 63, 63, 63, 63, 63, 63,
  64         63, 63, 63, 63, 63, 63, 63, 63,
  65     },
  66     { // LT
  67          4,  5,  6,  7,  9, 11, 13, 15,
  68          5,  5,  7,  8, 11, 13, 15, 17,
  69          6,  7,  9, 11, 13, 15, 15, 17,
  70          7,  7,  9, 11, 13, 15, 17, 19,
  71          7,  9, 11, 13, 14, 16, 19, 23,
  72          9, 11, 13, 14, 16, 19, 23, 29,
  73          9, 11, 13, 15, 17, 21, 28, 35,
  74         11, 13, 16, 17, 21, 28, 35, 41,
  75     },
  76     { // standard
  77          4,  4,  5,  5,  6,  7,  7,  9,
  78          4,  4,  5,  6,  7,  7,  9,  9,
  79          5,  5,  6,  7,  7,  9,  9, 10,
  80          5,  5,  6,  7,  7,  9,  9, 10,
  81          5,  6,  7,  7,  8,  9, 10, 12,
  82          6,  7,  7,  8,  9, 10, 12, 15,
  83          6,  7,  7,  9, 10, 11, 14, 17,
  84          7,  7,  9, 10, 11, 14, 17, 21,
  85     },
  86     { // high quality
  87          4,  4,  4,  4,  4,  4,  4,  4,
  88          4,  4,  4,  4,  4,  4,  4,  4,
  89          4,  4,  4,  4,  4,  4,  4,  4,
  90          4,  4,  4,  4,  4,  4,  4,  5,
  91          4,  4,  4,  4,  4,  4,  5,  5,
  92          4,  4,  4,  4,  4,  5,  5,  6,
  93          4,  4,  4,  4,  5,  5,  6,  7,
  94          4,  4,  4,  4,  5,  6,  7,  7,
  95     },
  96     { // codec default
  97          4,  4,  4,  4,  4,  4,  4,  4,
  98          4,  4,  4,  4,  4,  4,  4,  4,
  99          4,  4,  4,  4,  4,  4,  4,  4,
 100          4,  4,  4,  4,  4,  4,  4,  4,
 101          4,  4,  4,  4,  4,  4,  4,  4,
 102          4,  4,  4,  4,  4,  4,  4,  4,
 103          4,  4,  4,  4,  4,  4,  4,  4,
 104          4,  4,  4,  4,  4,  4,  4,  4,
 105     },
 106 };
 107
 108 #define NUM_MB_LIMITS 4
 109 static const int prores_mb_limits[NUM_MB_LIMITS] = {
 110     1620, // up to 720x576
 111     2700, // up to 960x720
 112     6075, // up to 1440x1080
 113     9216, // up to 2048x1152
 114 };
 115
 116 static const struct prores_profile {
 117     const char *full_name;
 118     uint32_t    tag;
 119     int         min_quant;
 120     int         max_quant;
 121     int         br_tab[NUM_MB_LIMITS];
 122     int         quant;
 123 } prores_profile_info[5] = {
 124     {
 125         .full_name = "proxy",
 126         .tag       = MKTAG('a', 'p', 'c', 'o'),
 127         .min_quant = 4,
 128         .max_quant = 8,
 129         .br_tab    = { 300, 242, 220, 194 },
 130         .quant     = QUANT_MAT_PROXY,
 131     },
 132     {
 133         .full_name = "LT",
 134         .tag       = MKTAG('a', 'p', 'c', 's'),
 135         .min_quant = 1,
 136         .max_quant = 9,
 137         .br_tab    = { 720, 560, 490, 440 },
 138         .quant     = QUANT_MAT_LT,
 139     },
 140     {
 141         .full_name = "standard",
 142         .tag       = MKTAG('a', 'p', 'c', 'n'),
 143         .min_quant = 1,
 144         .max_quant = 6,
 145         .br_tab    = { 1050, 808, 710, 632 },
 146         .quant     = QUANT_MAT_STANDARD,
 147     },
 148     {
 149         .full_name = "high quality",
 150         .tag       = MKTAG('a', 'p', 'c', 'h'),
 151         .min_quant = 1,
 152         .max_quant = 6,
 153         .br_tab    = { 1566, 1216, 1070, 950 },
 154         .quant     = QUANT_MAT_HQ,
 155     },
 156     {
 157         .full_name = "4444",
 158         .tag       = MKTAG('a', 'p', '4', 'h'),
 159         .min_quant = 1,
 160         .max_quant = 6,
 161         .br_tab    = { 2350, 1828, 1600, 1425 },
 162         .quant     = QUANT_MAT_HQ,
 163     }
 164 };
 165
 166 #define TRELLIS_WIDTH 16
 167 #define SCORE_LIMIT   INT_MAX / 2
 168
 169 struct TrellisNode {
 170     int prev_node;
 171     int quant;
 172     int bits;
 173     int score;
 174 };
 175
 176 #define MAX_STORED_Q 16
 177
 178 typedef struct ProresThreadData {
 179     DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
 180     DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
 181     int16_t custom_q[64];
 182     struct TrellisNode *nodes;
 183 } ProresThreadData;
 184
 185 typedef struct ProresContext {
 186     AVClass *class;
 187     DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
 188     DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
 189     int16_t quants[MAX_STORED_Q][64];
 190     int16_t custom_q[64];
 191     const uint8_t *quant_mat;
 192     const uint8_t *scantable;
 193
 194     void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
 195                  int linesize, int16_t *block);
 196     FDCTDSPContext fdsp;
 197
 198     int mb_width, mb_height;
 199     int mbs_per_slice;
 200     int num_chroma_blocks, chroma_factor;
 201     int slices_width;
 202     int slices_per_picture;
 203     int pictures_per_frame; // 1 for progressive, 2 for interlaced
 204     int cur_picture_idx;
 205     int num_planes;
 206     int bits_per_mb;
 207     int force_quant;
 208     int alpha_bits;
 209     int warn;
 210
 211     char *vendor;
 212     int quant_sel;
 213
 214     int frame_size_upper_bound;
 215
 216     int profile;
 217     const struct prores_profile *profile_info;
 218
 219     int *slice_q;
 220
 221     ProresThreadData *tdata;
 222 } ProresContext;
 223
 224 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
 225                            int linesize, int x, int y, int w, int h,
 226                            int16_t *blocks, uint16_t *emu_buf,
 227                            int mbs_per_slice, int blocks_per_mb, int is_chroma)
 228 {
 229     const uint16_t *esrc;
 230     const int mb_width = 4 * blocks_per_mb;
 231     int elinesize;
 232     int i, j, k;
 233
 234     for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
 235         if (x >= w) {
 236             memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
 237                               * sizeof(*blocks));
 238             return;
 239         }
 240         if (x + mb_width <= w && y + 16 <= h) {
 241             esrc      = src;
 242             elinesize = linesize;
 243         } else {
 244             int bw, bh, pix;
 245
 246             esrc      = emu_buf;
 247             elinesize = 16 * sizeof(*emu_buf);
 248
 249             bw = FFMIN(w - x, mb_width);
 250             bh = FFMIN(h - y, 16);
 251
 252             for (j = 0; j < bh; j++) {
 253                 memcpy(emu_buf + j * 16,
 254                        (const uint8_t*)src + j * linesize,
 255                        bw * sizeof(*src));
 256                 pix = emu_buf[j * 16 + bw - 1];
 257                 for (k = bw; k < mb_width; k++)
 258                     emu_buf[j * 16 + k] = pix;
 259             }
 260             for (; j < 16; j++)
 261                 memcpy(emu_buf + j * 16,
 262                        emu_buf + (bh - 1) * 16,
 263                        mb_width * sizeof(*emu_buf));
 264         }
 265         if (!is_chroma) {
 266             ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
 267             blocks += 64;
 268             if (blocks_per_mb > 2) {
 269                 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
 270                 blocks += 64;
 271             }
 272             ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
 273             blocks += 64;
 274             if (blocks_per_mb > 2) {
 275                 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
 276                 blocks += 64;
 277             }
 278         } else {
 279             ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
 280             blocks += 64;
 281             ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
 282             blocks += 64;
 283             if (blocks_per_mb > 2) {
 284                 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
 285                 blocks += 64;
 286                 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
 287                 blocks += 64;
 288             }
 289         }
 290
 291         x += mb_width;
 292     }
 293 }
 294
 295 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
 296                            int linesize, int x, int y, int w, int h,
 297                            int16_t *blocks, int mbs_per_slice, int abits)
 298 {
 299     const int slice_width = 16 * mbs_per_slice;
 300     int i, j, copy_w, copy_h;
 301
 302     copy_w = FFMIN(w - x, slice_width);
 303     copy_h = FFMIN(h - y, 16);
 304     for (i = 0; i < copy_h; i++) {
 305         memcpy(blocks, src, copy_w * sizeof(*src));
 306         if (abits == 8)
 307             for (j = 0; j < copy_w; j++)
 308                 blocks[j] >>= 2;
 309         else
 310             for (j = 0; j < copy_w; j++)
 311                 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
 312         for (j = copy_w; j < slice_width; j++)
 313             blocks[j] = blocks[copy_w - 1];
 314         blocks += slice_width;
 315         src    += linesize >> 1;
 316     }
 317     for (; i < 16; i++) {
 318         memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
 319         blocks += slice_width;
 320     }
 321 }
 322
 323 /**
 324  * Write an unsigned rice/exp golomb codeword.
 325  */
 326 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
 327 {
 328     unsigned int rice_order, exp_order, switch_bits, switch_val;
 329     int exponent;
 330
 331     /* number of prefix bits to switch between Rice and expGolomb */
 332     switch_bits = (codebook & 3) + 1;
 333     rice_order  =  codebook >> 5;       /* rice code order */
 334     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 335
 336     switch_val  = switch_bits << rice_order;
 337
 338     if (val >= switch_val) {
 339         val -= switch_val - (1 << exp_order);
 340         exponent = av_log2(val);
 341
 342         put_bits(pb, exponent - exp_order + switch_bits, 0);
 343         put_bits(pb, exponent + 1, val);
 344     } else {
 345         exponent = val >> rice_order;
 346
 347         if (exponent)
 348             put_bits(pb, exponent, 0);
 349         put_bits(pb, 1, 1);
 350         if (rice_order)
 351             put_sbits(pb, rice_order, val);
 352     }
 353 }
 354
 355 #define GET_SIGN(x)  ((x) >> 31)
 356 #define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
 357
 358 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
 359                        int blocks_per_slice, int scale)
 360 {
 361     int i;
 362     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 363
 364     prev_dc = (blocks[0] - 0x4000) / scale;
 365     encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
 366     sign     = 0;
 367     codebook = 3;
 368     blocks  += 64;
 369
 370     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 371         dc       = (blocks[0] - 0x4000) / scale;
 372         delta    = dc - prev_dc;
 373         new_sign = GET_SIGN(delta);
 374         delta    = (delta ^ sign) - sign;
 375         code     = MAKE_CODE(delta);
 376         encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
 377         codebook = (code + (code & 1)) >> 1;
 378         codebook = FFMIN(codebook, 3);
 379         sign     = new_sign;
 380         prev_dc  = dc;
 381     }
 382 }
 383
 384 static void encode_acs(PutBitContext *pb, int16_t *blocks,
 385                        int blocks_per_slice,
 386                        int plane_size_factor,
 387                        const uint8_t *scan, const int16_t *qmat)
 388 {
 389     int idx, i;
 390     int run, level, run_cb, lev_cb;
 391     int max_coeffs, abs_level;
 392
 393     max_coeffs = blocks_per_slice << 6;
 394     run_cb     = ff_prores_run_to_cb_index[4];
 395     lev_cb     = ff_prores_lev_to_cb_index[2];
 396     run        = 0;
 397
 398     for (i = 1; i < 64; i++) {
 399         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 400             level = blocks[idx] / qmat[scan[i]];
 401             if (level) {
 402                 abs_level = FFABS(level);
 403                 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
 404                 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
 405                                     abs_level - 1);
 406                 put_sbits(pb, 1, GET_SIGN(level));
 407
 408                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 409                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 410                 run    = 0;
 411             } else {
 412                 run++;
 413             }
 414         }
 415     }
 416 }
 417
 418 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
 419                               const uint16_t *src, int linesize,
 420                               int mbs_per_slice, int16_t *blocks,
 421                               int blocks_per_mb, int plane_size_factor,
 422                               const int16_t *qmat)
 423 {
 424     int blocks_per_slice, saved_pos;
 425
 426     saved_pos = put_bits_count(pb);
 427     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 428
 429     encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
 430     encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
 431                ctx->scantable, qmat);
 432     flush_put_bits(pb);
 433
 434     return (put_bits_count(pb) - saved_pos) >> 3;
 435 }
 436
 437 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
 438 {
 439     const int mask  = (1 << abits) - 1;
 440     const int dbits = (abits == 8) ? 4 : 7;
 441     const int dsize = 1 << dbits - 1;
 442     int diff = cur - prev;
 443
 444     diff &= mask;
 445     if (diff >= (1 << abits) - dsize)
 446         diff -= 1 << abits;
 447     if (diff < -dsize || diff > dsize || !diff) {
 448         put_bits(pb, 1, 1);
 449         put_bits(pb, abits, diff);
 450     } else {
 451         put_bits(pb, 1, 0);
 452         put_bits(pb, dbits - 1, FFABS(diff) - 1);
 453         put_bits(pb, 1, diff < 0);
 454     }
 455 }
 456
 457 static void put_alpha_run(PutBitContext *pb, int run)
 458 {
 459     if (run) {
 460         put_bits(pb, 1, 0);
 461         if (run < 0x10)
 462             put_bits(pb, 4, run);
 463         else
 464             put_bits(pb, 15, run);
 465     } else {
 466         put_bits(pb, 1, 1);
 467     }
 468 }
 469
 470 // todo alpha quantisation for high quants
 471 static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
 472                               int mbs_per_slice, uint16_t *blocks,
 473                               int quant)
 474 {
 475     const int abits = ctx->alpha_bits;
 476     const int mask  = (1 << abits) - 1;
 477     const int num_coeffs = mbs_per_slice * 256;
 478     int saved_pos = put_bits_count(pb);
 479     int prev = mask, cur;
 480     int idx = 0;
 481     int run = 0;
 482
 483     cur = blocks[idx++];
 484     put_alpha_diff(pb, cur, prev, abits);
 485     prev = cur;
 486     do {
 487         cur = blocks[idx++];
 488         if (cur != prev) {
 489             put_alpha_run (pb, run);
 490             put_alpha_diff(pb, cur, prev, abits);
 491             prev = cur;
 492             run  = 0;
 493         } else {
 494             run++;
 495         }
 496     } while (idx < num_coeffs);
 497     if (run)
 498         put_alpha_run(pb, run);
 499     flush_put_bits(pb);
 500     return (put_bits_count(pb) - saved_pos) >> 3;
 501 }
 502
 503 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
 504                         PutBitContext *pb,
 505                         int sizes[4], int x, int y, int quant,
 506                         int mbs_per_slice)
 507 {
 508     ProresContext *ctx = avctx->priv_data;
 509     int i, xp, yp;
 510     int total_size = 0;
 511     const uint16_t *src;
 512     int slice_width_factor = av_log2(mbs_per_slice);
 513     int num_cblocks, pwidth, linesize, line_add;
 514     int plane_factor, is_chroma;
 515     uint16_t *qmat;
 516
 517     if (ctx->pictures_per_frame == 1)
 518         line_add = 0;
 519     else
 520         line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
 521
 522     if (ctx->force_quant) {
 523         qmat = ctx->quants[0];
 524     } else if (quant < MAX_STORED_Q) {
 525         qmat = ctx->quants[quant];
 526     } else {
 527         qmat = ctx->custom_q;
 528         for (i = 0; i < 64; i++)
 529             qmat[i] = ctx->quant_mat[i] * quant;
 530     }
 531
 532     for (i = 0; i < ctx->num_planes; i++) {
 533         is_chroma    = (i == 1 || i == 2);
 534         plane_factor = slice_width_factor + 2;
 535         if (is_chroma)
 536             plane_factor += ctx->chroma_factor - 3;
 537         if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
 538             xp          = x << 4;
 539             yp          = y << 4;
 540             num_cblocks = 4;
 541             pwidth      = avctx->width;
 542         } else {
 543             xp          = x << 3;
 544             yp          = y << 4;
 545             num_cblocks = 2;
 546             pwidth      = avctx->width >> 1;
 547         }
 548
 549         linesize = pic->linesize[i] * ctx->pictures_per_frame;
 550         src = (const uint16_t*)(pic->data[i] + yp * linesize +
 551                                 line_add * pic->linesize[i]) + xp;
 552
 553         if (i < 3) {
 554             get_slice_data(ctx, src, linesize, xp, yp,
 555                            pwidth, avctx->height / ctx->pictures_per_frame,
 556                            ctx->blocks[0], ctx->emu_buf,
 557                            mbs_per_slice, num_cblocks, is_chroma);
 558             sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
 559                                           mbs_per_slice, ctx->blocks[0],
 560                                           num_cblocks, plane_factor,
 561                                           qmat);
 562         } else {
 563             get_alpha_data(ctx, src, linesize, xp, yp,
 564                            pwidth, avctx->height / ctx->pictures_per_frame,
 565                            ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
 566             sizes[i] = encode_alpha_plane(ctx, pb, mbs_per_slice,
 567                                           ctx->blocks[0], quant);
 568         }
 569         total_size += sizes[i];
 570         if (put_bits_left(pb) < 0) {
 571             av_log(avctx, AV_LOG_ERROR,
 572                    "Underestimated required buffer size.\n");
 573             return AVERROR_BUG;
 574         }
 575     }
 576     return total_size;
 577 }
 578
 579 static inline int estimate_vlc(unsigned codebook, int val)
 580 {
 581     unsigned int rice_order, exp_order, switch_bits, switch_val;
 582     int exponent;
 583
 584     /* number of prefix bits to switch between Rice and expGolomb */
 585     switch_bits = (codebook & 3) + 1;
 586     rice_order  =  codebook >> 5;       /* rice code order */
 587     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 588
 589     switch_val  = switch_bits << rice_order;
 590
 591     if (val >= switch_val) {
 592         val -= switch_val - (1 << exp_order);
 593         exponent = av_log2(val);
 594
 595         return exponent * 2 - exp_order + switch_bits + 1;
 596     } else {
 597         return (val >> rice_order) + rice_order + 1;
 598     }
 599 }
 600
 601 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
 602                         int scale)
 603 {
 604     int i;
 605     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 606     int bits;
 607
 608     prev_dc  = (blocks[0] - 0x4000) / scale;
 609     bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
 610     sign     = 0;
 611     codebook = 3;
 612     blocks  += 64;
 613     *error  += FFABS(blocks[0] - 0x4000) % scale;
 614
 615     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 616         dc       = (blocks[0] - 0x4000) / scale;
 617         *error  += FFABS(blocks[0] - 0x4000) % scale;
 618         delta    = dc - prev_dc;
 619         new_sign = GET_SIGN(delta);
 620         delta    = (delta ^ sign) - sign;
 621         code     = MAKE_CODE(delta);
 622         bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
 623         codebook = (code + (code & 1)) >> 1;
 624         codebook = FFMIN(codebook, 3);
 625         sign     = new_sign;
 626         prev_dc  = dc;
 627     }
 628
 629     return bits;
 630 }
 631
 632 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
 633                         int plane_size_factor,
 634                         const uint8_t *scan, const int16_t *qmat)
 635 {
 636     int idx, i;
 637     int run, level, run_cb, lev_cb;
 638     int max_coeffs, abs_level;
 639     int bits = 0;
 640
 641     max_coeffs = blocks_per_slice << 6;
 642     run_cb     = ff_prores_run_to_cb_index[4];
 643     lev_cb     = ff_prores_lev_to_cb_index[2];
 644     run        = 0;
 645
 646     for (i = 1; i < 64; i++) {
 647         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 648             level   = blocks[idx] / qmat[scan[i]];
 649             *error += FFABS(blocks[idx]) % qmat[scan[i]];
 650             if (level) {
 651                 abs_level = FFABS(level);
 652                 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
 653                 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
 654                                      abs_level - 1) + 1;
 655
 656                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 657                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 658                 run    = 0;
 659             } else {
 660                 run++;
 661             }
 662         }
 663     }
 664
 665     return bits;
 666 }
 667
 668 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
 669                                 const uint16_t *src, int linesize,
 670                                 int mbs_per_slice,
 671                                 int blocks_per_mb, int plane_size_factor,
 672                                 const int16_t *qmat, ProresThreadData *td)
 673 {
 674     int blocks_per_slice;
 675     int bits;
 676
 677     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 678
 679     bits  = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
 680     bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
 681                          plane_size_factor, ctx->scantable, qmat);
 682
 683     return FFALIGN(bits, 8);
 684 }
 685
 686 static int est_alpha_diff(int cur, int prev, int abits)
 687 {
 688     const int mask  = (1 << abits) - 1;
 689     const int dbits = (abits == 8) ? 4 : 7;
 690     const int dsize = 1 << dbits - 1;
 691     int diff = cur - prev;
 692
 693     diff &= mask;
 694     if (diff >= (1 << abits) - dsize)
 695         diff -= 1 << abits;
 696     if (diff < -dsize || diff > dsize || !diff)
 697         return abits + 1;
 698     else
 699         return dbits + 1;
 700 }
 701
 702 static int estimate_alpha_plane(ProresContext *ctx, int *error,
 703                                 const uint16_t *src, int linesize,
 704                                 int mbs_per_slice, int quant,
 705                                 int16_t *blocks)
 706 {
 707     const int abits = ctx->alpha_bits;
 708     const int mask  = (1 << abits) - 1;
 709     const int num_coeffs = mbs_per_slice * 256;
 710     int prev = mask, cur;
 711     int idx = 0;
 712     int run = 0;
 713     int bits;
 714
 715     *error = 0;
 716     cur = blocks[idx++];
 717     bits = est_alpha_diff(cur, prev, abits);
 718     prev = cur;
 719     do {
 720         cur = blocks[idx++];
 721         if (cur != prev) {
 722             if (!run)
 723                 bits++;
 724             else if (run < 0x10)
 725                 bits += 4;
 726             else
 727                 bits += 15;
 728             bits += est_alpha_diff(cur, prev, abits);
 729             prev = cur;
 730             run  = 0;
 731         } else {
 732             run++;
 733         }
 734     } while (idx < num_coeffs);
 735
 736     if (run) {
 737         if (run < 0x10)
 738             bits += 4;
 739         else
 740             bits += 15;
 741     }
 742
 743     return bits;
 744 }
 745
 746 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
 747                             int trellis_node, int x, int y, int mbs_per_slice,
 748                             ProresThreadData *td)
 749 {
 750     ProresContext *ctx = avctx->priv_data;
 751     int i, q, pq, xp, yp;
 752     const uint16_t *src;
 753     int slice_width_factor = av_log2(mbs_per_slice);
 754     int num_cblocks[MAX_PLANES], pwidth;
 755     int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
 756     const int min_quant = ctx->profile_info->min_quant;
 757     const int max_quant = ctx->profile_info->max_quant;
 758     int error, bits, bits_limit;
 759     int mbs, prev, cur, new_score;
 760     int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
 761     int overquant;
 762     uint16_t *qmat;
 763     int linesize[4], line_add;
 764
 765     if (ctx->pictures_per_frame == 1)
 766         line_add = 0;
 767     else
 768         line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
 769     mbs = x + mbs_per_slice;
 770
 771     for (i = 0; i < ctx->num_planes; i++) {
 772         is_chroma[i]    = (i == 1 || i == 2);
 773         plane_factor[i] = slice_width_factor + 2;
 774         if (is_chroma[i])
 775             plane_factor[i] += ctx->chroma_factor - 3;
 776         if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
 777             xp             = x << 4;
 778             yp             = y << 4;
 779             num_cblocks[i] = 4;
 780             pwidth         = avctx->width;
 781         } else {
 782             xp             = x << 3;
 783             yp             = y << 4;
 784             num_cblocks[i] = 2;
 785             pwidth         = avctx->width >> 1;
 786         }
 787
 788         linesize[i] = pic->linesize[i] * ctx->pictures_per_frame;
 789         src = (const uint16_t*)(pic->data[i] + yp * linesize[i] +
 790                                 line_add * pic->linesize[i]) + xp;
 791
 792         if (i < 3) {
 793             get_slice_data(ctx, src, linesize[i], xp, yp,
 794                            pwidth, avctx->height / ctx->pictures_per_frame,
 795                            td->blocks[i], td->emu_buf,
 796                            mbs_per_slice, num_cblocks[i], is_chroma[i]);
 797         } else {
 798             get_alpha_data(ctx, src, linesize[i], xp, yp,
 799                            pwidth, avctx->height / ctx->pictures_per_frame,
 800                            td->blocks[i], mbs_per_slice, ctx->alpha_bits);
 801         }
 802     }
 803
 804     for (q = min_quant; q < max_quant + 2; q++) {
 805         td->nodes[trellis_node + q].prev_node = -1;
 806         td->nodes[trellis_node + q].quant     = q;
 807     }
 808
 809     // todo: maybe perform coarser quantising to fit into frame size when needed
 810     for (q = min_quant; q <= max_quant; q++) {
 811         bits  = 0;
 812         error = 0;
 813         for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
 814             bits += estimate_slice_plane(ctx, &error, i,
 815                                          src, linesize[i],
 816                                          mbs_per_slice,
 817                                          num_cblocks[i], plane_factor[i],
 818                                          ctx->quants[q], td);
 819         }
 820         if (ctx->alpha_bits)
 821             bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
 822                                          mbs_per_slice, q, td->blocks[3]);
 823         if (bits > 65000 * 8) {
 824             error = SCORE_LIMIT;
 825             break;
 826         }
 827         slice_bits[q]  = bits;
 828         slice_score[q] = error;
 829     }
 830     if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
 831         slice_bits[max_quant + 1]  = slice_bits[max_quant];
 832         slice_score[max_quant + 1] = slice_score[max_quant] + 1;
 833         overquant = max_quant;
 834     } else {
 835         for (q = max_quant + 1; q < 128; q++) {
 836             bits  = 0;
 837             error = 0;
 838             if (q < MAX_STORED_Q) {
 839                 qmat = ctx->quants[q];
 840             } else {
 841                 qmat = td->custom_q;
 842                 for (i = 0; i < 64; i++)
 843                     qmat[i] = ctx->quant_mat[i] * q;
 844             }
 845             for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
 846                 bits += estimate_slice_plane(ctx, &error, i,
 847                                              src, linesize[i],
 848                                              mbs_per_slice,
 849                                              num_cblocks[i], plane_factor[i],
 850                                              qmat, td);
 851             }
 852             if (ctx->alpha_bits)
 853                 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
 854                                              mbs_per_slice, q, td->blocks[3]);
 855             if (bits <= ctx->bits_per_mb * mbs_per_slice)
 856                 break;
 857         }
 858
 859         slice_bits[max_quant + 1]  = bits;
 860         slice_score[max_quant + 1] = error;
 861         overquant = q;
 862     }
 863     td->nodes[trellis_node + max_quant + 1].quant = overquant;
 864
 865     bits_limit = mbs * ctx->bits_per_mb;
 866     for (pq = min_quant; pq < max_quant + 2; pq++) {
 867         prev = trellis_node - TRELLIS_WIDTH + pq;
 868
 869         for (q = min_quant; q < max_quant + 2; q++) {
 870             cur = trellis_node + q;
 871
 872             bits  = td->nodes[prev].bits + slice_bits[q];
 873             error = slice_score[q];
 874             if (bits > bits_limit)
 875                 error = SCORE_LIMIT;
 876
 877             if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
 878                 new_score = td->nodes[prev].score + error;
 879             else
 880                 new_score = SCORE_LIMIT;
 881             if (td->nodes[cur].prev_node == -1 ||
 882                 td->nodes[cur].score >= new_score) {
 883
 884                 td->nodes[cur].bits      = bits;
 885                 td->nodes[cur].score     = new_score;
 886                 td->nodes[cur].prev_node = prev;
 887             }
 888         }
 889     }
 890
 891     error = td->nodes[trellis_node + min_quant].score;
 892     pq    = trellis_node + min_quant;
 893     for (q = min_quant + 1; q < max_quant + 2; q++) {
 894         if (td->nodes[trellis_node + q].score <= error) {
 895             error = td->nodes[trellis_node + q].score;
 896             pq    = trellis_node + q;
 897         }
 898     }
 899
 900     return pq;
 901 }
 902
 903 static int find_quant_thread(AVCodecContext *avctx, void *arg,
 904                              int jobnr, int threadnr)
 905 {
 906     ProresContext *ctx = avctx->priv_data;
 907     ProresThreadData *td = ctx->tdata + threadnr;
 908     int mbs_per_slice = ctx->mbs_per_slice;
 909     int x, y = jobnr, mb, q = 0;
 910
 911     for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
 912         while (ctx->mb_width - x < mbs_per_slice)
 913             mbs_per_slice >>= 1;
 914         q = find_slice_quant(avctx, avctx->coded_frame,
 915                              (mb + 1) * TRELLIS_WIDTH, x, y,
 916                              mbs_per_slice, td);
 917     }
 918
 919     for (x = ctx->slices_width - 1; x >= 0; x--) {
 920         ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
 921         q = td->nodes[q].prev_node;
 922     }
 923
 924     return 0;
 925 }
 926
 927 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 928                         const AVFrame *pic, int *got_packet)
 929 {
 930     ProresContext *ctx = avctx->priv_data;
 931     uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
 932     uint8_t *picture_size_pos;
 933     PutBitContext pb;
 934     int x, y, i, mb, q = 0;
 935     int sizes[4] = { 0 };
 936     int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
 937     int frame_size, picture_size, slice_size;
 938     int pkt_size, ret, max_slice_size = 0;
 939     uint8_t frame_flags;
 940
 941     *avctx->coded_frame           = *pic;
 942     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
 943     avctx->coded_frame->key_frame = 1;
 944
 945     pkt_size = ctx->frame_size_upper_bound;
 946
 947     if ((ret = ff_alloc_packet(pkt, pkt_size + FF_MIN_BUFFER_SIZE)) < 0) {
 948         av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
 949         return ret;
 950     }
 951
 952     orig_buf = pkt->data;
 953
 954     // frame atom
 955     orig_buf += 4;                              // frame size
 956     bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
 957     buf = orig_buf;
 958
 959     // frame header
 960     tmp = buf;
 961     buf += 2;                                   // frame header size will be stored here
 962     bytestream_put_be16  (&buf, 0);             // version 1
 963     bytestream_put_buffer(&buf, ctx->vendor, 4);
 964     bytestream_put_be16  (&buf, avctx->width);
 965     bytestream_put_be16  (&buf, avctx->height);
 966
 967     frame_flags = ctx->chroma_factor << 6;
 968     if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
 969         frame_flags |= pic->top_field_first ? 0x04 : 0x08;
 970     bytestream_put_byte  (&buf, frame_flags);
 971
 972     bytestream_put_byte  (&buf, 0);             // reserved
 973     bytestream_put_byte  (&buf, avctx->color_primaries);
 974     bytestream_put_byte  (&buf, avctx->color_trc);
 975     bytestream_put_byte  (&buf, avctx->colorspace);
 976     bytestream_put_byte  (&buf, 0x40 | (ctx->alpha_bits >> 3));
 977     bytestream_put_byte  (&buf, 0);             // reserved
 978     if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
 979         bytestream_put_byte  (&buf, 0x03);      // matrix flags - both matrices are present
 980         // luma quantisation matrix
 981         for (i = 0; i < 64; i++)
 982             bytestream_put_byte(&buf, ctx->quant_mat[i]);
 983         // chroma quantisation matrix
 984         for (i = 0; i < 64; i++)
 985             bytestream_put_byte(&buf, ctx->quant_mat[i]);
 986     } else {
 987         bytestream_put_byte  (&buf, 0x00);      // matrix flags - default matrices are used
 988     }
 989     bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
 990
 991     for (ctx->cur_picture_idx = 0;
 992          ctx->cur_picture_idx < ctx->pictures_per_frame;
 993          ctx->cur_picture_idx++) {
 994         // picture header
 995         picture_size_pos = buf + 1;
 996         bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
 997         buf += 4;                                   // picture data size will be stored here
 998         bytestream_put_be16  (&buf, ctx->slices_per_picture);
 999         bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
1000
1001         // seek table - will be filled during slice encoding
1002         slice_sizes = buf;
1003         buf += ctx->slices_per_picture * 2;
1004
1005         // slices
1006         if (!ctx->force_quant) {
1007             ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
1008                                   ctx->mb_height);
1009             if (ret)
1010                 return ret;
1011         }
1012
1013         for (y = 0; y < ctx->mb_height; y++) {
1014             int mbs_per_slice = ctx->mbs_per_slice;
1015             for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1016                 q = ctx->force_quant ? ctx->force_quant
1017                                      : ctx->slice_q[mb + y * ctx->slices_width];
1018
1019                 while (ctx->mb_width - x < mbs_per_slice)
1020                     mbs_per_slice >>= 1;
1021
1022                 bytestream_put_byte(&buf, slice_hdr_size << 3);
1023                 slice_hdr = buf;
1024                 buf += slice_hdr_size - 1;
1025                 if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1026                     uint8_t *start = pkt->data;
1027                     // Recompute new size according to max_slice_size
1028                     // and deduce delta
1029                     int delta = 200 + ctx->pictures_per_frame *
1030                                 ctx->slices_per_picture * max_slice_size -
1031                                 pkt_size;
1032
1033                     delta = FFMAX(delta, 2 * max_slice_size);
1034                     ctx->frame_size_upper_bound += delta;
1035
1036                     if (!ctx->warn) {
1037                         avpriv_request_sample(avctx,
1038                                               "Packet too small: is %i,"
1039                                               " needs %i (slice: %i). "
1040                                               "Correct allocation",
1041                                               pkt_size, delta, max_slice_size);
1042                         ctx->warn = 1;
1043                     }
1044
1045                     ret = av_grow_packet(pkt, delta);
1046                     if (ret < 0)
1047                         return ret;
1048
1049                     pkt_size += delta;
1050                     // restore pointers
1051                     orig_buf         = pkt->data + (orig_buf         - start);
1052                     buf              = pkt->data + (buf              - start);
1053                     picture_size_pos = pkt->data + (picture_size_pos - start);
1054                     slice_sizes      = pkt->data + (slice_sizes      - start);
1055                     slice_hdr        = pkt->data + (slice_hdr        - start);
1056                     tmp              = pkt->data + (tmp              - start);
1057                 }
1058                 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
1059                 ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
1060                                    mbs_per_slice);
1061                 if (ret < 0)
1062                     return ret;
1063
1064                 bytestream_put_byte(&slice_hdr, q);
1065                 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1066                 for (i = 0; i < ctx->num_planes - 1; i++) {
1067                     bytestream_put_be16(&slice_hdr, sizes[i]);
1068                     slice_size += sizes[i];
1069                 }
1070                 bytestream_put_be16(&slice_sizes, slice_size);
1071                 buf += slice_size - slice_hdr_size;
1072                 if (max_slice_size < slice_size)
1073                     max_slice_size = slice_size;
1074             }
1075         }
1076
1077         if (ctx->pictures_per_frame == 1)
1078             picture_size = buf - picture_size_pos - 6;
1079         else
1080             picture_size = buf - picture_size_pos + 1;
1081         bytestream_put_be32(&picture_size_pos, picture_size);
1082     }
1083
1084     orig_buf -= 8;
1085     frame_size = buf - orig_buf;
1086     bytestream_put_be32(&orig_buf, frame_size);
1087
1088     pkt->size   = frame_size;
1089     pkt->flags |= AV_PKT_FLAG_KEY;
1090     *got_packet = 1;
1091
1092     return 0;
1093 }
1094
1095 static av_cold int encode_close(AVCodecContext *avctx)
1096 {
1097     ProresContext *ctx = avctx->priv_data;
1098     int i;
1099
1100     av_freep(&avctx->coded_frame);
1101
1102     if (ctx->tdata) {
1103         for (i = 0; i < avctx->thread_count; i++)
1104             av_free(ctx->tdata[i].nodes);
1105     }
1106     av_freep(&ctx->tdata);
1107     av_freep(&ctx->slice_q);
1108
1109     return 0;
1110 }
1111
1112 static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
1113                         int linesize, int16_t *block)
1114 {
1115     int x, y;
1116     const uint16_t *tsrc = src;
1117
1118     for (y = 0; y < 8; y++) {
1119         for (x = 0; x < 8; x++)
1120             block[y * 8 + x] = tsrc[x];
1121         tsrc += linesize >> 1;
1122     }
1123     fdsp->fdct(block);
1124 }
1125
1126 static av_cold int encode_init(AVCodecContext *avctx)
1127 {
1128     ProresContext *ctx = avctx->priv_data;
1129     int mps;
1130     int i, j;
1131     int min_quant, max_quant;
1132     int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
1133
1134     avctx->bits_per_raw_sample = 10;
1135     avctx->coded_frame = av_frame_alloc();
1136     if (!avctx->coded_frame)
1137         return AVERROR(ENOMEM);
1138
1139     ctx->fdct      = prores_fdct;
1140     ctx->scantable = interlaced ? ff_prores_interlaced_scan
1141                                 : ff_prores_progressive_scan;
1142     ff_fdctdsp_init(&ctx->fdsp, avctx);
1143
1144     mps = ctx->mbs_per_slice;
1145     if (mps & (mps - 1)) {
1146         av_log(avctx, AV_LOG_ERROR,
1147                "there should be an integer power of two MBs per slice\n");
1148         return AVERROR(EINVAL);
1149     }
1150     if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1151         if (ctx->alpha_bits & 7) {
1152             av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1153             return AVERROR(EINVAL);
1154         }
1155     } else {
1156         ctx->alpha_bits = 0;
1157     }
1158
1159     ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1160                          ? CFACTOR_Y422
1161                          : CFACTOR_Y444;
1162     ctx->profile_info  = prores_profile_info + ctx->profile;
1163     ctx->num_planes    = 3 + !!ctx->alpha_bits;
1164
1165     ctx->mb_width      = FFALIGN(avctx->width,  16) >> 4;
1166
1167     if (interlaced)
1168         ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1169     else
1170         ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
1171
1172     ctx->slices_width  = ctx->mb_width / mps;
1173     ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1174     ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1175     ctx->pictures_per_frame = 1 + interlaced;
1176
1177     if (ctx->quant_sel == -1)
1178         ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1179     else
1180         ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
1181
1182     if (strlen(ctx->vendor) != 4) {
1183         av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1184         return AVERROR_INVALIDDATA;
1185     }
1186
1187     ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1188     if (!ctx->force_quant) {
1189         if (!ctx->bits_per_mb) {
1190             for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1191                 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1192                                            ctx->pictures_per_frame)
1193                     break;
1194             ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
1195         } else if (ctx->bits_per_mb < 128) {
1196             av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1197             return AVERROR_INVALIDDATA;
1198         }
1199
1200         min_quant = ctx->profile_info->min_quant;
1201         max_quant = ctx->profile_info->max_quant;
1202         for (i = min_quant; i < MAX_STORED_Q; i++) {
1203             for (j = 0; j < 64; j++)
1204                 ctx->quants[i][j] = ctx->quant_mat[j] * i;
1205         }
1206
1207         ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1208         if (!ctx->slice_q) {
1209             encode_close(avctx);
1210             return AVERROR(ENOMEM);
1211         }
1212
1213         ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1214         if (!ctx->tdata) {
1215             encode_close(avctx);
1216             return AVERROR(ENOMEM);
1217         }
1218
1219         for (j = 0; j < avctx->thread_count; j++) {
1220             ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1221                                             * TRELLIS_WIDTH
1222                                             * sizeof(*ctx->tdata->nodes));
1223             if (!ctx->tdata[j].nodes) {
1224                 encode_close(avctx);
1225                 return AVERROR(ENOMEM);
1226             }
1227             for (i = min_quant; i < max_quant + 2; i++) {
1228                 ctx->tdata[j].nodes[i].prev_node = -1;
1229                 ctx->tdata[j].nodes[i].bits      = 0;
1230                 ctx->tdata[j].nodes[i].score     = 0;
1231             }
1232         }
1233     } else {
1234         int ls = 0;
1235
1236         if (ctx->force_quant > 64) {
1237             av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1238             return AVERROR_INVALIDDATA;
1239         }
1240
1241         for (j = 0; j < 64; j++) {
1242             ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1243             ls += av_log2((1 << 11)  / ctx->quants[0][j]) * 2 + 1;
1244         }
1245
1246         ctx->bits_per_mb = ls * 8;
1247         if (ctx->chroma_factor == CFACTOR_Y444)
1248             ctx->bits_per_mb += ls * 4;
1249     }
1250
1251     ctx->frame_size_upper_bound = ctx->pictures_per_frame *
1252                                   ctx->slices_per_picture *
1253                                   (2 + 2 * ctx->num_planes +
1254                                    (mps * ctx->bits_per_mb) / 8)
1255                                   + 200;
1256
1257     if (ctx->alpha_bits) {
1258          // The alpha plane is run-coded and might exceed the bit budget.
1259          ctx->frame_size_upper_bound += ctx->pictures_per_frame *
1260                                         ctx->slices_per_picture *
1261          /* num pixels per slice */     (ctx->mbs_per_slice * 256 *
1262          /* bits per pixel */            (1 + ctx->alpha_bits + 1) + 7 >> 3);
1263     }
1264
1265     avctx->codec_tag   = ctx->profile_info->tag;
1266
1267     av_log(avctx, AV_LOG_DEBUG,
1268            "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1269            ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1270            interlaced ? "yes" : "no", ctx->bits_per_mb);
1271     av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1272            ctx->frame_size_upper_bound);
1273
1274     return 0;
1275 }
1276
// Byte offset of a ProresContext member, used by the AVOption table.
#define OFFSET(x) offsetof(ProresContext, x)
// Flags shared by every option of this encoder. Parenthesised so that the
// OR stays a single operand wherever the macro is expanded.
#define VE     (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
1279
1280 static const AVOption options[] = {
1281     { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1282         AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1283     { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1284         { .i64 = PRORES_PROFILE_STANDARD },
1285         PRORES_PROFILE_PROXY, PRORES_PROFILE_4444, VE, "profile" },
1286     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1287         0, 0, VE, "profile" },
1288     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1289         0, 0, VE, "profile" },
1290     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1291         0, 0, VE, "profile" },
1292     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1293         0, 0, VE, "profile" },
1294     { "4444",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1295         0, 0, VE, "profile" },
1296     { "vendor", "vendor ID", OFFSET(vendor),
1297         AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
1298     { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1299         AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
1300     { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1301         { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1302     { "auto",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1303         0, 0, VE, "quant_mat" },
1304     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1305         0, 0, VE, "quant_mat" },
1306     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1307         0, 0, VE, "quant_mat" },
1308     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1309         0, 0, VE, "quant_mat" },
1310     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1311         0, 0, VE, "quant_mat" },
1312     { "default",       NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1313         0, 0, VE, "quant_mat" },
1314     { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1315         { .i64 = 16 }, 0, 16, VE },
1316     { NULL }
1317 };
1318
1319 static const AVClass proresenc_class = {
1320     .class_name = "ProRes encoder",
1321     .item_name  = av_default_item_name,
1322     .option     = options,
1323     .version    = LIBAVUTIL_VERSION_INT,
1324 };
1325
1326 AVCodec ff_prores_encoder = {
1327     .name           = "prores",
1328     .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1329     .type           = AVMEDIA_TYPE_VIDEO,
1330     .id             = AV_CODEC_ID_PRORES,
1331     .priv_data_size = sizeof(ProresContext),
1332     .init           = encode_init,
1333     .close          = encode_close,
1334     .encode2        = encode_frame,
1335     .capabilities   = CODEC_CAP_SLICE_THREADS,
1336     .pix_fmts       = (const enum AVPixelFormat[]) {
1337                           AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1338                           AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1339                       },
1340     .priv_class     = &proresenc_class,
1341 };