git.sesse.net Git - ffmpeg/blob - libavcodec/proresenc.c

   1 /*
   2  * Apple ProRes encoder
   3  *
   4  * Copyright (c) 2012 Konstantin Shishkov
   5  *
   6  * This file is part of Libav.
   7  *
   8  * Libav is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * Libav is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with Libav; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 #include "libavutil/opt.h"
  24 #include "libavutil/pixdesc.h"
  25 #include "avcodec.h"
  26 #include "fdctdsp.h"
  27 #include "put_bits.h"
  28 #include "bytestream.h"
  29 #include "internal.h"
  30 #include "proresdata.h"
  31
/* Chroma layout identifiers; ProresContext.chroma_factor is compared
 * against CFACTOR_Y444 to decide whether chroma planes are full width. */
#define CFACTOR_Y422 2
#define CFACTOR_Y444 3

/* Largest slice width in macroblocks; sizes the per-plane coefficient buffers. */
#define MAX_MBS_PER_SLICE 8

/* Number of per-plane coefficient buffers kept in the block arrays. */
#define MAX_PLANES 4
  38
  39 enum {
  40     PRORES_PROFILE_PROXY = 0,
  41     PRORES_PROFILE_LT,
  42     PRORES_PROFILE_STANDARD,
  43     PRORES_PROFILE_HQ,
  44     PRORES_PROFILE_4444,
  45 };
  46
  47 enum {
  48     QUANT_MAT_PROXY = 0,
  49     QUANT_MAT_LT,
  50     QUANT_MAT_STANDARD,
  51     QUANT_MAT_HQ,
  52     QUANT_MAT_DEFAULT,
  53 };
  54
/*
 * Base quantisation matrices, one 64-entry row per QUANT_MAT_* selector
 * (rows appear in the same order as that enum). The encoder multiplies the
 * selected row by the slice quantiser to obtain the actual divisors.
 */
static const uint8_t prores_quant_matrices[][64] = {
    { // proxy
         4,  7,  9, 11, 13, 14, 15, 63,
         7,  7, 11, 12, 14, 15, 63, 63,
         9, 11, 13, 14, 15, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63,
    },
    { // LT
         4,  5,  6,  7,  9, 11, 13, 15,
         5,  5,  7,  8, 11, 13, 15, 17,
         6,  7,  9, 11, 13, 15, 15, 17,
         7,  7,  9, 11, 13, 15, 17, 19,
         7,  9, 11, 13, 14, 16, 19, 23,
         9, 11, 13, 14, 16, 19, 23, 29,
         9, 11, 13, 15, 17, 21, 28, 35,
        11, 13, 16, 17, 21, 28, 35, 41,
    },
    { // standard
         4,  4,  5,  5,  6,  7,  7,  9,
         4,  4,  5,  6,  7,  7,  9,  9,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  6,  7,  7,  8,  9, 10, 12,
         6,  7,  7,  8,  9, 10, 12, 15,
         6,  7,  7,  9, 10, 11, 14, 17,
         7,  7,  9, 10, 11, 14, 17, 21,
    },
    { // high quality
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  5,
         4,  4,  4,  4,  4,  4,  5,  5,
         4,  4,  4,  4,  4,  5,  5,  6,
         4,  4,  4,  4,  5,  5,  6,  7,
         4,  4,  4,  4,  5,  6,  7,  7,
    },
    { // codec default
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
    },
};
 107
/* Number of frame-size classes; also the length of each profile's br_tab. */
#define NUM_MB_LIMITS 4
/* Ascending size thresholds, one per frame-size class. The trailing comments
 * give the frame dimensions each threshold is meant to cover; presumably the
 * values are macroblock counts - confirm against the code that reads them. */
static const int prores_mb_limits[NUM_MB_LIMITS] = {
    1620, // up to 720x576
    2700, // up to 960x720
    6075, // up to 1440x1080
    9216, // up to 2048x1152
};
 115
/*
 * Static description of each encoder profile. Entries are listed in the
 * same order as the PRORES_PROFILE_* enum.
 */
static const struct prores_profile {
    const char *full_name;              // human-readable profile name
    uint32_t    tag;                    // four-character code built with MKTAG
    int         min_quant;              // lowest quantiser tried by find_slice_quant()
    int         max_quant;              // highest regular quantiser tried by find_slice_quant()
    int         br_tab[NUM_MB_LIMITS];  // one rate value per prores_mb_limits[] class
                                        // (presumably bits per macroblock - confirm at use site)
    int         quant;                  // QUANT_MAT_* selector for this profile
} prores_profile_info[5] = {
    {
        .full_name = "proxy",
        .tag       = MKTAG('a', 'p', 'c', 'o'),
        .min_quant = 4,
        .max_quant = 8,
        .br_tab    = { 300, 242, 220, 194 },
        .quant     = QUANT_MAT_PROXY,
    },
    {
        .full_name = "LT",
        .tag       = MKTAG('a', 'p', 'c', 's'),
        .min_quant = 1,
        .max_quant = 9,
        .br_tab    = { 720, 560, 490, 440 },
        .quant     = QUANT_MAT_LT,
    },
    {
        .full_name = "standard",
        .tag       = MKTAG('a', 'p', 'c', 'n'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 1050, 808, 710, 632 },
        .quant     = QUANT_MAT_STANDARD,
    },
    {
        .full_name = "high quality",
        .tag       = MKTAG('a', 'p', 'c', 'h'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 1566, 1216, 1070, 950 },
        .quant     = QUANT_MAT_HQ,
    },
    {
        .full_name = "4444",
        .tag       = MKTAG('a', 'p', '4', 'h'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 2350, 1828, 1600, 1425 },
        .quant     = QUANT_MAT_HQ,
    }
};
 165
 166 #define TRELLIS_WIDTH 16
 167 #define SCORE_LIMIT   INT_MAX / 2
 168
/* One candidate (slice, quantiser) state in the rate-control trellis. */
struct TrellisNode {
    int prev_node;  // index of the best predecessor node, -1 while unset
    int quant;      // quantiser this node stands for
    int bits;       // accumulated bit count along the best path to this node
    int score;      // accumulated error score along the best path to this node
};
 175
/* Number of quantisers whose scaled matrices are kept in ProresContext.quants;
 * larger quantisers are built on demand into a custom_q buffer. */
#define MAX_STORED_Q 16

/* Scratch state used by the quantiser search; one instance per worker thread. */
typedef struct ProresThreadData {
    DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE]; // per-plane slice data
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];  // padded copy of a macroblock that crosses the picture edge
    int16_t custom_q[64];                              // matrix built for quantisers >= MAX_STORED_Q
    struct TrellisNode *nodes;                         // trellis nodes, TRELLIS_WIDTH entries per slice column
} ProresThreadData;
 184
/* Private encoder context (reached through AVCodecContext.priv_data). */
typedef struct ProresContext {
    AVClass *class;
    DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE]; // slice data used while writing the bitstream
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];  // padded copy of a macroblock that crosses the picture edge
    int16_t quants[MAX_STORED_Q][64];               // stored quantisation matrices, indexed by quantiser
    int16_t custom_q[64];                           // matrix built by encode_slice() for quantisers >= MAX_STORED_Q
    const uint8_t *quant_mat;                       // base matrix that gets multiplied by the quantiser
    const uint8_t *scantable;                       // coefficient scan order handed to the AC coders

    // forward DCT wrapper invoked once per 8x8 block by get_slice_data()
    void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
                 int linesize, int16_t *block);
    FDCTDSPContext fdsp;

    int mb_width, mb_height;
    int mbs_per_slice;
    int num_chroma_blocks, chroma_factor;           // chroma_factor is CFACTOR_Y422 or CFACTOR_Y444
    int slices_width;
    int slices_per_picture;
    int pictures_per_frame; // 1 for progressive, 2 for interlaced
    int cur_picture_idx;                            // picture currently processed; selects the starting line when interlaced
    int num_planes;                                 // number of planes processed per slice
    int bits_per_mb;                                // bit budget per macroblock used by the quantiser search
    int force_quant;                                // when non-zero, encode_slice() always uses quants[0]
    int alpha_bits;                                 // alpha depth; 0 means no alpha handling in the quantiser search
    int warn;

    char *vendor;
    int quant_sel;

    int frame_size_upper_bound;

    int profile;
    const struct prores_profile *profile_info;      // supplies min_quant / max_quant for the search

    int *slice_q;                                   // quantiser chosen for each slice

    ProresThreadData *tdata;                        // per-thread scratch data, indexed by thread number
} ProresContext;
 223
 224 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
 225                            int linesize, int x, int y, int w, int h,
 226                            int16_t *blocks, uint16_t *emu_buf,
 227                            int mbs_per_slice, int blocks_per_mb, int is_chroma)
 228 {
 229     const uint16_t *esrc;
 230     const int mb_width = 4 * blocks_per_mb;
 231     int elinesize;
 232     int i, j, k;
 233
 234     for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
 235         if (x >= w) {
 236             memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
 237                               * sizeof(*blocks));
 238             return;
 239         }
 240         if (x + mb_width <= w && y + 16 <= h) {
 241             esrc      = src;
 242             elinesize = linesize;
 243         } else {
 244             int bw, bh, pix;
 245
 246             esrc      = emu_buf;
 247             elinesize = 16 * sizeof(*emu_buf);
 248
 249             bw = FFMIN(w - x, mb_width);
 250             bh = FFMIN(h - y, 16);
 251
 252             for (j = 0; j < bh; j++) {
 253                 memcpy(emu_buf + j * 16,
 254                        (const uint8_t*)src + j * linesize,
 255                        bw * sizeof(*src));
 256                 pix = emu_buf[j * 16 + bw - 1];
 257                 for (k = bw; k < mb_width; k++)
 258                     emu_buf[j * 16 + k] = pix;
 259             }
 260             for (; j < 16; j++)
 261                 memcpy(emu_buf + j * 16,
 262                        emu_buf + (bh - 1) * 16,
 263                        mb_width * sizeof(*emu_buf));
 264         }
 265         if (!is_chroma) {
 266             ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
 267             blocks += 64;
 268             if (blocks_per_mb > 2) {
 269                 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
 270                 blocks += 64;
 271             }
 272             ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
 273             blocks += 64;
 274             if (blocks_per_mb > 2) {
 275                 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
 276                 blocks += 64;
 277             }
 278         } else {
 279             ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
 280             blocks += 64;
 281             ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
 282             blocks += 64;
 283             if (blocks_per_mb > 2) {
 284                 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
 285                 blocks += 64;
 286                 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
 287                 blocks += 64;
 288             }
 289         }
 290
 291         x += mb_width;
 292     }
 293 }
 294
 295 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
 296                            int linesize, int x, int y, int w, int h,
 297                            int16_t *blocks, int mbs_per_slice, int abits)
 298 {
 299     const int slice_width = 16 * mbs_per_slice;
 300     int i, j, copy_w, copy_h;
 301
 302     copy_w = FFMIN(w - x, slice_width);
 303     copy_h = FFMIN(h - y, 16);
 304     for (i = 0; i < copy_h; i++) {
 305         memcpy(blocks, src, copy_w * sizeof(*src));
 306         if (abits == 8)
 307             for (j = 0; j < copy_w; j++)
 308                 blocks[j] >>= 2;
 309         else
 310             for (j = 0; j < copy_w; j++)
 311                 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
 312         for (j = copy_w; j < slice_width; j++)
 313             blocks[j] = blocks[copy_w - 1];
 314         blocks += slice_width;
 315         src    += linesize >> 1;
 316     }
 317     for (; i < 16; i++) {
 318         memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
 319         blocks += slice_width;
 320     }
 321 }
 322
 323 /**
 324  * Write an unsigned rice/exp golomb codeword.
 325  */
 326 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
 327 {
 328     unsigned int rice_order, exp_order, switch_bits, switch_val;
 329     int exponent;
 330
 331     /* number of prefix bits to switch between Rice and expGolomb */
 332     switch_bits = (codebook & 3) + 1;
 333     rice_order  =  codebook >> 5;       /* rice code order */
 334     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 335
 336     switch_val  = switch_bits << rice_order;
 337
 338     if (val >= switch_val) {
 339         val -= switch_val - (1 << exp_order);
 340         exponent = av_log2(val);
 341
 342         put_bits(pb, exponent - exp_order + switch_bits, 0);
 343         put_bits(pb, exponent + 1, val);
 344     } else {
 345         exponent = val >> rice_order;
 346
 347         if (exponent)
 348             put_bits(pb, exponent, 0);
 349         put_bits(pb, 1, 1);
 350         if (rice_order)
 351             put_sbits(pb, rice_order, val);
 352     }
 353 }
 354
 355 #define GET_SIGN(x)  ((x) >> 31)
 356 #define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
 357
 358 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
 359                        int blocks_per_slice, int scale)
 360 {
 361     int i;
 362     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 363
 364     prev_dc = (blocks[0] - 0x4000) / scale;
 365     encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
 366     sign     = 0;
 367     codebook = 3;
 368     blocks  += 64;
 369
 370     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 371         dc       = (blocks[0] - 0x4000) / scale;
 372         delta    = dc - prev_dc;
 373         new_sign = GET_SIGN(delta);
 374         delta    = (delta ^ sign) - sign;
 375         code     = MAKE_CODE(delta);
 376         encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
 377         codebook = (code + (code & 1)) >> 1;
 378         codebook = FFMIN(codebook, 3);
 379         sign     = new_sign;
 380         prev_dc  = dc;
 381     }
 382 }
 383
 384 static void encode_acs(PutBitContext *pb, int16_t *blocks,
 385                        int blocks_per_slice,
 386                        int plane_size_factor,
 387                        const uint8_t *scan, const int16_t *qmat)
 388 {
 389     int idx, i;
 390     int run, level, run_cb, lev_cb;
 391     int max_coeffs, abs_level;
 392
 393     max_coeffs = blocks_per_slice << 6;
 394     run_cb     = ff_prores_run_to_cb_index[4];
 395     lev_cb     = ff_prores_lev_to_cb_index[2];
 396     run        = 0;
 397
 398     for (i = 1; i < 64; i++) {
 399         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 400             level = blocks[idx] / qmat[scan[i]];
 401             if (level) {
 402                 abs_level = FFABS(level);
 403                 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
 404                 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
 405                                     abs_level - 1);
 406                 put_sbits(pb, 1, GET_SIGN(level));
 407
 408                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 409                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 410                 run    = 0;
 411             } else {
 412                 run++;
 413             }
 414         }
 415     }
 416 }
 417
 418 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
 419                               const uint16_t *src, int linesize,
 420                               int mbs_per_slice, int16_t *blocks,
 421                               int blocks_per_mb, int plane_size_factor,
 422                               const int16_t *qmat)
 423 {
 424     int blocks_per_slice, saved_pos;
 425
 426     saved_pos = put_bits_count(pb);
 427     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 428
 429     encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
 430     encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
 431                ctx->scantable, qmat);
 432     flush_put_bits(pb);
 433
 434     return (put_bits_count(pb) - saved_pos) >> 3;
 435 }
 436
 437 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
 438 {
 439     const int mask  = (1 << abits) - 1;
 440     const int dbits = (abits == 8) ? 4 : 7;
 441     const int dsize = 1 << dbits - 1;
 442     int diff = cur - prev;
 443
 444     diff &= mask;
 445     if (diff >= (1 << abits) - dsize)
 446         diff -= 1 << abits;
 447     if (diff < -dsize || diff > dsize || !diff) {
 448         put_bits(pb, 1, 1);
 449         put_bits(pb, abits, diff);
 450     } else {
 451         put_bits(pb, 1, 0);
 452         put_bits(pb, dbits - 1, FFABS(diff) - 1);
 453         put_bits(pb, 1, diff < 0);
 454     }
 455 }
 456
 457 static void put_alpha_run(PutBitContext *pb, int run)
 458 {
 459     if (run) {
 460         put_bits(pb, 1, 0);
 461         if (run < 0x10)
 462             put_bits(pb, 4, run);
 463         else
 464             put_bits(pb, 15, run);
 465     } else {
 466         put_bits(pb, 1, 1);
 467     }
 468 }
 469
 470 // todo alpha quantisation for high quants
 471 static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
 472                               int mbs_per_slice, uint16_t *blocks,
 473                               int quant)
 474 {
 475     const int abits = ctx->alpha_bits;
 476     const int mask  = (1 << abits) - 1;
 477     const int num_coeffs = mbs_per_slice * 256;
 478     int saved_pos = put_bits_count(pb);
 479     int prev = mask, cur;
 480     int idx = 0;
 481     int run = 0;
 482
 483     cur = blocks[idx++];
 484     put_alpha_diff(pb, cur, prev, abits);
 485     prev = cur;
 486     do {
 487         cur = blocks[idx++];
 488         if (cur != prev) {
 489             put_alpha_run (pb, run);
 490             put_alpha_diff(pb, cur, prev, abits);
 491             prev = cur;
 492             run  = 0;
 493         } else {
 494             run++;
 495         }
 496     } while (idx < num_coeffs);
 497     if (run)
 498         put_alpha_run(pb, run);
 499     flush_put_bits(pb);
 500     return (put_bits_count(pb) - saved_pos) >> 3;
 501 }
 502
 503 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
 504                         PutBitContext *pb,
 505                         int sizes[4], int x, int y, int quant,
 506                         int mbs_per_slice)
 507 {
 508     ProresContext *ctx = avctx->priv_data;
 509     int i, xp, yp;
 510     int total_size = 0;
 511     const uint16_t *src;
 512     int slice_width_factor = av_log2(mbs_per_slice);
 513     int num_cblocks, pwidth, linesize, line_add;
 514     int plane_factor, is_chroma;
 515     uint16_t *qmat;
 516
 517     if (ctx->pictures_per_frame == 1)
 518         line_add = 0;
 519     else
 520         line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
 521
 522     if (ctx->force_quant) {
 523         qmat = ctx->quants[0];
 524     } else if (quant < MAX_STORED_Q) {
 525         qmat = ctx->quants[quant];
 526     } else {
 527         qmat = ctx->custom_q;
 528         for (i = 0; i < 64; i++)
 529             qmat[i] = ctx->quant_mat[i] * quant;
 530     }
 531
 532     for (i = 0; i < ctx->num_planes; i++) {
 533         is_chroma    = (i == 1 || i == 2);
 534         plane_factor = slice_width_factor + 2;
 535         if (is_chroma)
 536             plane_factor += ctx->chroma_factor - 3;
 537         if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
 538             xp          = x << 4;
 539             yp          = y << 4;
 540             num_cblocks = 4;
 541             pwidth      = avctx->width;
 542         } else {
 543             xp          = x << 3;
 544             yp          = y << 4;
 545             num_cblocks = 2;
 546             pwidth      = avctx->width >> 1;
 547         }
 548
 549         linesize = pic->linesize[i] * ctx->pictures_per_frame;
 550         src = (const uint16_t*)(pic->data[i] + yp * linesize +
 551                                 line_add * pic->linesize[i]) + xp;
 552
 553         if (i < 3) {
 554             get_slice_data(ctx, src, linesize, xp, yp,
 555                            pwidth, avctx->height / ctx->pictures_per_frame,
 556                            ctx->blocks[0], ctx->emu_buf,
 557                            mbs_per_slice, num_cblocks, is_chroma);
 558             sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
 559                                           mbs_per_slice, ctx->blocks[0],
 560                                           num_cblocks, plane_factor,
 561                                           qmat);
 562         } else {
 563             get_alpha_data(ctx, src, linesize, xp, yp,
 564                            pwidth, avctx->height / ctx->pictures_per_frame,
 565                            ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
 566             sizes[i] = encode_alpha_plane(ctx, pb, mbs_per_slice,
 567                                           ctx->blocks[0], quant);
 568         }
 569         total_size += sizes[i];
 570         if (put_bits_left(pb) < 0) {
 571             av_log(avctx, AV_LOG_ERROR,
 572                    "Underestimated required buffer size.\n");
 573             return AVERROR_BUG;
 574         }
 575     }
 576     return total_size;
 577 }
 578
 579 static inline int estimate_vlc(unsigned codebook, int val)
 580 {
 581     unsigned int rice_order, exp_order, switch_bits, switch_val;
 582     int exponent;
 583
 584     /* number of prefix bits to switch between Rice and expGolomb */
 585     switch_bits = (codebook & 3) + 1;
 586     rice_order  =  codebook >> 5;       /* rice code order */
 587     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 588
 589     switch_val  = switch_bits << rice_order;
 590
 591     if (val >= switch_val) {
 592         val -= switch_val - (1 << exp_order);
 593         exponent = av_log2(val);
 594
 595         return exponent * 2 - exp_order + switch_bits + 1;
 596     } else {
 597         return (val >> rice_order) + rice_order + 1;
 598     }
 599 }
 600
 601 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
 602                         int scale)
 603 {
 604     int i;
 605     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 606     int bits;
 607
 608     prev_dc  = (blocks[0] - 0x4000) / scale;
 609     bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
 610     sign     = 0;
 611     codebook = 3;
 612     blocks  += 64;
 613     *error  += FFABS(blocks[0] - 0x4000) % scale;
 614
 615     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 616         dc       = (blocks[0] - 0x4000) / scale;
 617         *error  += FFABS(blocks[0] - 0x4000) % scale;
 618         delta    = dc - prev_dc;
 619         new_sign = GET_SIGN(delta);
 620         delta    = (delta ^ sign) - sign;
 621         code     = MAKE_CODE(delta);
 622         bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
 623         codebook = (code + (code & 1)) >> 1;
 624         codebook = FFMIN(codebook, 3);
 625         sign     = new_sign;
 626         prev_dc  = dc;
 627     }
 628
 629     return bits;
 630 }
 631
 632 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
 633                         int plane_size_factor,
 634                         const uint8_t *scan, const int16_t *qmat)
 635 {
 636     int idx, i;
 637     int run, level, run_cb, lev_cb;
 638     int max_coeffs, abs_level;
 639     int bits = 0;
 640
 641     max_coeffs = blocks_per_slice << 6;
 642     run_cb     = ff_prores_run_to_cb_index[4];
 643     lev_cb     = ff_prores_lev_to_cb_index[2];
 644     run        = 0;
 645
 646     for (i = 1; i < 64; i++) {
 647         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 648             level   = blocks[idx] / qmat[scan[i]];
 649             *error += FFABS(blocks[idx]) % qmat[scan[i]];
 650             if (level) {
 651                 abs_level = FFABS(level);
 652                 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
 653                 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
 654                                      abs_level - 1) + 1;
 655
 656                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 657                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 658                 run    = 0;
 659             } else {
 660                 run++;
 661             }
 662         }
 663     }
 664
 665     return bits;
 666 }
 667
 668 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
 669                                 const uint16_t *src, int linesize,
 670                                 int mbs_per_slice,
 671                                 int blocks_per_mb, int plane_size_factor,
 672                                 const int16_t *qmat, ProresThreadData *td)
 673 {
 674     int blocks_per_slice;
 675     int bits;
 676
 677     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 678
 679     bits  = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
 680     bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
 681                          plane_size_factor, ctx->scantable, qmat);
 682
 683     return FFALIGN(bits, 8);
 684 }
 685
 686 static int est_alpha_diff(int cur, int prev, int abits)
 687 {
 688     const int mask  = (1 << abits) - 1;
 689     const int dbits = (abits == 8) ? 4 : 7;
 690     const int dsize = 1 << dbits - 1;
 691     int diff = cur - prev;
 692
 693     diff &= mask;
 694     if (diff >= (1 << abits) - dsize)
 695         diff -= 1 << abits;
 696     if (diff < -dsize || diff > dsize || !diff)
 697         return abits + 1;
 698     else
 699         return dbits + 1;
 700 }
 701
 702 static int estimate_alpha_plane(ProresContext *ctx, int *error,
 703                                 const uint16_t *src, int linesize,
 704                                 int mbs_per_slice, int quant,
 705                                 int16_t *blocks)
 706 {
 707     const int abits = ctx->alpha_bits;
 708     const int mask  = (1 << abits) - 1;
 709     const int num_coeffs = mbs_per_slice * 256;
 710     int prev = mask, cur;
 711     int idx = 0;
 712     int run = 0;
 713     int bits;
 714
 715     *error = 0;
 716     cur = blocks[idx++];
 717     bits = est_alpha_diff(cur, prev, abits);
 718     prev = cur;
 719     do {
 720         cur = blocks[idx++];
 721         if (cur != prev) {
 722             if (!run)
 723                 bits++;
 724             else if (run < 0x10)
 725                 bits += 4;
 726             else
 727                 bits += 15;
 728             bits += est_alpha_diff(cur, prev, abits);
 729             prev = cur;
 730             run  = 0;
 731         } else {
 732             run++;
 733         }
 734     } while (idx < num_coeffs);
 735
 736     if (run) {
 737         if (run < 0x10)
 738             bits += 4;
 739         else
 740             bits += 15;
 741     }
 742
 743     return bits;
 744 }
 745
 746 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
 747                             int trellis_node, int x, int y, int mbs_per_slice,
 748                             ProresThreadData *td)
 749 {
 750     ProresContext *ctx = avctx->priv_data;
 751     int i, q, pq, xp, yp;
 752     const uint16_t *src;
 753     int slice_width_factor = av_log2(mbs_per_slice);
 754     int num_cblocks[MAX_PLANES], pwidth;
 755     int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
 756     const int min_quant = ctx->profile_info->min_quant;
 757     const int max_quant = ctx->profile_info->max_quant;
 758     int error, bits, bits_limit;
 759     int mbs, prev, cur, new_score;
 760     int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
 761     int overquant;
 762     uint16_t *qmat;
 763     int linesize[4], line_add;
 764
 765     if (ctx->pictures_per_frame == 1)
 766         line_add = 0;
 767     else
 768         line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
 769     mbs = x + mbs_per_slice;
 770
 771     for (i = 0; i < ctx->num_planes; i++) {
 772         is_chroma[i]    = (i == 1 || i == 2);
 773         plane_factor[i] = slice_width_factor + 2;
 774         if (is_chroma[i])
 775             plane_factor[i] += ctx->chroma_factor - 3;
 776         if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
 777             xp             = x << 4;
 778             yp             = y << 4;
 779             num_cblocks[i] = 4;
 780             pwidth         = avctx->width;
 781         } else {
 782             xp             = x << 3;
 783             yp             = y << 4;
 784             num_cblocks[i] = 2;
 785             pwidth         = avctx->width >> 1;
 786         }
 787
 788         linesize[i] = pic->linesize[i] * ctx->pictures_per_frame;
 789         src = (const uint16_t*)(pic->data[i] + yp * linesize[i] +
 790                                 line_add * pic->linesize[i]) + xp;
 791
 792         if (i < 3) {
 793             get_slice_data(ctx, src, linesize[i], xp, yp,
 794                            pwidth, avctx->height / ctx->pictures_per_frame,
 795                            td->blocks[i], td->emu_buf,
 796                            mbs_per_slice, num_cblocks[i], is_chroma[i]);
 797         } else {
 798             get_alpha_data(ctx, src, linesize[i], xp, yp,
 799                            pwidth, avctx->height / ctx->pictures_per_frame,
 800                            td->blocks[i], mbs_per_slice, ctx->alpha_bits);
 801         }
 802     }
 803
 804     for (q = min_quant; q < max_quant + 2; q++) {
 805         td->nodes[trellis_node + q].prev_node = -1;
 806         td->nodes[trellis_node + q].quant     = q;
 807     }
 808
 809     // todo: maybe perform coarser quantising to fit into frame size when needed
 810     for (q = min_quant; q <= max_quant; q++) {
 811         bits  = 0;
 812         error = 0;
 813         for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
 814             bits += estimate_slice_plane(ctx, &error, i,
 815                                          src, linesize[i],
 816                                          mbs_per_slice,
 817                                          num_cblocks[i], plane_factor[i],
 818                                          ctx->quants[q], td);
 819         }
 820         if (ctx->alpha_bits)
 821             bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
 822                                          mbs_per_slice, q, td->blocks[3]);
 823         if (bits > 65000 * 8)
 824             error = SCORE_LIMIT;
 825
 826         slice_bits[q]  = bits;
 827         slice_score[q] = error;
 828     }
 829     if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
 830         slice_bits[max_quant + 1]  = slice_bits[max_quant];
 831         slice_score[max_quant + 1] = slice_score[max_quant] + 1;
 832         overquant = max_quant;
 833     } else {
 834         for (q = max_quant + 1; q < 128; q++) {
 835             bits  = 0;
 836             error = 0;
 837             if (q < MAX_STORED_Q) {
 838                 qmat = ctx->quants[q];
 839             } else {
 840                 qmat = td->custom_q;
 841                 for (i = 0; i < 64; i++)
 842                     qmat[i] = ctx->quant_mat[i] * q;
 843             }
 844             for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
 845                 bits += estimate_slice_plane(ctx, &error, i,
 846                                              src, linesize[i],
 847                                              mbs_per_slice,
 848                                              num_cblocks[i], plane_factor[i],
 849                                              qmat, td);
 850             }
 851             if (ctx->alpha_bits)
 852                 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
 853                                              mbs_per_slice, q, td->blocks[3]);
 854             if (bits <= ctx->bits_per_mb * mbs_per_slice)
 855                 break;
 856         }
 857
 858         slice_bits[max_quant + 1]  = bits;
 859         slice_score[max_quant + 1] = error;
 860         overquant = q;
 861     }
 862     td->nodes[trellis_node + max_quant + 1].quant = overquant;
 863
 864     bits_limit = mbs * ctx->bits_per_mb;
 865     for (pq = min_quant; pq < max_quant + 2; pq++) {
 866         prev = trellis_node - TRELLIS_WIDTH + pq;
 867
 868         for (q = min_quant; q < max_quant + 2; q++) {
 869             cur = trellis_node + q;
 870
 871             bits  = td->nodes[prev].bits + slice_bits[q];
 872             error = slice_score[q];
 873             if (bits > bits_limit)
 874                 error = SCORE_LIMIT;
 875
 876             if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
 877                 new_score = td->nodes[prev].score + error;
 878             else
 879                 new_score = SCORE_LIMIT;
 880             if (td->nodes[cur].prev_node == -1 ||
 881                 td->nodes[cur].score >= new_score) {
 882
 883                 td->nodes[cur].bits      = bits;
 884                 td->nodes[cur].score     = new_score;
 885                 td->nodes[cur].prev_node = prev;
 886             }
 887         }
 888     }
 889
 890     error = td->nodes[trellis_node + min_quant].score;
 891     pq    = trellis_node + min_quant;
 892     for (q = min_quant + 1; q < max_quant + 2; q++) {
 893         if (td->nodes[trellis_node + q].score <= error) {
 894             error = td->nodes[trellis_node + q].score;
 895             pq    = trellis_node + q;
 896         }
 897     }
 898
 899     return pq;
 900 }
 901
 902 static int find_quant_thread(AVCodecContext *avctx, void *arg,
 903                              int jobnr, int threadnr)
 904 {
 905     ProresContext *ctx = avctx->priv_data;
 906     ProresThreadData *td = ctx->tdata + threadnr;
 907     int mbs_per_slice = ctx->mbs_per_slice;
 908     int x, y = jobnr, mb, q = 0;
 909
 910     for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
 911         while (ctx->mb_width - x < mbs_per_slice)
 912             mbs_per_slice >>= 1;
 913         q = find_slice_quant(avctx, avctx->coded_frame,
 914                              (mb + 1) * TRELLIS_WIDTH, x, y,
 915                              mbs_per_slice, td);
 916     }
 917
 918     for (x = ctx->slices_width - 1; x >= 0; x--) {
 919         ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
 920         q = td->nodes[q].prev_node;
 921     }
 922
 923     return 0;
 924 }
 925
 926 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 927                         const AVFrame *pic, int *got_packet)
 928 {
 929     ProresContext *ctx = avctx->priv_data;
 930     uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
 931     uint8_t *picture_size_pos;
 932     PutBitContext pb;
 933     int x, y, i, mb, q = 0;
 934     int sizes[4] = { 0 };
 935     int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
 936     int frame_size, picture_size, slice_size;
 937     int pkt_size, ret, max_slice_size = 0;
 938     uint8_t frame_flags;
 939
 940     *avctx->coded_frame           = *pic;
 941     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
 942     avctx->coded_frame->key_frame = 1;
 943
 944     pkt_size = ctx->frame_size_upper_bound;
 945
 946     if ((ret = ff_alloc_packet(pkt, pkt_size + FF_MIN_BUFFER_SIZE)) < 0) {
 947         av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
 948         return ret;
 949     }
 950
 951     orig_buf = pkt->data;
 952
 953     // frame atom
 954     orig_buf += 4;                              // frame size
 955     bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
 956     buf = orig_buf;
 957
 958     // frame header
 959     tmp = buf;
 960     buf += 2;                                   // frame header size will be stored here
 961     bytestream_put_be16  (&buf, 0);             // version 1
 962     bytestream_put_buffer(&buf, ctx->vendor, 4);
 963     bytestream_put_be16  (&buf, avctx->width);
 964     bytestream_put_be16  (&buf, avctx->height);
 965
 966     frame_flags = ctx->chroma_factor << 6;
 967     if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
 968         frame_flags |= pic->top_field_first ? 0x04 : 0x08;
 969     bytestream_put_byte  (&buf, frame_flags);
 970
 971     bytestream_put_byte  (&buf, 0);             // reserved
 972     bytestream_put_byte  (&buf, avctx->color_primaries);
 973     bytestream_put_byte  (&buf, avctx->color_trc);
 974     bytestream_put_byte  (&buf, avctx->colorspace);
 975     bytestream_put_byte  (&buf, 0x40 | (ctx->alpha_bits >> 3));
 976     bytestream_put_byte  (&buf, 0);             // reserved
 977     if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
 978         bytestream_put_byte  (&buf, 0x03);      // matrix flags - both matrices are present
 979         // luma quantisation matrix
 980         for (i = 0; i < 64; i++)
 981             bytestream_put_byte(&buf, ctx->quant_mat[i]);
 982         // chroma quantisation matrix
 983         for (i = 0; i < 64; i++)
 984             bytestream_put_byte(&buf, ctx->quant_mat[i]);
 985     } else {
 986         bytestream_put_byte  (&buf, 0x00);      // matrix flags - default matrices are used
 987     }
 988     bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
 989
 990     for (ctx->cur_picture_idx = 0;
 991          ctx->cur_picture_idx < ctx->pictures_per_frame;
 992          ctx->cur_picture_idx++) {
 993         // picture header
 994         picture_size_pos = buf + 1;
 995         bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
 996         buf += 4;                                   // picture data size will be stored here
 997         bytestream_put_be16  (&buf, ctx->slices_per_picture);
 998         bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
 999
1000         // seek table - will be filled during slice encoding
1001         slice_sizes = buf;
1002         buf += ctx->slices_per_picture * 2;
1003
1004         // slices
1005         if (!ctx->force_quant) {
1006             ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
1007                                   ctx->mb_height);
1008             if (ret)
1009                 return ret;
1010         }
1011
1012         for (y = 0; y < ctx->mb_height; y++) {
1013             int mbs_per_slice = ctx->mbs_per_slice;
1014             for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1015                 q = ctx->force_quant ? ctx->force_quant
1016                                      : ctx->slice_q[mb + y * ctx->slices_width];
1017
1018                 while (ctx->mb_width - x < mbs_per_slice)
1019                     mbs_per_slice >>= 1;
1020
1021                 bytestream_put_byte(&buf, slice_hdr_size << 3);
1022                 slice_hdr = buf;
1023                 buf += slice_hdr_size - 1;
1024                 if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1025                     uint8_t *start = pkt->data;
1026                     // Recompute new size according to max_slice_size
1027                     // and deduce delta
1028                     int delta = 200 + ctx->pictures_per_frame *
1029                                 ctx->slices_per_picture * max_slice_size -
1030                                 pkt_size;
1031
1032                     delta = FFMAX(delta, 2 * max_slice_size);
1033                     ctx->frame_size_upper_bound += delta;
1034
1035                     if (!ctx->warn) {
1036                         avpriv_request_sample(avctx,
1037                                               "Packet too small: is %i,"
1038                                               " needs %i (slice: %i). "
1039                                               "Correct allocation",
1040                                               pkt_size, delta, max_slice_size);
1041                         ctx->warn = 1;
1042                     }
1043
1044                     ret = av_grow_packet(pkt, delta);
1045                     if (ret < 0)
1046                         return ret;
1047
1048                     pkt_size += delta;
1049                     // restore pointers
1050                     orig_buf         = pkt->data + (orig_buf         - start);
1051                     buf              = pkt->data + (buf              - start);
1052                     picture_size_pos = pkt->data + (picture_size_pos - start);
1053                     slice_sizes      = pkt->data + (slice_sizes      - start);
1054                     slice_hdr        = pkt->data + (slice_hdr        - start);
1055                     tmp              = pkt->data + (tmp              - start);
1056                 }
1057                 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
1058                 ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
1059                                    mbs_per_slice);
1060                 if (ret < 0)
1061                     return ret;
1062
1063                 bytestream_put_byte(&slice_hdr, q);
1064                 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1065                 for (i = 0; i < ctx->num_planes - 1; i++) {
1066                     bytestream_put_be16(&slice_hdr, sizes[i]);
1067                     slice_size += sizes[i];
1068                 }
1069                 bytestream_put_be16(&slice_sizes, slice_size);
1070                 buf += slice_size - slice_hdr_size;
1071                 if (max_slice_size < slice_size)
1072                     max_slice_size = slice_size;
1073             }
1074         }
1075
1076         if (ctx->pictures_per_frame == 1)
1077             picture_size = buf - picture_size_pos - 6;
1078         else
1079             picture_size = buf - picture_size_pos + 1;
1080         bytestream_put_be32(&picture_size_pos, picture_size);
1081     }
1082
1083     orig_buf -= 8;
1084     frame_size = buf - orig_buf;
1085     bytestream_put_be32(&orig_buf, frame_size);
1086
1087     pkt->size   = frame_size;
1088     pkt->flags |= AV_PKT_FLAG_KEY;
1089     *got_packet = 1;
1090
1091     return 0;
1092 }
1093
1094 static av_cold int encode_close(AVCodecContext *avctx)
1095 {
1096     ProresContext *ctx = avctx->priv_data;
1097     int i;
1098
1099     av_freep(&avctx->coded_frame);
1100
1101     if (ctx->tdata) {
1102         for (i = 0; i < avctx->thread_count; i++)
1103             av_free(ctx->tdata[i].nodes);
1104     }
1105     av_freep(&ctx->tdata);
1106     av_freep(&ctx->slice_q);
1107
1108     return 0;
1109 }
1110
1111 static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
1112                         int linesize, int16_t *block)
1113 {
1114     int x, y;
1115     const uint16_t *tsrc = src;
1116
1117     for (y = 0; y < 8; y++) {
1118         for (x = 0; x < 8; x++)
1119             block[y * 8 + x] = tsrc[x];
1120         tsrc += linesize >> 1;
1121     }
1122     fdsp->fdct(block);
1123 }
1124
1125 static av_cold int encode_init(AVCodecContext *avctx)
1126 {
1127     ProresContext *ctx = avctx->priv_data;
1128     int mps;
1129     int i, j;
1130     int min_quant, max_quant;
1131     int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
1132
1133     avctx->bits_per_raw_sample = 10;
1134     avctx->coded_frame = av_frame_alloc();
1135     if (!avctx->coded_frame)
1136         return AVERROR(ENOMEM);
1137
1138     ctx->fdct      = prores_fdct;
1139     ctx->scantable = interlaced ? ff_prores_interlaced_scan
1140                                 : ff_prores_progressive_scan;
1141     ff_fdctdsp_init(&ctx->fdsp, avctx);
1142
1143     mps = ctx->mbs_per_slice;
1144     if (mps & (mps - 1)) {
1145         av_log(avctx, AV_LOG_ERROR,
1146                "there should be an integer power of two MBs per slice\n");
1147         return AVERROR(EINVAL);
1148     }
1149     if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1150         if (ctx->alpha_bits & 7) {
1151             av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1152             return AVERROR(EINVAL);
1153         }
1154     } else {
1155         ctx->alpha_bits = 0;
1156     }
1157
1158     ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1159                          ? CFACTOR_Y422
1160                          : CFACTOR_Y444;
1161     ctx->profile_info  = prores_profile_info + ctx->profile;
1162     ctx->num_planes    = 3 + !!ctx->alpha_bits;
1163
1164     ctx->mb_width      = FFALIGN(avctx->width,  16) >> 4;
1165
1166     if (interlaced)
1167         ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1168     else
1169         ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
1170
1171     ctx->slices_width  = ctx->mb_width / mps;
1172     ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1173     ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1174     ctx->pictures_per_frame = 1 + interlaced;
1175
1176     if (ctx->quant_sel == -1)
1177         ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1178     else
1179         ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
1180
1181     if (strlen(ctx->vendor) != 4) {
1182         av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1183         return AVERROR_INVALIDDATA;
1184     }
1185
1186     ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1187     if (!ctx->force_quant) {
1188         if (!ctx->bits_per_mb) {
1189             for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1190                 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1191                                            ctx->pictures_per_frame)
1192                     break;
1193             ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
1194         } else if (ctx->bits_per_mb < 128) {
1195             av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1196             return AVERROR_INVALIDDATA;
1197         }
1198
1199         min_quant = ctx->profile_info->min_quant;
1200         max_quant = ctx->profile_info->max_quant;
1201         for (i = min_quant; i < MAX_STORED_Q; i++) {
1202             for (j = 0; j < 64; j++)
1203                 ctx->quants[i][j] = ctx->quant_mat[j] * i;
1204         }
1205
1206         ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1207         if (!ctx->slice_q) {
1208             encode_close(avctx);
1209             return AVERROR(ENOMEM);
1210         }
1211
1212         ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1213         if (!ctx->tdata) {
1214             encode_close(avctx);
1215             return AVERROR(ENOMEM);
1216         }
1217
1218         for (j = 0; j < avctx->thread_count; j++) {
1219             ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1220                                             * TRELLIS_WIDTH
1221                                             * sizeof(*ctx->tdata->nodes));
1222             if (!ctx->tdata[j].nodes) {
1223                 encode_close(avctx);
1224                 return AVERROR(ENOMEM);
1225             }
1226             for (i = min_quant; i < max_quant + 2; i++) {
1227                 ctx->tdata[j].nodes[i].prev_node = -1;
1228                 ctx->tdata[j].nodes[i].bits      = 0;
1229                 ctx->tdata[j].nodes[i].score     = 0;
1230             }
1231         }
1232     } else {
1233         int ls = 0;
1234
1235         if (ctx->force_quant > 64) {
1236             av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1237             return AVERROR_INVALIDDATA;
1238         }
1239
1240         for (j = 0; j < 64; j++) {
1241             ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1242             ls += av_log2((1 << 11)  / ctx->quants[0][j]) * 2 + 1;
1243         }
1244
1245         ctx->bits_per_mb = ls * 8;
1246         if (ctx->chroma_factor == CFACTOR_Y444)
1247             ctx->bits_per_mb += ls * 4;
1248     }
1249
1250     ctx->frame_size_upper_bound = ctx->pictures_per_frame *
1251                                   ctx->slices_per_picture *
1252                                   (2 + 2 * ctx->num_planes +
1253                                    (mps * ctx->bits_per_mb) / 8)
1254                                   + 200;
1255
1256     if (ctx->alpha_bits) {
1257          // The alpha plane is run-coded and might exceed the bit budget.
1258          ctx->frame_size_upper_bound += ctx->pictures_per_frame *
1259                                         ctx->slices_per_picture *
1260          /* num pixels per slice */     (ctx->mbs_per_slice * 256 *
1261          /* bits per pixel */            (1 + ctx->alpha_bits + 1) + 7 >> 3);
1262     }
1263
1264     avctx->codec_tag   = ctx->profile_info->tag;
1265
1266     av_log(avctx, AV_LOG_DEBUG,
1267            "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1268            ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1269            interlaced ? "yes" : "no", ctx->bits_per_mb);
1270     av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1271            ctx->frame_size_upper_bound);
1272
1273     return 0;
1274 }
1275
/* Byte offset of a ProresContext member, used by the option table. */
#define OFFSET(x) offsetof(ProresContext, x)
/* Flags shared by all options of this encoder.  The expansion is
 * parenthesised so that it stays one operand wherever it is used. */
#define VE     (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
1278
1279 static const AVOption options[] = {
1280     { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1281         AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1282     { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1283         { .i64 = PRORES_PROFILE_STANDARD },
1284         PRORES_PROFILE_PROXY, PRORES_PROFILE_4444, VE, "profile" },
1285     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1286         0, 0, VE, "profile" },
1287     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1288         0, 0, VE, "profile" },
1289     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1290         0, 0, VE, "profile" },
1291     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1292         0, 0, VE, "profile" },
1293     { "4444",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1294         0, 0, VE, "profile" },
1295     { "vendor", "vendor ID", OFFSET(vendor),
1296         AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
1297     { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1298         AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
1299     { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1300         { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1301     { "auto",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1302         0, 0, VE, "quant_mat" },
1303     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1304         0, 0, VE, "quant_mat" },
1305     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1306         0, 0, VE, "quant_mat" },
1307     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1308         0, 0, VE, "quant_mat" },
1309     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1310         0, 0, VE, "quant_mat" },
1311     { "default",       NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1312         0, 0, VE, "quant_mat" },
1313     { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1314         { .i64 = 16 }, 0, 16, VE },
1315     { NULL }
1316 };
1317
1318 static const AVClass proresenc_class = {
1319     .class_name = "ProRes encoder",
1320     .item_name  = av_default_item_name,
1321     .option     = options,
1322     .version    = LIBAVUTIL_VERSION_INT,
1323 };
1324
1325 AVCodec ff_prores_encoder = {
1326     .name           = "prores",
1327     .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1328     .type           = AVMEDIA_TYPE_VIDEO,
1329     .id             = AV_CODEC_ID_PRORES,
1330     .priv_data_size = sizeof(ProresContext),
1331     .init           = encode_init,
1332     .close          = encode_close,
1333     .encode2        = encode_frame,
1334     .capabilities   = CODEC_CAP_SLICE_THREADS,
1335     .pix_fmts       = (const enum AVPixelFormat[]) {
1336                           AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1337                           AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1338                       },
1339     .priv_class     = &proresenc_class,
1340 };