git.sesse.net Git - ffmpeg/blob - libavcodec/proresenc_kostya.c

   1 /*
   2  * Apple ProRes encoder
   3  *
   4  * Copyright (c) 2012 Konstantin Shishkov
   5  *
   6  * This encoder appears to be based on Anatoliy Wasserman's encoder,
   7  * considering the similarities in the bugs.
   8  *
   9  * This file is part of FFmpeg.
  10  *
  11  * FFmpeg is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * FFmpeg is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with FFmpeg; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/opt.h"
  27 #include "libavutil/pixdesc.h"
  28 #include "avcodec.h"
  29 #include "fdctdsp.h"
  30 #include "put_bits.h"
  31 #include "bytestream.h"
  32 #include "internal.h"
  33 #include "proresdata.h"
  34
  35 #define CFACTOR_Y422 2
  36 #define CFACTOR_Y444 3
  37
  38 #define MAX_MBS_PER_SLICE 8
  39
  40 #define MAX_PLANES 4
  41
  42 enum {
  43     PRORES_PROFILE_PROXY = 0,
  44     PRORES_PROFILE_LT,
  45     PRORES_PROFILE_STANDARD,
  46     PRORES_PROFILE_HQ,
  47     PRORES_PROFILE_4444,
  48 };
  49
  50 enum {
  51     QUANT_MAT_PROXY = 0,
  52     QUANT_MAT_LT,
  53     QUANT_MAT_STANDARD,
  54     QUANT_MAT_HQ,
  55     QUANT_MAT_DEFAULT,
  56 };
  57
  58 static const uint8_t prores_quant_matrices[][64] = {
  59     { // proxy
  60          4,  7,  9, 11, 13, 14, 15, 63,
  61          7,  7, 11, 12, 14, 15, 63, 63,
  62          9, 11, 13, 14, 15, 63, 63, 63,
  63         11, 11, 13, 14, 63, 63, 63, 63,
  64         11, 13, 14, 63, 63, 63, 63, 63,
  65         13, 14, 63, 63, 63, 63, 63, 63,
  66         13, 63, 63, 63, 63, 63, 63, 63,
  67         63, 63, 63, 63, 63, 63, 63, 63,
  68     },
  69     { // LT
  70          4,  5,  6,  7,  9, 11, 13, 15,
  71          5,  5,  7,  8, 11, 13, 15, 17,
  72          6,  7,  9, 11, 13, 15, 15, 17,
  73          7,  7,  9, 11, 13, 15, 17, 19,
  74          7,  9, 11, 13, 14, 16, 19, 23,
  75          9, 11, 13, 14, 16, 19, 23, 29,
  76          9, 11, 13, 15, 17, 21, 28, 35,
  77         11, 13, 16, 17, 21, 28, 35, 41,
  78     },
  79     { // standard
  80          4,  4,  5,  5,  6,  7,  7,  9,
  81          4,  4,  5,  6,  7,  7,  9,  9,
  82          5,  5,  6,  7,  7,  9,  9, 10,
  83          5,  5,  6,  7,  7,  9,  9, 10,
  84          5,  6,  7,  7,  8,  9, 10, 12,
  85          6,  7,  7,  8,  9, 10, 12, 15,
  86          6,  7,  7,  9, 10, 11, 14, 17,
  87          7,  7,  9, 10, 11, 14, 17, 21,
  88     },
  89     { // high quality
  90          4,  4,  4,  4,  4,  4,  4,  4,
  91          4,  4,  4,  4,  4,  4,  4,  4,
  92          4,  4,  4,  4,  4,  4,  4,  4,
  93          4,  4,  4,  4,  4,  4,  4,  5,
  94          4,  4,  4,  4,  4,  4,  5,  5,
  95          4,  4,  4,  4,  4,  5,  5,  6,
  96          4,  4,  4,  4,  5,  5,  6,  7,
  97          4,  4,  4,  4,  5,  6,  7,  7,
  98     },
  99     { // codec default
 100          4,  4,  4,  4,  4,  4,  4,  4,
 101          4,  4,  4,  4,  4,  4,  4,  4,
 102          4,  4,  4,  4,  4,  4,  4,  4,
 103          4,  4,  4,  4,  4,  4,  4,  4,
 104          4,  4,  4,  4,  4,  4,  4,  4,
 105          4,  4,  4,  4,  4,  4,  4,  4,
 106          4,  4,  4,  4,  4,  4,  4,  4,
 107          4,  4,  4,  4,  4,  4,  4,  4,
 108     },
 109 };
 110
 111 #define NUM_MB_LIMITS 4
 112 static const int prores_mb_limits[NUM_MB_LIMITS] = {
 113     1620, // up to 720x576
 114     2700, // up to 960x720
 115     6075, // up to 1440x1080
 116     9216, // up to 2048x1152
 117 };
 118
 119 static const struct prores_profile {
 120     const char *full_name;
 121     uint32_t    tag;
 122     int         min_quant;
 123     int         max_quant;
 124     int         br_tab[NUM_MB_LIMITS];
 125     int         quant;
 126 } prores_profile_info[5] = {
 127     {
 128         .full_name = "proxy",
 129         .tag       = MKTAG('a', 'p', 'c', 'o'),
 130         .min_quant = 4,
 131         .max_quant = 8,
 132         .br_tab    = { 300, 242, 220, 194 },
 133         .quant     = QUANT_MAT_PROXY,
 134     },
 135     {
 136         .full_name = "LT",
 137         .tag       = MKTAG('a', 'p', 'c', 's'),
 138         .min_quant = 1,
 139         .max_quant = 9,
 140         .br_tab    = { 720, 560, 490, 440 },
 141         .quant     = QUANT_MAT_LT,
 142     },
 143     {
 144         .full_name = "standard",
 145         .tag       = MKTAG('a', 'p', 'c', 'n'),
 146         .min_quant = 1,
 147         .max_quant = 6,
 148         .br_tab    = { 1050, 808, 710, 632 },
 149         .quant     = QUANT_MAT_STANDARD,
 150     },
 151     {
 152         .full_name = "high quality",
 153         .tag       = MKTAG('a', 'p', 'c', 'h'),
 154         .min_quant = 1,
 155         .max_quant = 6,
 156         .br_tab    = { 1566, 1216, 1070, 950 },
 157         .quant     = QUANT_MAT_HQ,
 158     },
 159     {
 160         .full_name = "4444",
 161         .tag       = MKTAG('a', 'p', '4', 'h'),
 162         .min_quant = 1,
 163         .max_quant = 6,
 164         .br_tab    = { 2350, 1828, 1600, 1425 },
 165         .quant     = QUANT_MAT_HQ,
 166     }
 167 };
 168
 169 #define TRELLIS_WIDTH 16
 170 #define SCORE_LIMIT   INT_MAX / 2
 171
 172 struct TrellisNode {
 173     int prev_node;
 174     int quant;
 175     int bits;
 176     int score;
 177 };
 178
 179 #define MAX_STORED_Q 16
 180
 181 typedef struct ProresThreadData {
 182     DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
 183     DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
 184     int16_t custom_q[64];
 185     struct TrellisNode *nodes;
 186 } ProresThreadData;
 187
 188 typedef struct ProresContext {
 189     AVClass *class;
 190     DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
 191     DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
 192     int16_t quants[MAX_STORED_Q][64];
 193     int16_t custom_q[64];
 194     const uint8_t *quant_mat;
 195     const uint8_t *scantable;
 196
 197     void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
 198                  int linesize, int16_t *block);
 199     FDCTDSPContext fdsp;
 200
 201     int mb_width, mb_height;
 202     int mbs_per_slice;
 203     int num_chroma_blocks, chroma_factor;
 204     int slices_width;
 205     int slices_per_picture;
 206     int pictures_per_frame; // 1 for progressive, 2 for interlaced
 207     int cur_picture_idx;
 208     int num_planes;
 209     int bits_per_mb;
 210     int force_quant;
 211     int alpha_bits;
 212     int warn;
 213
 214     char *vendor;
 215     int quant_sel;
 216
 217     int frame_size_upper_bound;
 218
 219     int profile;
 220     const struct prores_profile *profile_info;
 221
 222     int *slice_q;
 223
 224     ProresThreadData *tdata;
 225 } ProresContext;
 226
 227 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
 228                            int linesize, int x, int y, int w, int h,
 229                            int16_t *blocks, uint16_t *emu_buf,
 230                            int mbs_per_slice, int blocks_per_mb, int is_chroma)
 231 {
 232     const uint16_t *esrc;
 233     const int mb_width = 4 * blocks_per_mb;
 234     int elinesize;
 235     int i, j, k;
 236
 237     for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
 238         if (x >= w) {
 239             memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
 240                               * sizeof(*blocks));
 241             return;
 242         }
 243         if (x + mb_width <= w && y + 16 <= h) {
 244             esrc      = src;
 245             elinesize = linesize;
 246         } else {
 247             int bw, bh, pix;
 248
 249             esrc      = emu_buf;
 250             elinesize = 16 * sizeof(*emu_buf);
 251
 252             bw = FFMIN(w - x, mb_width);
 253             bh = FFMIN(h - y, 16);
 254
 255             for (j = 0; j < bh; j++) {
 256                 memcpy(emu_buf + j * 16,
 257                        (const uint8_t*)src + j * linesize,
 258                        bw * sizeof(*src));
 259                 pix = emu_buf[j * 16 + bw - 1];
 260                 for (k = bw; k < mb_width; k++)
 261                     emu_buf[j * 16 + k] = pix;
 262             }
 263             for (; j < 16; j++)
 264                 memcpy(emu_buf + j * 16,
 265                        emu_buf + (bh - 1) * 16,
 266                        mb_width * sizeof(*emu_buf));
 267         }
 268         if (!is_chroma) {
 269             ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
 270             blocks += 64;
 271             if (blocks_per_mb > 2) {
 272                 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
 273                 blocks += 64;
 274             }
 275             ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
 276             blocks += 64;
 277             if (blocks_per_mb > 2) {
 278                 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
 279                 blocks += 64;
 280             }
 281         } else {
 282             ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
 283             blocks += 64;
 284             ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
 285             blocks += 64;
 286             if (blocks_per_mb > 2) {
 287                 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
 288                 blocks += 64;
 289                 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
 290                 blocks += 64;
 291             }
 292         }
 293
 294         x += mb_width;
 295     }
 296 }
 297
 298 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
 299                            int linesize, int x, int y, int w, int h,
 300                            int16_t *blocks, int mbs_per_slice, int abits)
 301 {
 302     const int slice_width = 16 * mbs_per_slice;
 303     int i, j, copy_w, copy_h;
 304
 305     copy_w = FFMIN(w - x, slice_width);
 306     copy_h = FFMIN(h - y, 16);
 307     for (i = 0; i < copy_h; i++) {
 308         memcpy(blocks, src, copy_w * sizeof(*src));
 309         if (abits == 8)
 310             for (j = 0; j < copy_w; j++)
 311                 blocks[j] >>= 2;
 312         else
 313             for (j = 0; j < copy_w; j++)
 314                 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
 315         for (j = copy_w; j < slice_width; j++)
 316             blocks[j] = blocks[copy_w - 1];
 317         blocks += slice_width;
 318         src    += linesize >> 1;
 319     }
 320     for (; i < 16; i++) {
 321         memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
 322         blocks += slice_width;
 323     }
 324 }
 325
 326 /**
 327  * Write an unsigned rice/exp golomb codeword.
 328  */
 329 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
 330 {
 331     unsigned int rice_order, exp_order, switch_bits, switch_val;
 332     int exponent;
 333
 334     /* number of prefix bits to switch between Rice and expGolomb */
 335     switch_bits = (codebook & 3) + 1;
 336     rice_order  =  codebook >> 5;       /* rice code order */
 337     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 338
 339     switch_val  = switch_bits << rice_order;
 340
 341     if (val >= switch_val) {
 342         val -= switch_val - (1 << exp_order);
 343         exponent = av_log2(val);
 344
 345         put_bits(pb, exponent - exp_order + switch_bits, 0);
 346         put_bits(pb, exponent + 1, val);
 347     } else {
 348         exponent = val >> rice_order;
 349
 350         if (exponent)
 351             put_bits(pb, exponent, 0);
 352         put_bits(pb, 1, 1);
 353         if (rice_order)
 354             put_sbits(pb, rice_order, val);
 355     }
 356 }
 357
 358 #define GET_SIGN(x)  ((x) >> 31)
 359 #define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
 360
 361 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
 362                        int blocks_per_slice, int scale)
 363 {
 364     int i;
 365     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 366
 367     prev_dc = (blocks[0] - 0x4000) / scale;
 368     encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
 369     sign     = 0;
 370     codebook = 3;
 371     blocks  += 64;
 372
 373     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 374         dc       = (blocks[0] - 0x4000) / scale;
 375         delta    = dc - prev_dc;
 376         new_sign = GET_SIGN(delta);
 377         delta    = (delta ^ sign) - sign;
 378         code     = MAKE_CODE(delta);
 379         encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
 380         codebook = (code + (code & 1)) >> 1;
 381         codebook = FFMIN(codebook, 3);
 382         sign     = new_sign;
 383         prev_dc  = dc;
 384     }
 385 }
 386
 387 static void encode_acs(PutBitContext *pb, int16_t *blocks,
 388                        int blocks_per_slice,
 389                        int plane_size_factor,
 390                        const uint8_t *scan, const int16_t *qmat)
 391 {
 392     int idx, i;
 393     int run, level, run_cb, lev_cb;
 394     int max_coeffs, abs_level;
 395
 396     max_coeffs = blocks_per_slice << 6;
 397     run_cb     = ff_prores_run_to_cb_index[4];
 398     lev_cb     = ff_prores_lev_to_cb_index[2];
 399     run        = 0;
 400
 401     for (i = 1; i < 64; i++) {
 402         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 403             level = blocks[idx] / qmat[scan[i]];
 404             if (level) {
 405                 abs_level = FFABS(level);
 406                 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
 407                 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
 408                                     abs_level - 1);
 409                 put_sbits(pb, 1, GET_SIGN(level));
 410
 411                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 412                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 413                 run    = 0;
 414             } else {
 415                 run++;
 416             }
 417         }
 418     }
 419 }
 420
 421 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
 422                               const uint16_t *src, int linesize,
 423                               int mbs_per_slice, int16_t *blocks,
 424                               int blocks_per_mb, int plane_size_factor,
 425                               const int16_t *qmat)
 426 {
 427     int blocks_per_slice, saved_pos;
 428
 429     saved_pos = put_bits_count(pb);
 430     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 431
 432     encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
 433     encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
 434                ctx->scantable, qmat);
 435     flush_put_bits(pb);
 436
 437     return (put_bits_count(pb) - saved_pos) >> 3;
 438 }
 439
 440 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
 441 {
 442     const int mask  = (1 << abits) - 1;
 443     const int dbits = (abits == 8) ? 4 : 7;
 444     const int dsize = 1 << dbits - 1;
 445     int diff = cur - prev;
 446
 447     diff &= mask;
 448     if (diff >= (1 << abits) - dsize)
 449         diff -= 1 << abits;
 450     if (diff < -dsize || diff > dsize || !diff) {
 451         put_bits(pb, 1, 1);
 452         put_bits(pb, abits, diff);
 453     } else {
 454         put_bits(pb, 1, 0);
 455         put_bits(pb, dbits - 1, FFABS(diff) - 1);
 456         put_bits(pb, 1, diff < 0);
 457     }
 458 }
 459
 460 static void put_alpha_run(PutBitContext *pb, int run)
 461 {
 462     if (run) {
 463         put_bits(pb, 1, 0);
 464         if (run < 0x10)
 465             put_bits(pb, 4, run);
 466         else
 467             put_bits(pb, 15, run);
 468     } else {
 469         put_bits(pb, 1, 1);
 470     }
 471 }
 472
 473 // todo alpha quantisation for high quants
 474 static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
 475                               int mbs_per_slice, uint16_t *blocks,
 476                               int quant)
 477 {
 478     const int abits = ctx->alpha_bits;
 479     const int mask  = (1 << abits) - 1;
 480     const int num_coeffs = mbs_per_slice * 256;
 481     int saved_pos = put_bits_count(pb);
 482     int prev = mask, cur;
 483     int idx = 0;
 484     int run = 0;
 485
 486     cur = blocks[idx++];
 487     put_alpha_diff(pb, cur, prev, abits);
 488     prev = cur;
 489     do {
 490         cur = blocks[idx++];
 491         if (cur != prev) {
 492             put_alpha_run (pb, run);
 493             put_alpha_diff(pb, cur, prev, abits);
 494             prev = cur;
 495             run  = 0;
 496         } else {
 497             run++;
 498         }
 499     } while (idx < num_coeffs);
 500     if (run)
 501         put_alpha_run(pb, run);
 502     flush_put_bits(pb);
 503     return (put_bits_count(pb) - saved_pos) >> 3;
 504 }
 505
 506 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
 507                         PutBitContext *pb,
 508                         int sizes[4], int x, int y, int quant,
 509                         int mbs_per_slice)
 510 {
 511     ProresContext *ctx = avctx->priv_data;
 512     int i, xp, yp;
 513     int total_size = 0;
 514     const uint16_t *src;
 515     int slice_width_factor = av_log2(mbs_per_slice);
 516     int num_cblocks, pwidth, linesize, line_add;
 517     int plane_factor, is_chroma;
 518     uint16_t *qmat;
 519
 520     if (ctx->pictures_per_frame == 1)
 521         line_add = 0;
 522     else
 523         line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
 524
 525     if (ctx->force_quant) {
 526         qmat = ctx->quants[0];
 527     } else if (quant < MAX_STORED_Q) {
 528         qmat = ctx->quants[quant];
 529     } else {
 530         qmat = ctx->custom_q;
 531         for (i = 0; i < 64; i++)
 532             qmat[i] = ctx->quant_mat[i] * quant;
 533     }
 534
 535     for (i = 0; i < ctx->num_planes; i++) {
 536         is_chroma    = (i == 1 || i == 2);
 537         plane_factor = slice_width_factor + 2;
 538         if (is_chroma)
 539             plane_factor += ctx->chroma_factor - 3;
 540         if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
 541             xp          = x << 4;
 542             yp          = y << 4;
 543             num_cblocks = 4;
 544             pwidth      = avctx->width;
 545         } else {
 546             xp          = x << 3;
 547             yp          = y << 4;
 548             num_cblocks = 2;
 549             pwidth      = avctx->width >> 1;
 550         }
 551
 552         linesize = pic->linesize[i] * ctx->pictures_per_frame;
 553         src = (const uint16_t*)(pic->data[i] + yp * linesize +
 554                                 line_add * pic->linesize[i]) + xp;
 555
 556         if (i < 3) {
 557             get_slice_data(ctx, src, linesize, xp, yp,
 558                            pwidth, avctx->height / ctx->pictures_per_frame,
 559                            ctx->blocks[0], ctx->emu_buf,
 560                            mbs_per_slice, num_cblocks, is_chroma);
 561             sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
 562                                           mbs_per_slice, ctx->blocks[0],
 563                                           num_cblocks, plane_factor,
 564                                           qmat);
 565         } else {
 566             get_alpha_data(ctx, src, linesize, xp, yp,
 567                            pwidth, avctx->height / ctx->pictures_per_frame,
 568                            ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
 569             sizes[i] = encode_alpha_plane(ctx, pb, mbs_per_slice,
 570                                           ctx->blocks[0], quant);
 571         }
 572         total_size += sizes[i];
 573         if (put_bits_left(pb) < 0) {
 574             av_log(avctx, AV_LOG_ERROR,
 575                    "Underestimated required buffer size.\n");
 576             return AVERROR_BUG;
 577         }
 578     }
 579     return total_size;
 580 }
 581
 582 static inline int estimate_vlc(unsigned codebook, int val)
 583 {
 584     unsigned int rice_order, exp_order, switch_bits, switch_val;
 585     int exponent;
 586
 587     /* number of prefix bits to switch between Rice and expGolomb */
 588     switch_bits = (codebook & 3) + 1;
 589     rice_order  =  codebook >> 5;       /* rice code order */
 590     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 591
 592     switch_val  = switch_bits << rice_order;
 593
 594     if (val >= switch_val) {
 595         val -= switch_val - (1 << exp_order);
 596         exponent = av_log2(val);
 597
 598         return exponent * 2 - exp_order + switch_bits + 1;
 599     } else {
 600         return (val >> rice_order) + rice_order + 1;
 601     }
 602 }
 603
 604 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
 605                         int scale)
 606 {
 607     int i;
 608     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 609     int bits;
 610
 611     prev_dc  = (blocks[0] - 0x4000) / scale;
 612     bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
 613     sign     = 0;
 614     codebook = 3;
 615     blocks  += 64;
 616     *error  += FFABS(blocks[0] - 0x4000) % scale;
 617
 618     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 619         dc       = (blocks[0] - 0x4000) / scale;
 620         *error  += FFABS(blocks[0] - 0x4000) % scale;
 621         delta    = dc - prev_dc;
 622         new_sign = GET_SIGN(delta);
 623         delta    = (delta ^ sign) - sign;
 624         code     = MAKE_CODE(delta);
 625         bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
 626         codebook = (code + (code & 1)) >> 1;
 627         codebook = FFMIN(codebook, 3);
 628         sign     = new_sign;
 629         prev_dc  = dc;
 630     }
 631
 632     return bits;
 633 }
 634
 635 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
 636                         int plane_size_factor,
 637                         const uint8_t *scan, const int16_t *qmat)
 638 {
 639     int idx, i;
 640     int run, level, run_cb, lev_cb;
 641     int max_coeffs, abs_level;
 642     int bits = 0;
 643
 644     max_coeffs = blocks_per_slice << 6;
 645     run_cb     = ff_prores_run_to_cb_index[4];
 646     lev_cb     = ff_prores_lev_to_cb_index[2];
 647     run        = 0;
 648
 649     for (i = 1; i < 64; i++) {
 650         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 651             level   = blocks[idx] / qmat[scan[i]];
 652             *error += FFABS(blocks[idx]) % qmat[scan[i]];
 653             if (level) {
 654                 abs_level = FFABS(level);
 655                 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
 656                 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
 657                                      abs_level - 1) + 1;
 658
 659                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 660                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 661                 run    = 0;
 662             } else {
 663                 run++;
 664             }
 665         }
 666     }
 667
 668     return bits;
 669 }
 670
 671 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
 672                                 const uint16_t *src, int linesize,
 673                                 int mbs_per_slice,
 674                                 int blocks_per_mb, int plane_size_factor,
 675                                 const int16_t *qmat, ProresThreadData *td)
 676 {
 677     int blocks_per_slice;
 678     int bits;
 679
 680     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 681
 682     bits  = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
 683     bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
 684                          plane_size_factor, ctx->scantable, qmat);
 685
 686     return FFALIGN(bits, 8);
 687 }
 688
 689 static int est_alpha_diff(int cur, int prev, int abits)
 690 {
 691     const int mask  = (1 << abits) - 1;
 692     const int dbits = (abits == 8) ? 4 : 7;
 693     const int dsize = 1 << dbits - 1;
 694     int diff = cur - prev;
 695
 696     diff &= mask;
 697     if (diff >= (1 << abits) - dsize)
 698         diff -= 1 << abits;
 699     if (diff < -dsize || diff > dsize || !diff)
 700         return abits + 1;
 701     else
 702         return dbits + 1;
 703 }
 704
 705 static int estimate_alpha_plane(ProresContext *ctx, int *error,
 706                                 const uint16_t *src, int linesize,
 707                                 int mbs_per_slice, int quant,
 708                                 int16_t *blocks)
 709 {
 710     const int abits = ctx->alpha_bits;
 711     const int mask  = (1 << abits) - 1;
 712     const int num_coeffs = mbs_per_slice * 256;
 713     int prev = mask, cur;
 714     int idx = 0;
 715     int run = 0;
 716     int bits;
 717
 718     *error = 0;
 719     cur = blocks[idx++];
 720     bits = est_alpha_diff(cur, prev, abits);
 721     prev = cur;
 722     do {
 723         cur = blocks[idx++];
 724         if (cur != prev) {
 725             if (!run)
 726                 bits++;
 727             else if (run < 0x10)
 728                 bits += 4;
 729             else
 730                 bits += 15;
 731             bits += est_alpha_diff(cur, prev, abits);
 732             prev = cur;
 733             run  = 0;
 734         } else {
 735             run++;
 736         }
 737     } while (idx < num_coeffs);
 738
 739     if (run) {
 740         if (run < 0x10)
 741             bits += 4;
 742         else
 743             bits += 15;
 744     }
 745
 746     return bits;
 747 }
 748
 749 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
 750                             int trellis_node, int x, int y, int mbs_per_slice,
 751                             ProresThreadData *td)
 752 {
 753     ProresContext *ctx = avctx->priv_data;
 754     int i, q, pq, xp, yp;
 755     const uint16_t *src;
 756     int slice_width_factor = av_log2(mbs_per_slice);
 757     int num_cblocks[MAX_PLANES], pwidth;
 758     int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
 759     const int min_quant = ctx->profile_info->min_quant;
 760     const int max_quant = ctx->profile_info->max_quant;
 761     int error, bits, bits_limit;
 762     int mbs, prev, cur, new_score;
 763     int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
 764     int overquant;
 765     uint16_t *qmat;
 766     int linesize[4], line_add;
 767
 768     if (ctx->pictures_per_frame == 1)
 769         line_add = 0;
 770     else
 771         line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
 772     mbs = x + mbs_per_slice;
 773
 774     for (i = 0; i < ctx->num_planes; i++) {
 775         is_chroma[i]    = (i == 1 || i == 2);
 776         plane_factor[i] = slice_width_factor + 2;
 777         if (is_chroma[i])
 778             plane_factor[i] += ctx->chroma_factor - 3;
 779         if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
 780             xp             = x << 4;
 781             yp             = y << 4;
 782             num_cblocks[i] = 4;
 783             pwidth         = avctx->width;
 784         } else {
 785             xp             = x << 3;
 786             yp             = y << 4;
 787             num_cblocks[i] = 2;
 788             pwidth         = avctx->width >> 1;
 789         }
 790
 791         linesize[i] = pic->linesize[i] * ctx->pictures_per_frame;
 792         src = (const uint16_t*)(pic->data[i] + yp * linesize[i] +
 793                                 line_add * pic->linesize[i]) + xp;
 794
 795         if (i < 3) {
 796             get_slice_data(ctx, src, linesize[i], xp, yp,
 797                            pwidth, avctx->height / ctx->pictures_per_frame,
 798                            td->blocks[i], td->emu_buf,
 799                            mbs_per_slice, num_cblocks[i], is_chroma[i]);
 800         } else {
 801             get_alpha_data(ctx, src, linesize[i], xp, yp,
 802                            pwidth, avctx->height / ctx->pictures_per_frame,
 803                            td->blocks[i], mbs_per_slice, ctx->alpha_bits);
 804         }
 805     }
 806
 807     for (q = min_quant; q < max_quant + 2; q++) {
 808         td->nodes[trellis_node + q].prev_node = -1;
 809         td->nodes[trellis_node + q].quant     = q;
 810     }
 811
 812     // todo: maybe perform coarser quantising to fit into frame size when needed
 813     for (q = min_quant; q <= max_quant; q++) {
 814         bits  = 0;
 815         error = 0;
 816         for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
 817             bits += estimate_slice_plane(ctx, &error, i,
 818                                          src, linesize[i],
 819                                          mbs_per_slice,
 820                                          num_cblocks[i], plane_factor[i],
 821                                          ctx->quants[q], td);
 822         }
 823         if (ctx->alpha_bits)
 824             bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
 825                                          mbs_per_slice, q, td->blocks[3]);
 826         if (bits > 65000 * 8) {
 827             error = SCORE_LIMIT;
 828             break;
 829         }
 830         slice_bits[q]  = bits;
 831         slice_score[q] = error;
 832     }
 833     if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
 834         slice_bits[max_quant + 1]  = slice_bits[max_quant];
 835         slice_score[max_quant + 1] = slice_score[max_quant] + 1;
 836         overquant = max_quant;
 837     } else {
 838         for (q = max_quant + 1; q < 128; q++) {
 839             bits  = 0;
 840             error = 0;
 841             if (q < MAX_STORED_Q) {
 842                 qmat = ctx->quants[q];
 843             } else {
 844                 qmat = td->custom_q;
 845                 for (i = 0; i < 64; i++)
 846                     qmat[i] = ctx->quant_mat[i] * q;
 847             }
 848             for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
 849                 bits += estimate_slice_plane(ctx, &error, i,
 850                                              src, linesize[i],
 851                                              mbs_per_slice,
 852                                              num_cblocks[i], plane_factor[i],
 853                                              qmat, td);
 854             }
 855             if (ctx->alpha_bits)
 856                 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
 857                                              mbs_per_slice, q, td->blocks[3]);
 858             if (bits <= ctx->bits_per_mb * mbs_per_slice)
 859                 break;
 860         }
 861
 862         slice_bits[max_quant + 1]  = bits;
 863         slice_score[max_quant + 1] = error;
 864         overquant = q;
 865     }
 866     td->nodes[trellis_node + max_quant + 1].quant = overquant;
 867
 868     bits_limit = mbs * ctx->bits_per_mb;
 869     for (pq = min_quant; pq < max_quant + 2; pq++) {
 870         prev = trellis_node - TRELLIS_WIDTH + pq;
 871
 872         for (q = min_quant; q < max_quant + 2; q++) {
 873             cur = trellis_node + q;
 874
 875             bits  = td->nodes[prev].bits + slice_bits[q];
 876             error = slice_score[q];
 877             if (bits > bits_limit)
 878                 error = SCORE_LIMIT;
 879
 880             if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
 881                 new_score = td->nodes[prev].score + error;
 882             else
 883                 new_score = SCORE_LIMIT;
 884             if (td->nodes[cur].prev_node == -1 ||
 885                 td->nodes[cur].score >= new_score) {
 886
 887                 td->nodes[cur].bits      = bits;
 888                 td->nodes[cur].score     = new_score;
 889                 td->nodes[cur].prev_node = prev;
 890             }
 891         }
 892     }
 893
 894     error = td->nodes[trellis_node + min_quant].score;
 895     pq    = trellis_node + min_quant;
 896     for (q = min_quant + 1; q < max_quant + 2; q++) {
 897         if (td->nodes[trellis_node + q].score <= error) {
 898             error = td->nodes[trellis_node + q].score;
 899             pq    = trellis_node + q;
 900         }
 901     }
 902
 903     return pq;
 904 }
 905
 906 static int find_quant_thread(AVCodecContext *avctx, void *arg,
 907                              int jobnr, int threadnr)
 908 {
 909     ProresContext *ctx = avctx->priv_data;
 910     ProresThreadData *td = ctx->tdata + threadnr;
 911     int mbs_per_slice = ctx->mbs_per_slice;
 912     int x, y = jobnr, mb, q = 0;
 913
 914     for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
 915         while (ctx->mb_width - x < mbs_per_slice)
 916             mbs_per_slice >>= 1;
 917         q = find_slice_quant(avctx, avctx->coded_frame,
 918                              (mb + 1) * TRELLIS_WIDTH, x, y,
 919                              mbs_per_slice, td);
 920     }
 921
 922     for (x = ctx->slices_width - 1; x >= 0; x--) {
 923         ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
 924         q = td->nodes[q].prev_node;
 925     }
 926
 927     return 0;
 928 }
 929
 930 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 931                         const AVFrame *pic, int *got_packet)
 932 {
 933     ProresContext *ctx = avctx->priv_data;
 934     uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
 935     uint8_t *picture_size_pos;
 936     PutBitContext pb;
 937     int x, y, i, mb, q = 0;
 938     int sizes[4] = { 0 };
 939     int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
 940     int frame_size, picture_size, slice_size;
 941     int pkt_size, ret;
 942     int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1);
 943     uint8_t frame_flags;
 944
 945     *avctx->coded_frame           = *pic;
 946     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
 947     avctx->coded_frame->key_frame = 1;
 948
 949     pkt_size = ctx->frame_size_upper_bound;
 950
 951     if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size + FF_MIN_BUFFER_SIZE)) < 0)
 952         return ret;
 953
 954     orig_buf = pkt->data;
 955
 956     // frame atom
 957     orig_buf += 4;                              // frame size
 958     bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
 959     buf = orig_buf;
 960
 961     // frame header
 962     tmp = buf;
 963     buf += 2;                                   // frame header size will be stored here
 964     bytestream_put_be16  (&buf, 0);             // version 1
 965     bytestream_put_buffer(&buf, ctx->vendor, 4);
 966     bytestream_put_be16  (&buf, avctx->width);
 967     bytestream_put_be16  (&buf, avctx->height);
 968
 969     frame_flags = ctx->chroma_factor << 6;
 970     if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
 971         frame_flags |= pic->top_field_first ? 0x04 : 0x08;
 972     bytestream_put_byte  (&buf, frame_flags);
 973
 974     bytestream_put_byte  (&buf, 0);             // reserved
 975     bytestream_put_byte  (&buf, avctx->color_primaries);
 976     bytestream_put_byte  (&buf, avctx->color_trc);
 977     bytestream_put_byte  (&buf, avctx->colorspace);
 978     bytestream_put_byte  (&buf, 0x40 | (ctx->alpha_bits >> 3));
 979     bytestream_put_byte  (&buf, 0);             // reserved
 980     if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
 981         bytestream_put_byte  (&buf, 0x03);      // matrix flags - both matrices are present
 982         // luma quantisation matrix
 983         for (i = 0; i < 64; i++)
 984             bytestream_put_byte(&buf, ctx->quant_mat[i]);
 985         // chroma quantisation matrix
 986         for (i = 0; i < 64; i++)
 987             bytestream_put_byte(&buf, ctx->quant_mat[i]);
 988     } else {
 989         bytestream_put_byte  (&buf, 0x00);      // matrix flags - default matrices are used
 990     }
 991     bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
 992
 993     for (ctx->cur_picture_idx = 0;
 994          ctx->cur_picture_idx < ctx->pictures_per_frame;
 995          ctx->cur_picture_idx++) {
 996         // picture header
 997         picture_size_pos = buf + 1;
 998         bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
 999         buf += 4;                                   // picture data size will be stored here
1000         bytestream_put_be16  (&buf, ctx->slices_per_picture);
1001         bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
1002
1003         // seek table - will be filled during slice encoding
1004         slice_sizes = buf;
1005         buf += ctx->slices_per_picture * 2;
1006
1007         // slices
1008         if (!ctx->force_quant) {
1009             ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
1010                                   ctx->mb_height);
1011             if (ret)
1012                 return ret;
1013         }
1014
1015         for (y = 0; y < ctx->mb_height; y++) {
1016             int mbs_per_slice = ctx->mbs_per_slice;
1017             for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1018                 q = ctx->force_quant ? ctx->force_quant
1019                                      : ctx->slice_q[mb + y * ctx->slices_width];
1020
1021                 while (ctx->mb_width - x < mbs_per_slice)
1022                     mbs_per_slice >>= 1;
1023
1024                 bytestream_put_byte(&buf, slice_hdr_size << 3);
1025                 slice_hdr = buf;
1026                 buf += slice_hdr_size - 1;
1027                 if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1028                     uint8_t *start = pkt->data;
1029                     // Recompute new size according to max_slice_size
1030                     // and deduce delta
1031                     int delta = 200 + (ctx->pictures_per_frame *
1032                                 ctx->slices_per_picture + 1) *
1033                                 max_slice_size - pkt_size;
1034
1035                     delta = FFMAX(delta, 2 * max_slice_size);
1036                     ctx->frame_size_upper_bound += delta;
1037
1038                     if (!ctx->warn) {
1039                         avpriv_request_sample(avctx,
1040                                               "Packet too small: is %i,"
1041                                               " needs %i (slice: %i). "
1042                                               "Correct allocation",
1043                                               pkt_size, delta, max_slice_size);
1044                         ctx->warn = 1;
1045                     }
1046
1047                     ret = av_grow_packet(pkt, delta);
1048                     if (ret < 0)
1049                         return ret;
1050
1051                     pkt_size += delta;
1052                     // restore pointers
1053                     orig_buf         = pkt->data + (orig_buf         - start);
1054                     buf              = pkt->data + (buf              - start);
1055                     picture_size_pos = pkt->data + (picture_size_pos - start);
1056                     slice_sizes      = pkt->data + (slice_sizes      - start);
1057                     slice_hdr        = pkt->data + (slice_hdr        - start);
1058                     tmp              = pkt->data + (tmp              - start);
1059                 }
1060                 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
1061                 ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
1062                                    mbs_per_slice);
1063                 if (ret < 0)
1064                     return ret;
1065
1066                 bytestream_put_byte(&slice_hdr, q);
1067                 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1068                 for (i = 0; i < ctx->num_planes - 1; i++) {
1069                     bytestream_put_be16(&slice_hdr, sizes[i]);
1070                     slice_size += sizes[i];
1071                 }
1072                 bytestream_put_be16(&slice_sizes, slice_size);
1073                 buf += slice_size - slice_hdr_size;
1074                 if (max_slice_size < slice_size)
1075                     max_slice_size = slice_size;
1076             }
1077         }
1078
1079         picture_size = buf - (picture_size_pos - 1);
1080         bytestream_put_be32(&picture_size_pos, picture_size);
1081     }
1082
1083     orig_buf -= 8;
1084     frame_size = buf - orig_buf;
1085     bytestream_put_be32(&orig_buf, frame_size);
1086
1087     pkt->size   = frame_size;
1088     pkt->flags |= AV_PKT_FLAG_KEY;
1089     *got_packet = 1;
1090
1091     return 0;
1092 }
1093
1094 static av_cold int encode_close(AVCodecContext *avctx)
1095 {
1096     ProresContext *ctx = avctx->priv_data;
1097     int i;
1098
1099     av_freep(&avctx->coded_frame);
1100
1101     if (ctx->tdata) {
1102         for (i = 0; i < avctx->thread_count; i++)
1103             av_free(ctx->tdata[i].nodes);
1104     }
1105     av_freep(&ctx->tdata);
1106     av_freep(&ctx->slice_q);
1107
1108     return 0;
1109 }
1110
1111 static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
1112                         int linesize, int16_t *block)
1113 {
1114     int x, y;
1115     const uint16_t *tsrc = src;
1116
1117     for (y = 0; y < 8; y++) {
1118         for (x = 0; x < 8; x++)
1119             block[y * 8 + x] = tsrc[x];
1120         tsrc += linesize >> 1;
1121     }
1122     fdsp->fdct(block);
1123 }
1124
1125 static av_cold int encode_init(AVCodecContext *avctx)
1126 {
1127     ProresContext *ctx = avctx->priv_data;
1128     int mps;
1129     int i, j;
1130     int min_quant, max_quant;
1131     int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
1132
1133     avctx->bits_per_raw_sample = 10;
1134     avctx->coded_frame = av_frame_alloc();
1135     if (!avctx->coded_frame)
1136         return AVERROR(ENOMEM);
1137
1138     ctx->fdct      = prores_fdct;
1139     ctx->scantable = interlaced ? ff_prores_interlaced_scan
1140                                 : ff_prores_progressive_scan;
1141     ff_fdctdsp_init(&ctx->fdsp, avctx);
1142
1143     mps = ctx->mbs_per_slice;
1144     if (mps & (mps - 1)) {
1145         av_log(avctx, AV_LOG_ERROR,
1146                "there should be an integer power of two MBs per slice\n");
1147         return AVERROR(EINVAL);
1148     }
1149     if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1150         if (ctx->alpha_bits & 7) {
1151             av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1152             return AVERROR(EINVAL);
1153         }
1154     } else {
1155         ctx->alpha_bits = 0;
1156     }
1157
1158     ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1159                          ? CFACTOR_Y422
1160                          : CFACTOR_Y444;
1161     ctx->profile_info  = prores_profile_info + ctx->profile;
1162     ctx->num_planes    = 3 + !!ctx->alpha_bits;
1163
1164     ctx->mb_width      = FFALIGN(avctx->width,  16) >> 4;
1165
1166     if (interlaced)
1167         ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1168     else
1169         ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
1170
1171     ctx->slices_width  = ctx->mb_width / mps;
1172     ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1173     ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1174     ctx->pictures_per_frame = 1 + interlaced;
1175
1176     if (ctx->quant_sel == -1)
1177         ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1178     else
1179         ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
1180
1181     if (strlen(ctx->vendor) != 4) {
1182         av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1183         return AVERROR_INVALIDDATA;
1184     }
1185
1186     ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1187     if (!ctx->force_quant) {
1188         if (!ctx->bits_per_mb) {
1189             for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1190                 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1191                                            ctx->pictures_per_frame)
1192                     break;
1193             ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
1194         } else if (ctx->bits_per_mb < 128) {
1195             av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1196             return AVERROR_INVALIDDATA;
1197         }
1198
1199         min_quant = ctx->profile_info->min_quant;
1200         max_quant = ctx->profile_info->max_quant;
1201         for (i = min_quant; i < MAX_STORED_Q; i++) {
1202             for (j = 0; j < 64; j++)
1203                 ctx->quants[i][j] = ctx->quant_mat[j] * i;
1204         }
1205
1206         ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1207         if (!ctx->slice_q) {
1208             encode_close(avctx);
1209             return AVERROR(ENOMEM);
1210         }
1211
1212         ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1213         if (!ctx->tdata) {
1214             encode_close(avctx);
1215             return AVERROR(ENOMEM);
1216         }
1217
1218         for (j = 0; j < avctx->thread_count; j++) {
1219             ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1220                                             * TRELLIS_WIDTH
1221                                             * sizeof(*ctx->tdata->nodes));
1222             if (!ctx->tdata[j].nodes) {
1223                 encode_close(avctx);
1224                 return AVERROR(ENOMEM);
1225             }
1226             for (i = min_quant; i < max_quant + 2; i++) {
1227                 ctx->tdata[j].nodes[i].prev_node = -1;
1228                 ctx->tdata[j].nodes[i].bits      = 0;
1229                 ctx->tdata[j].nodes[i].score     = 0;
1230             }
1231         }
1232     } else {
1233         int ls = 0;
1234
1235         if (ctx->force_quant > 64) {
1236             av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1237             return AVERROR_INVALIDDATA;
1238         }
1239
1240         for (j = 0; j < 64; j++) {
1241             ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1242             ls += av_log2((1 << 11)  / ctx->quants[0][j]) * 2 + 1;
1243         }
1244
1245         ctx->bits_per_mb = ls * 8;
1246         if (ctx->chroma_factor == CFACTOR_Y444)
1247             ctx->bits_per_mb += ls * 4;
1248     }
1249
1250     ctx->frame_size_upper_bound = (ctx->pictures_per_frame *
1251                                    ctx->slices_per_picture + 1) *
1252                                   (2 + 2 * ctx->num_planes +
1253                                    (mps * ctx->bits_per_mb) / 8)
1254                                   + 200;
1255
1256     if (ctx->alpha_bits) {
1257          // The alpha plane is run-coded and might exceed the bit budget.
1258          ctx->frame_size_upper_bound += (ctx->pictures_per_frame *
1259                                          ctx->slices_per_picture + 1) *
1260          /* num pixels per slice */     (ctx->mbs_per_slice * 256 *
1261          /* bits per pixel */            (1 + ctx->alpha_bits + 1) + 7 >> 3);
1262     }
1263
1264     avctx->codec_tag   = ctx->profile_info->tag;
1265
1266     av_log(avctx, AV_LOG_DEBUG,
1267            "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1268            ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1269            interlaced ? "yes" : "no", ctx->bits_per_mb);
1270     av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1271            ctx->frame_size_upper_bound);
1272
1273     return 0;
1274 }
1275
1276 #define OFFSET(x) offsetof(ProresContext, x)
1277 #define VE     AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
1278
1279 static const AVOption options[] = {
1280     { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1281         AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1282     { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1283         { .i64 = PRORES_PROFILE_STANDARD },
1284         PRORES_PROFILE_PROXY, PRORES_PROFILE_4444, VE, "profile" },
1285     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1286         0, 0, VE, "profile" },
1287     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1288         0, 0, VE, "profile" },
1289     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1290         0, 0, VE, "profile" },
1291     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1292         0, 0, VE, "profile" },
1293     { "4444",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1294         0, 0, VE, "profile" },
1295     { "vendor", "vendor ID", OFFSET(vendor),
1296         AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
1297     { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1298         AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
1299     { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1300         { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1301     { "auto",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1302         0, 0, VE, "quant_mat" },
1303     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1304         0, 0, VE, "quant_mat" },
1305     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1306         0, 0, VE, "quant_mat" },
1307     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1308         0, 0, VE, "quant_mat" },
1309     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1310         0, 0, VE, "quant_mat" },
1311     { "default",       NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1312         0, 0, VE, "quant_mat" },
1313     { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1314         { .i64 = 16 }, 0, 16, VE },
1315     { NULL }
1316 };
1317
1318 static const AVClass proresenc_class = {
1319     .class_name = "ProRes encoder",
1320     .item_name  = av_default_item_name,
1321     .option     = options,
1322     .version    = LIBAVUTIL_VERSION_INT,
1323 };
1324
1325 AVCodec ff_prores_ks_encoder = {
1326     .name           = "prores_ks",
1327     .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1328     .type           = AVMEDIA_TYPE_VIDEO,
1329     .id             = AV_CODEC_ID_PRORES,
1330     .priv_data_size = sizeof(ProresContext),
1331     .init           = encode_init,
1332     .close          = encode_close,
1333     .encode2        = encode_frame,
1334     .capabilities   = CODEC_CAP_SLICE_THREADS,
1335     .pix_fmts       = (const enum AVPixelFormat[]) {
1336                           AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1337                           AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1338                       },
1339     .priv_class     = &proresenc_class,
1340 };