git.sesse.net Git - ffmpeg/blob - libavcodec/proresenc_kostya.c

   1 /*
   2  * Apple ProRes encoder
   3  *
   4  * Copyright (c) 2012 Konstantin Shishkov
   5  *
   6  * This encoder appears to be based on Anatoliy Wasserman's encoder,
   7  * considering the similarities in the bugs.
   8  *
   9  * This file is part of FFmpeg.
  10  *
  11  * FFmpeg is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * FFmpeg is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with FFmpeg; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/opt.h"
  27 #include "libavutil/pixdesc.h"
  28 #include "avcodec.h"
  29 #include "fdctdsp.h"
  30 #include "put_bits.h"
  31 #include "bytestream.h"
  32 #include "internal.h"
  33 #include "proresdata.h"
  34
     /* Values of ProresContext.chroma_factor: 4:2:2 and 4:4:4 chroma layouts. */
  35 #define CFACTOR_Y422 2
  36 #define CFACTOR_Y444 3
  37
     /* Largest number of macroblocks in one slice; dimensions the block buffers. */
  38 #define MAX_MBS_PER_SLICE 8
  39
     /* Number of planes the per-plane block buffers are dimensioned for. */
  40 #define MAX_PLANES 4
  41
     /*
      * Profile identifiers. The non-negative values follow the order of the
      * entries of prores_profile_info[]; PRORES_PROFILE_AUTO (-1) has no
      * table entry of its own.
      */
  42 enum {
  43     PRORES_PROFILE_AUTO  = -1,
  44     PRORES_PROFILE_PROXY = 0,
  45     PRORES_PROFILE_LT,
  46     PRORES_PROFILE_STANDARD,
  47     PRORES_PROFILE_HQ,
  48     PRORES_PROFILE_4444,
  49 };
  50
     /*
      * Quantisation matrix selectors, listed in the same order as the rows
      * of prores_quant_matrices[].
      */
  51 enum {
  52     QUANT_MAT_PROXY = 0,
  53     QUANT_MAT_LT,
  54     QUANT_MAT_STANDARD,
  55     QUANT_MAT_HQ,
  56     QUANT_MAT_DEFAULT,
  57 };
  58
     /*
      * Base quantisation matrices, 64 entries (8x8) each. The rows appear in
      * the order of the QUANT_MAT_* enum. Elsewhere in this file a matrix is
      * multiplied by the quantiser to obtain the effective per-coefficient
      * divisor.
      */
  59 static const uint8_t prores_quant_matrices[][64] = {
  60     { // proxy
  61          4,  7,  9, 11, 13, 14, 15, 63,
  62          7,  7, 11, 12, 14, 15, 63, 63,
  63          9, 11, 13, 14, 15, 63, 63, 63,
  64         11, 11, 13, 14, 63, 63, 63, 63,
  65         11, 13, 14, 63, 63, 63, 63, 63,
  66         13, 14, 63, 63, 63, 63, 63, 63,
  67         13, 63, 63, 63, 63, 63, 63, 63,
  68         63, 63, 63, 63, 63, 63, 63, 63,
  69     },
  70     { // LT
  71          4,  5,  6,  7,  9, 11, 13, 15,
  72          5,  5,  7,  8, 11, 13, 15, 17,
  73          6,  7,  9, 11, 13, 15, 15, 17,
  74          7,  7,  9, 11, 13, 15, 17, 19,
  75          7,  9, 11, 13, 14, 16, 19, 23,
  76          9, 11, 13, 14, 16, 19, 23, 29,
  77          9, 11, 13, 15, 17, 21, 28, 35,
  78         11, 13, 16, 17, 21, 28, 35, 41,
  79     },
  80     { // standard
  81          4,  4,  5,  5,  6,  7,  7,  9,
  82          4,  4,  5,  6,  7,  7,  9,  9,
  83          5,  5,  6,  7,  7,  9,  9, 10,
  84          5,  5,  6,  7,  7,  9,  9, 10,
  85          5,  6,  7,  7,  8,  9, 10, 12,
  86          6,  7,  7,  8,  9, 10, 12, 15,
  87          6,  7,  7,  9, 10, 11, 14, 17,
  88          7,  7,  9, 10, 11, 14, 17, 21,
  89     },
  90     { // high quality
  91          4,  4,  4,  4,  4,  4,  4,  4,
  92          4,  4,  4,  4,  4,  4,  4,  4,
  93          4,  4,  4,  4,  4,  4,  4,  4,
  94          4,  4,  4,  4,  4,  4,  4,  5,
  95          4,  4,  4,  4,  4,  4,  5,  5,
  96          4,  4,  4,  4,  4,  5,  5,  6,
  97          4,  4,  4,  4,  5,  5,  6,  7,
  98          4,  4,  4,  4,  5,  6,  7,  7,
  99     },
 100     { // codec default
 101          4,  4,  4,  4,  4,  4,  4,  4,
 102          4,  4,  4,  4,  4,  4,  4,  4,
 103          4,  4,  4,  4,  4,  4,  4,  4,
 104          4,  4,  4,  4,  4,  4,  4,  4,
 105          4,  4,  4,  4,  4,  4,  4,  4,
 106          4,  4,  4,  4,  4,  4,  4,  4,
 107          4,  4,  4,  4,  4,  4,  4,  4,
 108          4,  4,  4,  4,  4,  4,  4,  4,
 109     },
 110 };
 111
     /*
      * Macroblock-count thresholds, one per supported frame-size class.
      * prores_profile.br_tab[] is sized by the same NUM_MB_LIMITS constant,
      * i.e. it holds one value per threshold listed here.
      */
 112 #define NUM_MB_LIMITS 4
 113 static const int prores_mb_limits[NUM_MB_LIMITS] = {
 114     1620, // up to 720x576
 115     2700, // up to 960x720
 116     6075, // up to 1440x1080
 117     9216, // up to 2048x1152
 118 };
 119
     /*
      * Static description of each encoding profile, in PRORES_PROFILE_* order:
      *   full_name  - human-readable profile name
      *   tag        - four-character code built with MKTAG
      *   min_quant,
      *   max_quant  - quantiser range searched by find_slice_quant()
      *   br_tab     - one value per entry of prores_mb_limits[]
      *                (presumably a bits-per-macroblock budget - TODO confirm
      *                against the init code, which is outside this excerpt)
      *   quant      - QUANT_MAT_* selector for the profile's default matrix
      */
 120 static const struct prores_profile {
 121     const char *full_name;
 122     uint32_t    tag;
 123     int         min_quant;
 124     int         max_quant;
 125     int         br_tab[NUM_MB_LIMITS];
 126     int         quant;
 127 } prores_profile_info[5] = {
 128     {
 129         .full_name = "proxy",
 130         .tag       = MKTAG('a', 'p', 'c', 'o'),
 131         .min_quant = 4,
 132         .max_quant = 8,
 133         .br_tab    = { 300, 242, 220, 194 },
 134         .quant     = QUANT_MAT_PROXY,
 135     },
 136     {
 137         .full_name = "LT",
 138         .tag       = MKTAG('a', 'p', 'c', 's'),
 139         .min_quant = 1,
 140         .max_quant = 9,
 141         .br_tab    = { 720, 560, 490, 440 },
 142         .quant     = QUANT_MAT_LT,
 143     },
 144     {
 145         .full_name = "standard",
 146         .tag       = MKTAG('a', 'p', 'c', 'n'),
 147         .min_quant = 1,
 148         .max_quant = 6,
 149         .br_tab    = { 1050, 808, 710, 632 },
 150         .quant     = QUANT_MAT_STANDARD,
 151     },
 152     {
 153         .full_name = "high quality",
 154         .tag       = MKTAG('a', 'p', 'c', 'h'),
 155         .min_quant = 1,
 156         .max_quant = 6,
 157         .br_tab    = { 1566, 1216, 1070, 950 },
 158         .quant     = QUANT_MAT_HQ,
 159     },
 160     {
 161         .full_name = "4444",
 162         .tag       = MKTAG('a', 'p', '4', 'h'),
 163         .min_quant = 1,
 164         .max_quant = 6,
 165         .br_tab    = { 2350, 1828, 1600, 1425 },
 166         .quant     = QUANT_MAT_HQ,
 167     }
 168 };
 169
 170 #define TRELLIS_WIDTH 16
 171 #define SCORE_LIMIT   INT_MAX / 2
 172
     /*
      * One node of the per-row quantiser trellis built by find_slice_quant().
      */
 173 struct TrellisNode {
         /* index of the best predecessor node, -1 while none has been chosen */
 174     int prev_node;
         /* quantiser this node stands for */
 175     int quant;
         /* bits accumulated along the best path ending in this node */
 176     int bits;
         /* error score accumulated along that path (SCORE_LIMIT = unusable) */
 177     int score;
 178 };
 179
     /*
      * Number of quantisation matrices kept in ProresContext.quants[].
      * Quantisers at or above this value use a matrix computed on demand.
      */
 180 #define MAX_STORED_Q 16
 181
     /*
      * Scratch state owned by one worker of the quantiser search
      * (find_quant_thread() selects an instance by thread number).
      */
 182 typedef struct ProresThreadData {
         /* DCT coefficients (or raw alpha samples) of the current slice, per plane */
 183     DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
         /* 16x16 staging area used to pad macroblocks crossing the picture edge */
 184     DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
         /* matrix built on demand for quantisers >= MAX_STORED_Q */
 185     int16_t custom_q[64];
         /* trellis nodes, addressed in groups of TRELLIS_WIDTH per slice */
 186     struct TrellisNode *nodes;
 187 } ProresThreadData;
 188
     /*
      * Encoder private context (reached through AVCodecContext.priv_data).
      *
      * Notes grounded in the functions of this file:
      *  - blocks / emu_buf / custom_q are the scratch buffers used by
      *    encode_slice(); the quantiser search uses the equivalent members
      *    of ProresThreadData instead.
      *  - quants[q] is the matrix used for quantiser q < MAX_STORED_Q;
      *    presumably filled at init time (not visible here - TODO confirm).
      *  - quant_mat is the base matrix that gets scaled by the quantiser;
      *    scantable is the coefficient scan order used for AC coding.
      *  - fdct is the forward transform applied to each 8x8 block.
      *  - force_quant non-zero makes encode_slice() always use quants[0].
      *  - alpha_bits is 0 when no alpha is coded, otherwise the alpha depth.
      *  - bits_per_mb is the per-macroblock bit budget of the rate control.
      */
 189 typedef struct ProresContext {
 190     AVClass *class;
 191     DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
 192     DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
 193     int16_t quants[MAX_STORED_Q][64];
 194     int16_t custom_q[64];
 195     const uint8_t *quant_mat;
 196     const uint8_t *scantable;
 197
 198     void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
 199                  int linesize, int16_t *block);
 200     FDCTDSPContext fdsp;
 201
         /* frame currently being analysed by the quantiser search */
 202     const AVFrame *pic;
 203     int mb_width, mb_height;
 204     int mbs_per_slice;
 205     int num_chroma_blocks, chroma_factor;
 206     int slices_width;
 207     int slices_per_picture;
 208     int pictures_per_frame; // 1 for progressive, 2 for interlaced
         /* index of the picture (field) being coded; selects the start line */
 209     int cur_picture_idx;
 210     int num_planes;
 211     int bits_per_mb;
 212     int force_quant;
 213     int alpha_bits;
 214     int warn;
 215
 216     char *vendor;
 217     int quant_sel;
 218
 219     int frame_size_upper_bound;
 220
 221     int profile;
 222     const struct prores_profile *profile_info;
 223
 224     int *slice_q;
 225
         /* per-thread scratch data, indexed by thread number */
 226     ProresThreadData *tdata;
 227 } ProresContext;
 228
 229 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
 230                            int linesize, int x, int y, int w, int h,
 231                            int16_t *blocks, uint16_t *emu_buf,
 232                            int mbs_per_slice, int blocks_per_mb, int is_chroma)
 233 {
 234     const uint16_t *esrc;
 235     const int mb_width = 4 * blocks_per_mb;
 236     int elinesize;
 237     int i, j, k;
 238
 239     for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
 240         if (x >= w) {
 241             memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
 242                               * sizeof(*blocks));
 243             return;
 244         }
 245         if (x + mb_width <= w && y + 16 <= h) {
 246             esrc      = src;
 247             elinesize = linesize;
 248         } else {
 249             int bw, bh, pix;
 250
 251             esrc      = emu_buf;
 252             elinesize = 16 * sizeof(*emu_buf);
 253
 254             bw = FFMIN(w - x, mb_width);
 255             bh = FFMIN(h - y, 16);
 256
 257             for (j = 0; j < bh; j++) {
 258                 memcpy(emu_buf + j * 16,
 259                        (const uint8_t*)src + j * linesize,
 260                        bw * sizeof(*src));
 261                 pix = emu_buf[j * 16 + bw - 1];
 262                 for (k = bw; k < mb_width; k++)
 263                     emu_buf[j * 16 + k] = pix;
 264             }
 265             for (; j < 16; j++)
 266                 memcpy(emu_buf + j * 16,
 267                        emu_buf + (bh - 1) * 16,
 268                        mb_width * sizeof(*emu_buf));
 269         }
 270         if (!is_chroma) {
 271             ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
 272             blocks += 64;
 273             if (blocks_per_mb > 2) {
 274                 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
 275                 blocks += 64;
 276             }
 277             ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
 278             blocks += 64;
 279             if (blocks_per_mb > 2) {
 280                 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
 281                 blocks += 64;
 282             }
 283         } else {
 284             ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
 285             blocks += 64;
 286             ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
 287             blocks += 64;
 288             if (blocks_per_mb > 2) {
 289                 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
 290                 blocks += 64;
 291                 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
 292                 blocks += 64;
 293             }
 294         }
 295
 296         x += mb_width;
 297     }
 298 }
 299
 300 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
 301                            int linesize, int x, int y, int w, int h,
 302                            int16_t *blocks, int mbs_per_slice, int abits)
 303 {
 304     const int slice_width = 16 * mbs_per_slice;
 305     int i, j, copy_w, copy_h;
 306
 307     copy_w = FFMIN(w - x, slice_width);
 308     copy_h = FFMIN(h - y, 16);
 309     for (i = 0; i < copy_h; i++) {
 310         memcpy(blocks, src, copy_w * sizeof(*src));
 311         if (abits == 8)
 312             for (j = 0; j < copy_w; j++)
 313                 blocks[j] >>= 2;
 314         else
 315             for (j = 0; j < copy_w; j++)
 316                 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
 317         for (j = copy_w; j < slice_width; j++)
 318             blocks[j] = blocks[copy_w - 1];
 319         blocks += slice_width;
 320         src    += linesize >> 1;
 321     }
 322     for (; i < 16; i++) {
 323         memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
 324         blocks += slice_width;
 325     }
 326 }
 327
 328 /**
 329  * Write an unsigned rice/exp golomb codeword.
 330  */
 331 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
 332 {
 333     unsigned int rice_order, exp_order, switch_bits, switch_val;
 334     int exponent;
 335
 336     /* number of prefix bits to switch between Rice and expGolomb */
 337     switch_bits = (codebook & 3) + 1;
 338     rice_order  =  codebook >> 5;       /* rice code order */
 339     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 340
 341     switch_val  = switch_bits << rice_order;
 342
 343     if (val >= switch_val) {
 344         val -= switch_val - (1 << exp_order);
 345         exponent = av_log2(val);
 346
 347         put_bits(pb, exponent - exp_order + switch_bits, 0);
 348         put_bits(pb, exponent + 1, val);
 349     } else {
 350         exponent = val >> rice_order;
 351
 352         if (exponent)
 353             put_bits(pb, exponent, 0);
 354         put_bits(pb, 1, 1);
 355         if (rice_order)
 356             put_sbits(pb, rice_order, val);
 357     }
 358 }
 359
 360 #define GET_SIGN(x)  ((x) >> 31)
 361 #define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
 362
 363 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
 364                        int blocks_per_slice, int scale)
 365 {
 366     int i;
 367     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 368
 369     prev_dc = (blocks[0] - 0x4000) / scale;
 370     encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
 371     sign     = 0;
 372     codebook = 3;
 373     blocks  += 64;
 374
 375     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 376         dc       = (blocks[0] - 0x4000) / scale;
 377         delta    = dc - prev_dc;
 378         new_sign = GET_SIGN(delta);
 379         delta    = (delta ^ sign) - sign;
 380         code     = MAKE_CODE(delta);
 381         encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
 382         codebook = (code + (code & 1)) >> 1;
 383         codebook = FFMIN(codebook, 3);
 384         sign     = new_sign;
 385         prev_dc  = dc;
 386     }
 387 }
 388
 389 static void encode_acs(PutBitContext *pb, int16_t *blocks,
 390                        int blocks_per_slice,
 391                        int plane_size_factor,
 392                        const uint8_t *scan, const int16_t *qmat)
 393 {
 394     int idx, i;
 395     int run, level, run_cb, lev_cb;
 396     int max_coeffs, abs_level;
 397
 398     max_coeffs = blocks_per_slice << 6;
 399     run_cb     = ff_prores_run_to_cb_index[4];
 400     lev_cb     = ff_prores_lev_to_cb_index[2];
 401     run        = 0;
 402
 403     for (i = 1; i < 64; i++) {
 404         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 405             level = blocks[idx] / qmat[scan[i]];
 406             if (level) {
 407                 abs_level = FFABS(level);
 408                 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
 409                 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
 410                                     abs_level - 1);
 411                 put_sbits(pb, 1, GET_SIGN(level));
 412
 413                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 414                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 415                 run    = 0;
 416             } else {
 417                 run++;
 418             }
 419         }
 420     }
 421 }
 422
 423 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
 424                               const uint16_t *src, int linesize,
 425                               int mbs_per_slice, int16_t *blocks,
 426                               int blocks_per_mb, int plane_size_factor,
 427                               const int16_t *qmat)
 428 {
 429     int blocks_per_slice, saved_pos;
 430
 431     saved_pos = put_bits_count(pb);
 432     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 433
 434     encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
 435     encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
 436                ctx->scantable, qmat);
 437     flush_put_bits(pb);
 438
 439     return (put_bits_count(pb) - saved_pos) >> 3;
 440 }
 441
 442 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
 443 {
 444     const int dbits = (abits == 8) ? 4 : 7;
 445     const int dsize = 1 << dbits - 1;
 446     int diff = cur - prev;
 447
 448     diff = av_mod_uintp2(diff, abits);
 449     if (diff >= (1 << abits) - dsize)
 450         diff -= 1 << abits;
 451     if (diff < -dsize || diff > dsize || !diff) {
 452         put_bits(pb, 1, 1);
 453         put_bits(pb, abits, diff);
 454     } else {
 455         put_bits(pb, 1, 0);
 456         put_bits(pb, dbits - 1, FFABS(diff) - 1);
 457         put_bits(pb, 1, diff < 0);
 458     }
 459 }
 460
 461 static void put_alpha_run(PutBitContext *pb, int run)
 462 {
 463     if (run) {
 464         put_bits(pb, 1, 0);
 465         if (run < 0x10)
 466             put_bits(pb, 4, run);
 467         else
 468             put_bits(pb, 15, run);
 469     } else {
 470         put_bits(pb, 1, 1);
 471     }
 472 }
 473
 474 // todo alpha quantisation for high quants
 475 static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
 476                               int mbs_per_slice, uint16_t *blocks,
 477                               int quant)
 478 {
 479     const int abits = ctx->alpha_bits;
 480     const int mask  = (1 << abits) - 1;
 481     const int num_coeffs = mbs_per_slice * 256;
 482     int saved_pos = put_bits_count(pb);
 483     int prev = mask, cur;
 484     int idx = 0;
 485     int run = 0;
 486
 487     cur = blocks[idx++];
 488     put_alpha_diff(pb, cur, prev, abits);
 489     prev = cur;
 490     do {
 491         cur = blocks[idx++];
 492         if (cur != prev) {
 493             put_alpha_run (pb, run);
 494             put_alpha_diff(pb, cur, prev, abits);
 495             prev = cur;
 496             run  = 0;
 497         } else {
 498             run++;
 499         }
 500     } while (idx < num_coeffs);
 501     if (run)
 502         put_alpha_run(pb, run);
 503     flush_put_bits(pb);
 504     return (put_bits_count(pb) - saved_pos) >> 3;
 505 }
 506
 507 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
 508                         PutBitContext *pb,
 509                         int sizes[4], int x, int y, int quant,
 510                         int mbs_per_slice)
 511 {
 512     ProresContext *ctx = avctx->priv_data;
 513     int i, xp, yp;
 514     int total_size = 0;
 515     const uint16_t *src;
 516     int slice_width_factor = av_log2(mbs_per_slice);
 517     int num_cblocks, pwidth, linesize, line_add;
 518     int plane_factor, is_chroma;
 519     uint16_t *qmat;
 520
 521     if (ctx->pictures_per_frame == 1)
 522         line_add = 0;
 523     else
 524         line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
 525
 526     if (ctx->force_quant) {
 527         qmat = ctx->quants[0];
 528     } else if (quant < MAX_STORED_Q) {
 529         qmat = ctx->quants[quant];
 530     } else {
 531         qmat = ctx->custom_q;
 532         for (i = 0; i < 64; i++)
 533             qmat[i] = ctx->quant_mat[i] * quant;
 534     }
 535
 536     for (i = 0; i < ctx->num_planes; i++) {
 537         is_chroma    = (i == 1 || i == 2);
 538         plane_factor = slice_width_factor + 2;
 539         if (is_chroma)
 540             plane_factor += ctx->chroma_factor - 3;
 541         if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
 542             xp          = x << 4;
 543             yp          = y << 4;
 544             num_cblocks = 4;
 545             pwidth      = avctx->width;
 546         } else {
 547             xp          = x << 3;
 548             yp          = y << 4;
 549             num_cblocks = 2;
 550             pwidth      = avctx->width >> 1;
 551         }
 552
 553         linesize = pic->linesize[i] * ctx->pictures_per_frame;
 554         src = (const uint16_t*)(pic->data[i] + yp * linesize +
 555                                 line_add * pic->linesize[i]) + xp;
 556
 557         if (i < 3) {
 558             get_slice_data(ctx, src, linesize, xp, yp,
 559                            pwidth, avctx->height / ctx->pictures_per_frame,
 560                            ctx->blocks[0], ctx->emu_buf,
 561                            mbs_per_slice, num_cblocks, is_chroma);
 562             sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
 563                                           mbs_per_slice, ctx->blocks[0],
 564                                           num_cblocks, plane_factor,
 565                                           qmat);
 566         } else {
 567             get_alpha_data(ctx, src, linesize, xp, yp,
 568                            pwidth, avctx->height / ctx->pictures_per_frame,
 569                            ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
 570             sizes[i] = encode_alpha_plane(ctx, pb, mbs_per_slice,
 571                                           ctx->blocks[0], quant);
 572         }
 573         total_size += sizes[i];
 574         if (put_bits_left(pb) < 0) {
 575             av_log(avctx, AV_LOG_ERROR,
 576                    "Underestimated required buffer size.\n");
 577             return AVERROR_BUG;
 578         }
 579     }
 580     return total_size;
 581 }
 582
 583 static inline int estimate_vlc(unsigned codebook, int val)
 584 {
 585     unsigned int rice_order, exp_order, switch_bits, switch_val;
 586     int exponent;
 587
 588     /* number of prefix bits to switch between Rice and expGolomb */
 589     switch_bits = (codebook & 3) + 1;
 590     rice_order  =  codebook >> 5;       /* rice code order */
 591     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 592
 593     switch_val  = switch_bits << rice_order;
 594
 595     if (val >= switch_val) {
 596         val -= switch_val - (1 << exp_order);
 597         exponent = av_log2(val);
 598
 599         return exponent * 2 - exp_order + switch_bits + 1;
 600     } else {
 601         return (val >> rice_order) + rice_order + 1;
 602     }
 603 }
 604
 605 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
 606                         int scale)
 607 {
 608     int i;
 609     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 610     int bits;
 611
 612     prev_dc  = (blocks[0] - 0x4000) / scale;
 613     bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
 614     sign     = 0;
 615     codebook = 3;
 616     blocks  += 64;
 617     *error  += FFABS(blocks[0] - 0x4000) % scale;
 618
 619     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 620         dc       = (blocks[0] - 0x4000) / scale;
 621         *error  += FFABS(blocks[0] - 0x4000) % scale;
 622         delta    = dc - prev_dc;
 623         new_sign = GET_SIGN(delta);
 624         delta    = (delta ^ sign) - sign;
 625         code     = MAKE_CODE(delta);
 626         bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
 627         codebook = (code + (code & 1)) >> 1;
 628         codebook = FFMIN(codebook, 3);
 629         sign     = new_sign;
 630         prev_dc  = dc;
 631     }
 632
 633     return bits;
 634 }
 635
 636 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
 637                         int plane_size_factor,
 638                         const uint8_t *scan, const int16_t *qmat)
 639 {
 640     int idx, i;
 641     int run, level, run_cb, lev_cb;
 642     int max_coeffs, abs_level;
 643     int bits = 0;
 644
 645     max_coeffs = blocks_per_slice << 6;
 646     run_cb     = ff_prores_run_to_cb_index[4];
 647     lev_cb     = ff_prores_lev_to_cb_index[2];
 648     run        = 0;
 649
 650     for (i = 1; i < 64; i++) {
 651         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 652             level   = blocks[idx] / qmat[scan[i]];
 653             *error += FFABS(blocks[idx]) % qmat[scan[i]];
 654             if (level) {
 655                 abs_level = FFABS(level);
 656                 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
 657                 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
 658                                      abs_level - 1) + 1;
 659
 660                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 661                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 662                 run    = 0;
 663             } else {
 664                 run++;
 665             }
 666         }
 667     }
 668
 669     return bits;
 670 }
 671
 672 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
 673                                 const uint16_t *src, int linesize,
 674                                 int mbs_per_slice,
 675                                 int blocks_per_mb, int plane_size_factor,
 676                                 const int16_t *qmat, ProresThreadData *td)
 677 {
 678     int blocks_per_slice;
 679     int bits;
 680
 681     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 682
 683     bits  = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
 684     bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
 685                          plane_size_factor, ctx->scantable, qmat);
 686
 687     return FFALIGN(bits, 8);
 688 }
 689
 690 static int est_alpha_diff(int cur, int prev, int abits)
 691 {
 692     const int dbits = (abits == 8) ? 4 : 7;
 693     const int dsize = 1 << dbits - 1;
 694     int diff = cur - prev;
 695
 696     diff = av_mod_uintp2(diff, abits);
 697     if (diff >= (1 << abits) - dsize)
 698         diff -= 1 << abits;
 699     if (diff < -dsize || diff > dsize || !diff)
 700         return abits + 1;
 701     else
 702         return dbits + 1;
 703 }
 704
 705 static int estimate_alpha_plane(ProresContext *ctx, int *error,
 706                                 const uint16_t *src, int linesize,
 707                                 int mbs_per_slice, int quant,
 708                                 int16_t *blocks)
 709 {
 710     const int abits = ctx->alpha_bits;
 711     const int mask  = (1 << abits) - 1;
 712     const int num_coeffs = mbs_per_slice * 256;
 713     int prev = mask, cur;
 714     int idx = 0;
 715     int run = 0;
 716     int bits;
 717
 718     *error = 0;
 719     cur = blocks[idx++];
 720     bits = est_alpha_diff(cur, prev, abits);
 721     prev = cur;
 722     do {
 723         cur = blocks[idx++];
 724         if (cur != prev) {
 725             if (!run)
 726                 bits++;
 727             else if (run < 0x10)
 728                 bits += 4;
 729             else
 730                 bits += 15;
 731             bits += est_alpha_diff(cur, prev, abits);
 732             prev = cur;
 733             run  = 0;
 734         } else {
 735             run++;
 736         }
 737     } while (idx < num_coeffs);
 738
 739     if (run) {
 740         if (run < 0x10)
 741             bits += 4;
 742         else
 743             bits += 15;
 744     }
 745
 746     return bits;
 747 }
 748
 749 static int find_slice_quant(AVCodecContext *avctx,
 750                             int trellis_node, int x, int y, int mbs_per_slice,
 751                             ProresThreadData *td)
 752 {
 753     ProresContext *ctx = avctx->priv_data;
 754     int i, q, pq, xp, yp;
 755     const uint16_t *src;
 756     int slice_width_factor = av_log2(mbs_per_slice);
 757     int num_cblocks[MAX_PLANES], pwidth;
 758     int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
 759     const int min_quant = ctx->profile_info->min_quant;
 760     const int max_quant = ctx->profile_info->max_quant;
 761     int error, bits, bits_limit;
 762     int mbs, prev, cur, new_score;
 763     int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
 764     int overquant;
 765     uint16_t *qmat;
 766     int linesize[4], line_add;
 767
 768     if (ctx->pictures_per_frame == 1)
 769         line_add = 0;
 770     else
 771         line_add = ctx->cur_picture_idx ^ !ctx->pic->top_field_first;
 772     mbs = x + mbs_per_slice;
 773
 774     for (i = 0; i < ctx->num_planes; i++) {
 775         is_chroma[i]    = (i == 1 || i == 2);
 776         plane_factor[i] = slice_width_factor + 2;
 777         if (is_chroma[i])
 778             plane_factor[i] += ctx->chroma_factor - 3;
 779         if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
 780             xp             = x << 4;
 781             yp             = y << 4;
 782             num_cblocks[i] = 4;
 783             pwidth         = avctx->width;
 784         } else {
 785             xp             = x << 3;
 786             yp             = y << 4;
 787             num_cblocks[i] = 2;
 788             pwidth         = avctx->width >> 1;
 789         }
 790
 791         linesize[i] = ctx->pic->linesize[i] * ctx->pictures_per_frame;
 792         src = (const uint16_t *)(ctx->pic->data[i] + yp * linesize[i] +
 793                                  line_add * ctx->pic->linesize[i]) + xp;
 794
 795         if (i < 3) {
 796             get_slice_data(ctx, src, linesize[i], xp, yp,
 797                            pwidth, avctx->height / ctx->pictures_per_frame,
 798                            td->blocks[i], td->emu_buf,
 799                            mbs_per_slice, num_cblocks[i], is_chroma[i]);
 800         } else {
 801             get_alpha_data(ctx, src, linesize[i], xp, yp,
 802                            pwidth, avctx->height / ctx->pictures_per_frame,
 803                            td->blocks[i], mbs_per_slice, ctx->alpha_bits);
 804         }
 805     }
 806
 807     for (q = min_quant; q < max_quant + 2; q++) {
 808         td->nodes[trellis_node + q].prev_node = -1;
 809         td->nodes[trellis_node + q].quant     = q;
 810     }
 811
 812     // todo: maybe perform coarser quantising to fit into frame size when needed
 813     for (q = min_quant; q <= max_quant; q++) {
 814         bits  = 0;
 815         error = 0;
 816         for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
 817             bits += estimate_slice_plane(ctx, &error, i,
 818                                          src, linesize[i],
 819                                          mbs_per_slice,
 820                                          num_cblocks[i], plane_factor[i],
 821                                          ctx->quants[q], td);
 822         }
 823         if (ctx->alpha_bits)
 824             bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
 825                                          mbs_per_slice, q, td->blocks[3]);
 826         if (bits > 65000 * 8)
 827             error = SCORE_LIMIT;
 828
 829         slice_bits[q]  = bits;
 830         slice_score[q] = error;
 831     }
 832     if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
 833         slice_bits[max_quant + 1]  = slice_bits[max_quant];
 834         slice_score[max_quant + 1] = slice_score[max_quant] + 1;
 835         overquant = max_quant;
 836     } else {
 837         for (q = max_quant + 1; q < 128; q++) {
 838             bits  = 0;
 839             error = 0;
 840             if (q < MAX_STORED_Q) {
 841                 qmat = ctx->quants[q];
 842             } else {
 843                 qmat = td->custom_q;
 844                 for (i = 0; i < 64; i++)
 845                     qmat[i] = ctx->quant_mat[i] * q;
 846             }
 847             for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
 848                 bits += estimate_slice_plane(ctx, &error, i,
 849                                              src, linesize[i],
 850                                              mbs_per_slice,
 851                                              num_cblocks[i], plane_factor[i],
 852                                              qmat, td);
 853             }
 854             if (ctx->alpha_bits)
 855                 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
 856                                              mbs_per_slice, q, td->blocks[3]);
 857             if (bits <= ctx->bits_per_mb * mbs_per_slice)
 858                 break;
 859         }
 860
 861         slice_bits[max_quant + 1]  = bits;
 862         slice_score[max_quant + 1] = error;
 863         overquant = q;
 864     }
 865     td->nodes[trellis_node + max_quant + 1].quant = overquant;
 866
 867     bits_limit = mbs * ctx->bits_per_mb;
 868     for (pq = min_quant; pq < max_quant + 2; pq++) {
 869         prev = trellis_node - TRELLIS_WIDTH + pq;
 870
 871         for (q = min_quant; q < max_quant + 2; q++) {
 872             cur = trellis_node + q;
 873
 874             bits  = td->nodes[prev].bits + slice_bits[q];
 875             error = slice_score[q];
 876             if (bits > bits_limit)
 877                 error = SCORE_LIMIT;
 878
 879             if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
 880                 new_score = td->nodes[prev].score + error;
 881             else
 882                 new_score = SCORE_LIMIT;
 883             if (td->nodes[cur].prev_node == -1 ||
 884                 td->nodes[cur].score >= new_score) {
 885
 886                 td->nodes[cur].bits      = bits;
 887                 td->nodes[cur].score     = new_score;
 888                 td->nodes[cur].prev_node = prev;
 889             }
 890         }
 891     }
 892
 893     error = td->nodes[trellis_node + min_quant].score;
 894     pq    = trellis_node + min_quant;
 895     for (q = min_quant + 1; q < max_quant + 2; q++) {
 896         if (td->nodes[trellis_node + q].score <= error) {
 897             error = td->nodes[trellis_node + q].score;
 898             pq    = trellis_node + q;
 899         }
 900     }
 901
 902     return pq;
 903 }
 904
 905 static int find_quant_thread(AVCodecContext *avctx, void *arg,
 906                              int jobnr, int threadnr)
 907 {
 908     ProresContext *ctx = avctx->priv_data;
 909     ProresThreadData *td = ctx->tdata + threadnr;
 910     int mbs_per_slice = ctx->mbs_per_slice;
 911     int x, y = jobnr, mb, q = 0;
 912
 913     for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
 914         while (ctx->mb_width - x < mbs_per_slice)
 915             mbs_per_slice >>= 1;
 916         q = find_slice_quant(avctx,
 917                              (mb + 1) * TRELLIS_WIDTH, x, y,
 918                              mbs_per_slice, td);
 919     }
 920
 921     for (x = ctx->slices_width - 1; x >= 0; x--) {
 922         ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
 923         q = td->nodes[q].prev_node;
 924     }
 925
 926     return 0;
 927 }
 928
 929 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 930                         const AVFrame *pic, int *got_packet)
 931 {
 932     ProresContext *ctx = avctx->priv_data;
 933     uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
 934     uint8_t *picture_size_pos;
 935     PutBitContext pb;
 936     int x, y, i, mb, q = 0;
 937     int sizes[4] = { 0 };
 938     int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
 939     int frame_size, picture_size, slice_size;
 940     int pkt_size, ret;
 941     int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1);
 942     uint8_t frame_flags;
 943
 944     ctx->pic = pic;
 945     pkt_size = ctx->frame_size_upper_bound;
 946
 947     if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size + FF_MIN_BUFFER_SIZE)) < 0)
 948         return ret;
 949
 950     orig_buf = pkt->data;
 951
 952     // frame atom
 953     orig_buf += 4;                              // frame size
 954     bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
 955     buf = orig_buf;
 956
 957     // frame header
 958     tmp = buf;
 959     buf += 2;                                   // frame header size will be stored here
 960     bytestream_put_be16  (&buf, 0);             // version 1
 961     bytestream_put_buffer(&buf, ctx->vendor, 4);
 962     bytestream_put_be16  (&buf, avctx->width);
 963     bytestream_put_be16  (&buf, avctx->height);
 964
 965     frame_flags = ctx->chroma_factor << 6;
 966     if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
 967         frame_flags |= pic->top_field_first ? 0x04 : 0x08;
 968     bytestream_put_byte  (&buf, frame_flags);
 969
 970     bytestream_put_byte  (&buf, 0);             // reserved
 971     bytestream_put_byte  (&buf, avctx->color_primaries);
 972     bytestream_put_byte  (&buf, avctx->color_trc);
 973     bytestream_put_byte  (&buf, avctx->colorspace);
 974     bytestream_put_byte  (&buf, 0x40 | (ctx->alpha_bits >> 3));
 975     bytestream_put_byte  (&buf, 0);             // reserved
 976     if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
 977         bytestream_put_byte  (&buf, 0x03);      // matrix flags - both matrices are present
 978         // luma quantisation matrix
 979         for (i = 0; i < 64; i++)
 980             bytestream_put_byte(&buf, ctx->quant_mat[i]);
 981         // chroma quantisation matrix
 982         for (i = 0; i < 64; i++)
 983             bytestream_put_byte(&buf, ctx->quant_mat[i]);
 984     } else {
 985         bytestream_put_byte  (&buf, 0x00);      // matrix flags - default matrices are used
 986     }
 987     bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
 988
 989     for (ctx->cur_picture_idx = 0;
 990          ctx->cur_picture_idx < ctx->pictures_per_frame;
 991          ctx->cur_picture_idx++) {
 992         // picture header
 993         picture_size_pos = buf + 1;
 994         bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
 995         buf += 4;                                   // picture data size will be stored here
 996         bytestream_put_be16  (&buf, ctx->slices_per_picture);
 997         bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
 998
 999         // seek table - will be filled during slice encoding
1000         slice_sizes = buf;
1001         buf += ctx->slices_per_picture * 2;
1002
1003         // slices
1004         if (!ctx->force_quant) {
1005             ret = avctx->execute2(avctx, find_quant_thread, (void*)pic, NULL,
1006                                   ctx->mb_height);
1007             if (ret)
1008                 return ret;
1009         }
1010
1011         for (y = 0; y < ctx->mb_height; y++) {
1012             int mbs_per_slice = ctx->mbs_per_slice;
1013             for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1014                 q = ctx->force_quant ? ctx->force_quant
1015                                      : ctx->slice_q[mb + y * ctx->slices_width];
1016
1017                 while (ctx->mb_width - x < mbs_per_slice)
1018                     mbs_per_slice >>= 1;
1019
1020                 bytestream_put_byte(&buf, slice_hdr_size << 3);
1021                 slice_hdr = buf;
1022                 buf += slice_hdr_size - 1;
1023                 if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1024                     uint8_t *start = pkt->data;
1025                     // Recompute new size according to max_slice_size
1026                     // and deduce delta
1027                     int delta = 200 + (ctx->pictures_per_frame *
1028                                 ctx->slices_per_picture + 1) *
1029                                 max_slice_size - pkt_size;
1030
1031                     delta = FFMAX(delta, 2 * max_slice_size);
1032                     ctx->frame_size_upper_bound += delta;
1033
1034                     if (!ctx->warn) {
1035                         avpriv_request_sample(avctx,
1036                                               "Packet too small: is %i,"
1037                                               " needs %i (slice: %i). "
1038                                               "Correct allocation",
1039                                               pkt_size, delta, max_slice_size);
1040                         ctx->warn = 1;
1041                     }
1042
1043                     ret = av_grow_packet(pkt, delta);
1044                     if (ret < 0)
1045                         return ret;
1046
1047                     pkt_size += delta;
1048                     // restore pointers
1049                     orig_buf         = pkt->data + (orig_buf         - start);
1050                     buf              = pkt->data + (buf              - start);
1051                     picture_size_pos = pkt->data + (picture_size_pos - start);
1052                     slice_sizes      = pkt->data + (slice_sizes      - start);
1053                     slice_hdr        = pkt->data + (slice_hdr        - start);
1054                     tmp              = pkt->data + (tmp              - start);
1055                 }
1056                 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)));
1057                 ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
1058                                    mbs_per_slice);
1059                 if (ret < 0)
1060                     return ret;
1061
1062                 bytestream_put_byte(&slice_hdr, q);
1063                 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1064                 for (i = 0; i < ctx->num_planes - 1; i++) {
1065                     bytestream_put_be16(&slice_hdr, sizes[i]);
1066                     slice_size += sizes[i];
1067                 }
1068                 bytestream_put_be16(&slice_sizes, slice_size);
1069                 buf += slice_size - slice_hdr_size;
1070                 if (max_slice_size < slice_size)
1071                     max_slice_size = slice_size;
1072             }
1073         }
1074
1075         picture_size = buf - (picture_size_pos - 1);
1076         bytestream_put_be32(&picture_size_pos, picture_size);
1077     }
1078
1079     orig_buf -= 8;
1080     frame_size = buf - orig_buf;
1081     bytestream_put_be32(&orig_buf, frame_size);
1082
1083     pkt->size   = frame_size;
1084     pkt->flags |= AV_PKT_FLAG_KEY;
1085     *got_packet = 1;
1086
1087     return 0;
1088 }
1089
1090 static av_cold int encode_close(AVCodecContext *avctx)
1091 {
1092     ProresContext *ctx = avctx->priv_data;
1093     int i;
1094
1095     av_frame_free(&avctx->coded_frame);
1096
1097     if (ctx->tdata) {
1098         for (i = 0; i < avctx->thread_count; i++)
1099             av_freep(&ctx->tdata[i].nodes);
1100     }
1101     av_freep(&ctx->tdata);
1102     av_freep(&ctx->slice_q);
1103
1104     return 0;
1105 }
1106
1107 static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
1108                         int linesize, int16_t *block)
1109 {
1110     int x, y;
1111     const uint16_t *tsrc = src;
1112
1113     for (y = 0; y < 8; y++) {
1114         for (x = 0; x < 8; x++)
1115             block[y * 8 + x] = tsrc[x];
1116         tsrc += linesize >> 1;
1117     }
1118     fdsp->fdct(block);
1119 }
1120
1121 static av_cold int encode_init(AVCodecContext *avctx)
1122 {
1123     ProresContext *ctx = avctx->priv_data;
1124     int mps;
1125     int i, j;
1126     int min_quant, max_quant;
1127     int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
1128
1129     avctx->bits_per_raw_sample = 10;
1130     avctx->coded_frame = av_frame_alloc();
1131     if (!avctx->coded_frame)
1132         return AVERROR(ENOMEM);
1133     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
1134     avctx->coded_frame->key_frame = 1;
1135
1136     ctx->fdct      = prores_fdct;
1137     ctx->scantable = interlaced ? ff_prores_interlaced_scan
1138                                 : ff_prores_progressive_scan;
1139     ff_fdctdsp_init(&ctx->fdsp, avctx);
1140
1141     mps = ctx->mbs_per_slice;
1142     if (mps & (mps - 1)) {
1143         av_log(avctx, AV_LOG_ERROR,
1144                "there should be an integer power of two MBs per slice\n");
1145         return AVERROR(EINVAL);
1146     }
1147     if (ctx->profile == PRORES_PROFILE_AUTO) {
1148         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
1149         ctx->profile = (desc->flags & AV_PIX_FMT_FLAG_ALPHA ||
1150                         !(desc->log2_chroma_w + desc->log2_chroma_h))
1151                      ? PRORES_PROFILE_4444 : PRORES_PROFILE_HQ;
1152         av_log(avctx, AV_LOG_INFO, "Autoselected %s. It can be overridden "
1153                "through -profile option.\n", ctx->profile == PRORES_PROFILE_4444
1154                ? "4:4:4:4 profile because of the used input colorspace"
1155                : "HQ profile to keep best quality");
1156     }
1157     if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1158         if (ctx->profile != PRORES_PROFILE_4444) {
1159             // force alpha and warn
1160             av_log(avctx, AV_LOG_WARNING, "Profile selected will not "
1161                    "encode alpha. Override with -profile if needed.\n");
1162             ctx->alpha_bits = 0;
1163         }
1164         if (ctx->alpha_bits & 7) {
1165             av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1166             return AVERROR(EINVAL);
1167         }
1168         avctx->bits_per_coded_sample = 32;
1169     } else {
1170         ctx->alpha_bits = 0;
1171     }
1172
1173     ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1174                          ? CFACTOR_Y422
1175                          : CFACTOR_Y444;
1176     ctx->profile_info  = prores_profile_info + ctx->profile;
1177     ctx->num_planes    = 3 + !!ctx->alpha_bits;
1178
1179     ctx->mb_width      = FFALIGN(avctx->width,  16) >> 4;
1180
1181     if (interlaced)
1182         ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1183     else
1184         ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
1185
1186     ctx->slices_width  = ctx->mb_width / mps;
1187     ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1188     ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1189     ctx->pictures_per_frame = 1 + interlaced;
1190
1191     if (ctx->quant_sel == -1)
1192         ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1193     else
1194         ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
1195
1196     if (strlen(ctx->vendor) != 4) {
1197         av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1198         return AVERROR_INVALIDDATA;
1199     }
1200
1201     ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1202     if (!ctx->force_quant) {
1203         if (!ctx->bits_per_mb) {
1204             for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1205                 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1206                                            ctx->pictures_per_frame)
1207                     break;
1208             ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
1209         } else if (ctx->bits_per_mb < 128) {
1210             av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1211             return AVERROR_INVALIDDATA;
1212         }
1213
1214         min_quant = ctx->profile_info->min_quant;
1215         max_quant = ctx->profile_info->max_quant;
1216         for (i = min_quant; i < MAX_STORED_Q; i++) {
1217             for (j = 0; j < 64; j++)
1218                 ctx->quants[i][j] = ctx->quant_mat[j] * i;
1219         }
1220
1221         ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1222         if (!ctx->slice_q) {
1223             encode_close(avctx);
1224             return AVERROR(ENOMEM);
1225         }
1226
1227         ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1228         if (!ctx->tdata) {
1229             encode_close(avctx);
1230             return AVERROR(ENOMEM);
1231         }
1232
1233         for (j = 0; j < avctx->thread_count; j++) {
1234             ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1235                                             * TRELLIS_WIDTH
1236                                             * sizeof(*ctx->tdata->nodes));
1237             if (!ctx->tdata[j].nodes) {
1238                 encode_close(avctx);
1239                 return AVERROR(ENOMEM);
1240             }
1241             for (i = min_quant; i < max_quant + 2; i++) {
1242                 ctx->tdata[j].nodes[i].prev_node = -1;
1243                 ctx->tdata[j].nodes[i].bits      = 0;
1244                 ctx->tdata[j].nodes[i].score     = 0;
1245             }
1246         }
1247     } else {
1248         int ls = 0;
1249
1250         if (ctx->force_quant > 64) {
1251             av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1252             return AVERROR_INVALIDDATA;
1253         }
1254
1255         for (j = 0; j < 64; j++) {
1256             ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1257             ls += av_log2((1 << 11)  / ctx->quants[0][j]) * 2 + 1;
1258         }
1259
1260         ctx->bits_per_mb = ls * 8;
1261         if (ctx->chroma_factor == CFACTOR_Y444)
1262             ctx->bits_per_mb += ls * 4;
1263     }
1264
1265     ctx->frame_size_upper_bound = (ctx->pictures_per_frame *
1266                                    ctx->slices_per_picture + 1) *
1267                                   (2 + 2 * ctx->num_planes +
1268                                    (mps * ctx->bits_per_mb) / 8)
1269                                   + 200;
1270
1271     if (ctx->alpha_bits) {
1272          // The alpha plane is run-coded and might exceed the bit budget.
1273          ctx->frame_size_upper_bound += (ctx->pictures_per_frame *
1274                                          ctx->slices_per_picture + 1) *
1275          /* num pixels per slice */     (ctx->mbs_per_slice * 256 *
1276          /* bits per pixel */            (1 + ctx->alpha_bits + 1) + 7 >> 3);
1277     }
1278
1279     avctx->codec_tag   = ctx->profile_info->tag;
1280
1281     av_log(avctx, AV_LOG_DEBUG,
1282            "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1283            ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1284            interlaced ? "yes" : "no", ctx->bits_per_mb);
1285     av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1286            ctx->frame_size_upper_bound);
1287
1288     return 0;
1289 }
1290
1291 #define OFFSET(x) offsetof(ProresContext, x)
1292 #define VE     AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
1293
1294 static const AVOption options[] = {
1295     { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1296         AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1297     { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1298         { .i64 = PRORES_PROFILE_AUTO },
1299         PRORES_PROFILE_AUTO, PRORES_PROFILE_4444, VE, "profile" },
1300     { "auto",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_AUTO },
1301         0, 0, VE, "profile" },
1302     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1303         0, 0, VE, "profile" },
1304     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1305         0, 0, VE, "profile" },
1306     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1307         0, 0, VE, "profile" },
1308     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1309         0, 0, VE, "profile" },
1310     { "4444",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1311         0, 0, VE, "profile" },
1312     { "vendor", "vendor ID", OFFSET(vendor),
1313         AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
1314     { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1315         AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
1316     { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1317         { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1318     { "auto",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1319         0, 0, VE, "quant_mat" },
1320     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1321         0, 0, VE, "quant_mat" },
1322     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1323         0, 0, VE, "quant_mat" },
1324     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1325         0, 0, VE, "quant_mat" },
1326     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1327         0, 0, VE, "quant_mat" },
1328     { "default",       NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1329         0, 0, VE, "quant_mat" },
1330     { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1331         { .i64 = 16 }, 0, 16, VE },
1332     { NULL }
1333 };
1334
1335 static const AVClass proresenc_class = {
1336     .class_name = "ProRes encoder",
1337     .item_name  = av_default_item_name,
1338     .option     = options,
1339     .version    = LIBAVUTIL_VERSION_INT,
1340 };
1341
1342 AVCodec ff_prores_ks_encoder = {
1343     .name           = "prores_ks",
1344     .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1345     .type           = AVMEDIA_TYPE_VIDEO,
1346     .id             = AV_CODEC_ID_PRORES,
1347     .priv_data_size = sizeof(ProresContext),
1348     .init           = encode_init,
1349     .close          = encode_close,
1350     .encode2        = encode_frame,
1351     .capabilities   = CODEC_CAP_SLICE_THREADS,
1352     .pix_fmts       = (const enum AVPixelFormat[]) {
1353                           AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1354                           AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1355                       },
1356     .priv_class     = &proresenc_class,
1357 };