git.sesse.net Git - ffmpeg/blob - libavcodec/proresenc_kostya.c

   1 /*
   2  * Apple ProRes encoder
   3  *
   4  * Copyright (c) 2012 Konstantin Shishkov
   5  *
   6  * This encoder appears to be based on Anatoliy Wasserman's encoder,
   7  * considering the similarities in the bugs.
   8  *
   9  * This file is part of FFmpeg.
  10  *
  11  * FFmpeg is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * FFmpeg is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with FFmpeg; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/opt.h"
  27 #include "libavutil/pixdesc.h"
  28 #include "avcodec.h"
  29 #include "fdctdsp.h"
  30 #include "put_bits.h"
  31 #include "bytestream.h"
  32 #include "internal.h"
  33 #include "proresdata.h"
  34
  35 #define CFACTOR_Y422 2
  36 #define CFACTOR_Y444 3
  37
  38 #define MAX_MBS_PER_SLICE 8
  39
  40 #define MAX_PLANES 4
  41
  42 enum {
  43     PRORES_PROFILE_AUTO  = -1,
  44     PRORES_PROFILE_PROXY = 0,
  45     PRORES_PROFILE_LT,
  46     PRORES_PROFILE_STANDARD,
  47     PRORES_PROFILE_HQ,
  48     PRORES_PROFILE_4444,
  49     PRORES_PROFILE_4444XQ,
  50 };
  51
  52 enum {
  53     QUANT_MAT_PROXY = 0,
  54     QUANT_MAT_LT,
  55     QUANT_MAT_STANDARD,
  56     QUANT_MAT_HQ,
  57     QUANT_MAT_DEFAULT,
  58 };
  59
  60 static const uint8_t prores_quant_matrices[][64] = {
  61     { // proxy
  62          4,  7,  9, 11, 13, 14, 15, 63,
  63          7,  7, 11, 12, 14, 15, 63, 63,
  64          9, 11, 13, 14, 15, 63, 63, 63,
  65         11, 11, 13, 14, 63, 63, 63, 63,
  66         11, 13, 14, 63, 63, 63, 63, 63,
  67         13, 14, 63, 63, 63, 63, 63, 63,
  68         13, 63, 63, 63, 63, 63, 63, 63,
  69         63, 63, 63, 63, 63, 63, 63, 63,
  70     },
  71     { // LT
  72          4,  5,  6,  7,  9, 11, 13, 15,
  73          5,  5,  7,  8, 11, 13, 15, 17,
  74          6,  7,  9, 11, 13, 15, 15, 17,
  75          7,  7,  9, 11, 13, 15, 17, 19,
  76          7,  9, 11, 13, 14, 16, 19, 23,
  77          9, 11, 13, 14, 16, 19, 23, 29,
  78          9, 11, 13, 15, 17, 21, 28, 35,
  79         11, 13, 16, 17, 21, 28, 35, 41,
  80     },
  81     { // standard
  82          4,  4,  5,  5,  6,  7,  7,  9,
  83          4,  4,  5,  6,  7,  7,  9,  9,
  84          5,  5,  6,  7,  7,  9,  9, 10,
  85          5,  5,  6,  7,  7,  9,  9, 10,
  86          5,  6,  7,  7,  8,  9, 10, 12,
  87          6,  7,  7,  8,  9, 10, 12, 15,
  88          6,  7,  7,  9, 10, 11, 14, 17,
  89          7,  7,  9, 10, 11, 14, 17, 21,
  90     },
  91     { // high quality
  92          4,  4,  4,  4,  4,  4,  4,  4,
  93          4,  4,  4,  4,  4,  4,  4,  4,
  94          4,  4,  4,  4,  4,  4,  4,  4,
  95          4,  4,  4,  4,  4,  4,  4,  5,
  96          4,  4,  4,  4,  4,  4,  5,  5,
  97          4,  4,  4,  4,  4,  5,  5,  6,
  98          4,  4,  4,  4,  5,  5,  6,  7,
  99          4,  4,  4,  4,  5,  6,  7,  7,
 100     },
 101     { // codec default
 102          4,  4,  4,  4,  4,  4,  4,  4,
 103          4,  4,  4,  4,  4,  4,  4,  4,
 104          4,  4,  4,  4,  4,  4,  4,  4,
 105          4,  4,  4,  4,  4,  4,  4,  4,
 106          4,  4,  4,  4,  4,  4,  4,  4,
 107          4,  4,  4,  4,  4,  4,  4,  4,
 108          4,  4,  4,  4,  4,  4,  4,  4,
 109          4,  4,  4,  4,  4,  4,  4,  4,
 110     },
 111 };
 112
 113 #define NUM_MB_LIMITS 4
 114 static const int prores_mb_limits[NUM_MB_LIMITS] = {
 115     1620, // up to 720x576
 116     2700, // up to 960x720
 117     6075, // up to 1440x1080
 118     9216, // up to 2048x1152
 119 };
 120
 121 static const struct prores_profile {
 122     const char *full_name;
 123     uint32_t    tag;
 124     int         min_quant;
 125     int         max_quant;
 126     int         br_tab[NUM_MB_LIMITS];
 127     int         quant;
 128 } prores_profile_info[6] = {
 129     {
 130         .full_name = "proxy",
 131         .tag       = MKTAG('a', 'p', 'c', 'o'),
 132         .min_quant = 4,
 133         .max_quant = 8,
 134         .br_tab    = { 300, 242, 220, 194 },
 135         .quant     = QUANT_MAT_PROXY,
 136     },
 137     {
 138         .full_name = "LT",
 139         .tag       = MKTAG('a', 'p', 'c', 's'),
 140         .min_quant = 1,
 141         .max_quant = 9,
 142         .br_tab    = { 720, 560, 490, 440 },
 143         .quant     = QUANT_MAT_LT,
 144     },
 145     {
 146         .full_name = "standard",
 147         .tag       = MKTAG('a', 'p', 'c', 'n'),
 148         .min_quant = 1,
 149         .max_quant = 6,
 150         .br_tab    = { 1050, 808, 710, 632 },
 151         .quant     = QUANT_MAT_STANDARD,
 152     },
 153     {
 154         .full_name = "high quality",
 155         .tag       = MKTAG('a', 'p', 'c', 'h'),
 156         .min_quant = 1,
 157         .max_quant = 6,
 158         .br_tab    = { 1566, 1216, 1070, 950 },
 159         .quant     = QUANT_MAT_HQ,
 160     },
 161     {
 162         .full_name = "4444",
 163         .tag       = MKTAG('a', 'p', '4', 'h'),
 164         .min_quant = 1,
 165         .max_quant = 6,
 166         .br_tab    = { 2350, 1828, 1600, 1425 },
 167         .quant     = QUANT_MAT_HQ,
 168     },
 169     {
 170         .full_name = "4444XQ",
 171         .tag       = MKTAG('a', 'p', '4', 'x'),
 172         .min_quant = 1,
 173         .max_quant = 6,
 174         .br_tab    = { 3525, 2742, 2400, 2137 },
 175         .quant     = QUANT_MAT_HQ,
 176     }
 177 };
 178
 179 #define TRELLIS_WIDTH 16
 180 #define SCORE_LIMIT   INT_MAX / 2
 181
 182 struct TrellisNode {
 183     int prev_node;
 184     int quant;
 185     int bits;
 186     int score;
 187 };
 188
 189 #define MAX_STORED_Q 16
 190
 191 typedef struct ProresThreadData {
 192     DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
 193     DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
 194     int16_t custom_q[64];
 195     struct TrellisNode *nodes;
 196 } ProresThreadData;
 197
 198 typedef struct ProresContext {
 199     AVClass *class;
 200     DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
 201     DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
 202     int16_t quants[MAX_STORED_Q][64];
 203     int16_t custom_q[64];
 204     const uint8_t *quant_mat;
 205     const uint8_t *scantable;
 206
 207     void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
 208                  ptrdiff_t linesize, int16_t *block);
 209     FDCTDSPContext fdsp;
 210
 211     const AVFrame *pic;
 212     int mb_width, mb_height;
 213     int mbs_per_slice;
 214     int num_chroma_blocks, chroma_factor;
 215     int slices_width;
 216     int slices_per_picture;
 217     int pictures_per_frame; // 1 for progressive, 2 for interlaced
 218     int cur_picture_idx;
 219     int num_planes;
 220     int bits_per_mb;
 221     int force_quant;
 222     int alpha_bits;
 223     int warn;
 224
 225     char *vendor;
 226     int quant_sel;
 227
 228     int frame_size_upper_bound;
 229
 230     int profile;
 231     const struct prores_profile *profile_info;
 232
 233     int *slice_q;
 234
 235     ProresThreadData *tdata;
 236 } ProresContext;
 237
 238 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
 239                            ptrdiff_t linesize, int x, int y, int w, int h,
 240                            int16_t *blocks, uint16_t *emu_buf,
 241                            int mbs_per_slice, int blocks_per_mb, int is_chroma)
 242 {
 243     const uint16_t *esrc;
 244     const int mb_width = 4 * blocks_per_mb;
 245     ptrdiff_t elinesize;
 246     int i, j, k;
 247
 248     for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
 249         if (x >= w) {
 250             memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
 251                               * sizeof(*blocks));
 252             return;
 253         }
 254         if (x + mb_width <= w && y + 16 <= h) {
 255             esrc      = src;
 256             elinesize = linesize;
 257         } else {
 258             int bw, bh, pix;
 259
 260             esrc      = emu_buf;
 261             elinesize = 16 * sizeof(*emu_buf);
 262
 263             bw = FFMIN(w - x, mb_width);
 264             bh = FFMIN(h - y, 16);
 265
 266             for (j = 0; j < bh; j++) {
 267                 memcpy(emu_buf + j * 16,
 268                        (const uint8_t*)src + j * linesize,
 269                        bw * sizeof(*src));
 270                 pix = emu_buf[j * 16 + bw - 1];
 271                 for (k = bw; k < mb_width; k++)
 272                     emu_buf[j * 16 + k] = pix;
 273             }
 274             for (; j < 16; j++)
 275                 memcpy(emu_buf + j * 16,
 276                        emu_buf + (bh - 1) * 16,
 277                        mb_width * sizeof(*emu_buf));
 278         }
 279         if (!is_chroma) {
 280             ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
 281             blocks += 64;
 282             if (blocks_per_mb > 2) {
 283                 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
 284                 blocks += 64;
 285             }
 286             ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
 287             blocks += 64;
 288             if (blocks_per_mb > 2) {
 289                 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
 290                 blocks += 64;
 291             }
 292         } else {
 293             ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
 294             blocks += 64;
 295             ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
 296             blocks += 64;
 297             if (blocks_per_mb > 2) {
 298                 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
 299                 blocks += 64;
 300                 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
 301                 blocks += 64;
 302             }
 303         }
 304
 305         x += mb_width;
 306     }
 307 }
 308
 309 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
 310                            ptrdiff_t linesize, int x, int y, int w, int h,
 311                            int16_t *blocks, int mbs_per_slice, int abits)
 312 {
 313     const int slice_width = 16 * mbs_per_slice;
 314     int i, j, copy_w, copy_h;
 315
 316     copy_w = FFMIN(w - x, slice_width);
 317     copy_h = FFMIN(h - y, 16);
 318     for (i = 0; i < copy_h; i++) {
 319         memcpy(blocks, src, copy_w * sizeof(*src));
 320         if (abits == 8)
 321             for (j = 0; j < copy_w; j++)
 322                 blocks[j] >>= 2;
 323         else
 324             for (j = 0; j < copy_w; j++)
 325                 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
 326         for (j = copy_w; j < slice_width; j++)
 327             blocks[j] = blocks[copy_w - 1];
 328         blocks += slice_width;
 329         src    += linesize >> 1;
 330     }
 331     for (; i < 16; i++) {
 332         memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
 333         blocks += slice_width;
 334     }
 335 }
 336
 337 /**
 338  * Write an unsigned rice/exp golomb codeword.
 339  */
 340 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
 341 {
 342     unsigned int rice_order, exp_order, switch_bits, switch_val;
 343     int exponent;
 344
 345     /* number of prefix bits to switch between Rice and expGolomb */
 346     switch_bits = (codebook & 3) + 1;
 347     rice_order  =  codebook >> 5;       /* rice code order */
 348     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 349
 350     switch_val  = switch_bits << rice_order;
 351
 352     if (val >= switch_val) {
 353         val -= switch_val - (1 << exp_order);
 354         exponent = av_log2(val);
 355
 356         put_bits(pb, exponent - exp_order + switch_bits, 0);
 357         put_bits(pb, exponent + 1, val);
 358     } else {
 359         exponent = val >> rice_order;
 360
 361         if (exponent)
 362             put_bits(pb, exponent, 0);
 363         put_bits(pb, 1, 1);
 364         if (rice_order)
 365             put_sbits(pb, rice_order, val);
 366     }
 367 }
 368
 369 #define GET_SIGN(x)  ((x) >> 31)
 370 #define MAKE_CODE(x) ((((x)) * 2) ^ GET_SIGN(x))
 371
 372 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
 373                        int blocks_per_slice, int scale)
 374 {
 375     int i;
 376     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 377
 378     prev_dc = (blocks[0] - 0x4000) / scale;
 379     encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
 380     sign     = 0;
 381     codebook = 3;
 382     blocks  += 64;
 383
 384     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 385         dc       = (blocks[0] - 0x4000) / scale;
 386         delta    = dc - prev_dc;
 387         new_sign = GET_SIGN(delta);
 388         delta    = (delta ^ sign) - sign;
 389         code     = MAKE_CODE(delta);
 390         encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
 391         codebook = (code + (code & 1)) >> 1;
 392         codebook = FFMIN(codebook, 3);
 393         sign     = new_sign;
 394         prev_dc  = dc;
 395     }
 396 }
 397
 398 static void encode_acs(PutBitContext *pb, int16_t *blocks,
 399                        int blocks_per_slice,
 400                        int plane_size_factor,
 401                        const uint8_t *scan, const int16_t *qmat)
 402 {
 403     int idx, i;
 404     int run, level, run_cb, lev_cb;
 405     int max_coeffs, abs_level;
 406
 407     max_coeffs = blocks_per_slice << 6;
 408     run_cb     = ff_prores_run_to_cb_index[4];
 409     lev_cb     = ff_prores_lev_to_cb_index[2];
 410     run        = 0;
 411
 412     for (i = 1; i < 64; i++) {
 413         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 414             level = blocks[idx] / qmat[scan[i]];
 415             if (level) {
 416                 abs_level = FFABS(level);
 417                 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
 418                 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
 419                                     abs_level - 1);
 420                 put_sbits(pb, 1, GET_SIGN(level));
 421
 422                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 423                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 424                 run    = 0;
 425             } else {
 426                 run++;
 427             }
 428         }
 429     }
 430 }
 431
 432 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
 433                               const uint16_t *src, ptrdiff_t linesize,
 434                               int mbs_per_slice, int16_t *blocks,
 435                               int blocks_per_mb, int plane_size_factor,
 436                               const int16_t *qmat)
 437 {
 438     int blocks_per_slice, saved_pos;
 439
 440     saved_pos = put_bits_count(pb);
 441     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 442
 443     encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
 444     encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
 445                ctx->scantable, qmat);
 446     flush_put_bits(pb);
 447
 448     return (put_bits_count(pb) - saved_pos) >> 3;
 449 }
 450
 451 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
 452 {
 453     const int dbits = (abits == 8) ? 4 : 7;
 454     const int dsize = 1 << dbits - 1;
 455     int diff = cur - prev;
 456
 457     diff = av_mod_uintp2(diff, abits);
 458     if (diff >= (1 << abits) - dsize)
 459         diff -= 1 << abits;
 460     if (diff < -dsize || diff > dsize || !diff) {
 461         put_bits(pb, 1, 1);
 462         put_bits(pb, abits, diff);
 463     } else {
 464         put_bits(pb, 1, 0);
 465         put_bits(pb, dbits - 1, FFABS(diff) - 1);
 466         put_bits(pb, 1, diff < 0);
 467     }
 468 }
 469
 470 static void put_alpha_run(PutBitContext *pb, int run)
 471 {
 472     if (run) {
 473         put_bits(pb, 1, 0);
 474         if (run < 0x10)
 475             put_bits(pb, 4, run);
 476         else
 477             put_bits(pb, 15, run);
 478     } else {
 479         put_bits(pb, 1, 1);
 480     }
 481 }
 482
 483 // todo alpha quantisation for high quants
 484 static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
 485                               int mbs_per_slice, uint16_t *blocks,
 486                               int quant)
 487 {
 488     const int abits = ctx->alpha_bits;
 489     const int mask  = (1 << abits) - 1;
 490     const int num_coeffs = mbs_per_slice * 256;
 491     int saved_pos = put_bits_count(pb);
 492     int prev = mask, cur;
 493     int idx = 0;
 494     int run = 0;
 495
 496     cur = blocks[idx++];
 497     put_alpha_diff(pb, cur, prev, abits);
 498     prev = cur;
 499     do {
 500         cur = blocks[idx++];
 501         if (cur != prev) {
 502             put_alpha_run (pb, run);
 503             put_alpha_diff(pb, cur, prev, abits);
 504             prev = cur;
 505             run  = 0;
 506         } else {
 507             run++;
 508         }
 509     } while (idx < num_coeffs);
 510     if (run)
 511         put_alpha_run(pb, run);
 512     flush_put_bits(pb);
 513     return (put_bits_count(pb) - saved_pos) >> 3;
 514 }
 515
 516 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
 517                         PutBitContext *pb,
 518                         int sizes[4], int x, int y, int quant,
 519                         int mbs_per_slice)
 520 {
 521     ProresContext *ctx = avctx->priv_data;
 522     int i, xp, yp;
 523     int total_size = 0;
 524     const uint16_t *src;
 525     int slice_width_factor = av_log2(mbs_per_slice);
 526     int num_cblocks, pwidth, line_add;
 527     ptrdiff_t linesize;
 528     int plane_factor, is_chroma;
 529     uint16_t *qmat;
 530
 531     if (ctx->pictures_per_frame == 1)
 532         line_add = 0;
 533     else
 534         line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
 535
 536     if (ctx->force_quant) {
 537         qmat = ctx->quants[0];
 538     } else if (quant < MAX_STORED_Q) {
 539         qmat = ctx->quants[quant];
 540     } else {
 541         qmat = ctx->custom_q;
 542         for (i = 0; i < 64; i++)
 543             qmat[i] = ctx->quant_mat[i] * quant;
 544     }
 545
 546     for (i = 0; i < ctx->num_planes; i++) {
 547         is_chroma    = (i == 1 || i == 2);
 548         plane_factor = slice_width_factor + 2;
 549         if (is_chroma)
 550             plane_factor += ctx->chroma_factor - 3;
 551         if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
 552             xp          = x << 4;
 553             yp          = y << 4;
 554             num_cblocks = 4;
 555             pwidth      = avctx->width;
 556         } else {
 557             xp          = x << 3;
 558             yp          = y << 4;
 559             num_cblocks = 2;
 560             pwidth      = avctx->width >> 1;
 561         }
 562
 563         linesize = pic->linesize[i] * ctx->pictures_per_frame;
 564         src = (const uint16_t*)(pic->data[i] + yp * linesize +
 565                                 line_add * pic->linesize[i]) + xp;
 566
 567         if (i < 3) {
 568             get_slice_data(ctx, src, linesize, xp, yp,
 569                            pwidth, avctx->height / ctx->pictures_per_frame,
 570                            ctx->blocks[0], ctx->emu_buf,
 571                            mbs_per_slice, num_cblocks, is_chroma);
 572             sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
 573                                           mbs_per_slice, ctx->blocks[0],
 574                                           num_cblocks, plane_factor,
 575                                           qmat);
 576         } else {
 577             get_alpha_data(ctx, src, linesize, xp, yp,
 578                            pwidth, avctx->height / ctx->pictures_per_frame,
 579                            ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
 580             sizes[i] = encode_alpha_plane(ctx, pb, mbs_per_slice,
 581                                           ctx->blocks[0], quant);
 582         }
 583         total_size += sizes[i];
 584         if (put_bits_left(pb) < 0) {
 585             av_log(avctx, AV_LOG_ERROR,
 586                    "Underestimated required buffer size.\n");
 587             return AVERROR_BUG;
 588         }
 589     }
 590     return total_size;
 591 }
 592
 593 static inline int estimate_vlc(unsigned codebook, int val)
 594 {
 595     unsigned int rice_order, exp_order, switch_bits, switch_val;
 596     int exponent;
 597
 598     /* number of prefix bits to switch between Rice and expGolomb */
 599     switch_bits = (codebook & 3) + 1;
 600     rice_order  =  codebook >> 5;       /* rice code order */
 601     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 602
 603     switch_val  = switch_bits << rice_order;
 604
 605     if (val >= switch_val) {
 606         val -= switch_val - (1 << exp_order);
 607         exponent = av_log2(val);
 608
 609         return exponent * 2 - exp_order + switch_bits + 1;
 610     } else {
 611         return (val >> rice_order) + rice_order + 1;
 612     }
 613 }
 614
 615 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
 616                         int scale)
 617 {
 618     int i;
 619     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 620     int bits;
 621
 622     prev_dc  = (blocks[0] - 0x4000) / scale;
 623     bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
 624     sign     = 0;
 625     codebook = 3;
 626     blocks  += 64;
 627     *error  += FFABS(blocks[0] - 0x4000) % scale;
 628
 629     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 630         dc       = (blocks[0] - 0x4000) / scale;
 631         *error  += FFABS(blocks[0] - 0x4000) % scale;
 632         delta    = dc - prev_dc;
 633         new_sign = GET_SIGN(delta);
 634         delta    = (delta ^ sign) - sign;
 635         code     = MAKE_CODE(delta);
 636         bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
 637         codebook = (code + (code & 1)) >> 1;
 638         codebook = FFMIN(codebook, 3);
 639         sign     = new_sign;
 640         prev_dc  = dc;
 641     }
 642
 643     return bits;
 644 }
 645
 646 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
 647                         int plane_size_factor,
 648                         const uint8_t *scan, const int16_t *qmat)
 649 {
 650     int idx, i;
 651     int run, level, run_cb, lev_cb;
 652     int max_coeffs, abs_level;
 653     int bits = 0;
 654
 655     max_coeffs = blocks_per_slice << 6;
 656     run_cb     = ff_prores_run_to_cb_index[4];
 657     lev_cb     = ff_prores_lev_to_cb_index[2];
 658     run        = 0;
 659
 660     for (i = 1; i < 64; i++) {
 661         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 662             level   = blocks[idx] / qmat[scan[i]];
 663             *error += FFABS(blocks[idx]) % qmat[scan[i]];
 664             if (level) {
 665                 abs_level = FFABS(level);
 666                 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
 667                 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
 668                                      abs_level - 1) + 1;
 669
 670                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 671                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 672                 run    = 0;
 673             } else {
 674                 run++;
 675             }
 676         }
 677     }
 678
 679     return bits;
 680 }
 681
 682 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
 683                                 const uint16_t *src, ptrdiff_t linesize,
 684                                 int mbs_per_slice,
 685                                 int blocks_per_mb, int plane_size_factor,
 686                                 const int16_t *qmat, ProresThreadData *td)
 687 {
 688     int blocks_per_slice;
 689     int bits;
 690
 691     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 692
 693     bits  = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
 694     bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
 695                          plane_size_factor, ctx->scantable, qmat);
 696
 697     return FFALIGN(bits, 8);
 698 }
 699
 700 static int est_alpha_diff(int cur, int prev, int abits)
 701 {
 702     const int dbits = (abits == 8) ? 4 : 7;
 703     const int dsize = 1 << dbits - 1;
 704     int diff = cur - prev;
 705
 706     diff = av_mod_uintp2(diff, abits);
 707     if (diff >= (1 << abits) - dsize)
 708         diff -= 1 << abits;
 709     if (diff < -dsize || diff > dsize || !diff)
 710         return abits + 1;
 711     else
 712         return dbits + 1;
 713 }
 714
 715 static int estimate_alpha_plane(ProresContext *ctx,
 716                                 const uint16_t *src, ptrdiff_t linesize,
 717                                 int mbs_per_slice, int16_t *blocks)
 718 {
 719     const int abits = ctx->alpha_bits;
 720     const int mask  = (1 << abits) - 1;
 721     const int num_coeffs = mbs_per_slice * 256;
 722     int prev = mask, cur;
 723     int idx = 0;
 724     int run = 0;
 725     int bits;
 726
 727     cur = blocks[idx++];
 728     bits = est_alpha_diff(cur, prev, abits);
 729     prev = cur;
 730     do {
 731         cur = blocks[idx++];
 732         if (cur != prev) {
 733             if (!run)
 734                 bits++;
 735             else if (run < 0x10)
 736                 bits += 4;
 737             else
 738                 bits += 15;
 739             bits += est_alpha_diff(cur, prev, abits);
 740             prev = cur;
 741             run  = 0;
 742         } else {
 743             run++;
 744         }
 745     } while (idx < num_coeffs);
 746
 747     if (run) {
 748         if (run < 0x10)
 749             bits += 4;
 750         else
 751             bits += 15;
 752     }
 753
 754     return bits;
 755 }
 756
 757 static int find_slice_quant(AVCodecContext *avctx,
 758                             int trellis_node, int x, int y, int mbs_per_slice,
 759                             ProresThreadData *td)
 760 {
 761     ProresContext *ctx = avctx->priv_data;
 762     int i, q, pq, xp, yp;
 763     const uint16_t *src;
 764     int slice_width_factor = av_log2(mbs_per_slice);
 765     int num_cblocks[MAX_PLANES], pwidth;
 766     int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
 767     const int min_quant = ctx->profile_info->min_quant;
 768     const int max_quant = ctx->profile_info->max_quant;
 769     int error, bits, bits_limit;
 770     int mbs, prev, cur, new_score;
 771     int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
 772     int overquant;
 773     uint16_t *qmat;
 774     int linesize[4], line_add;
 775     int alpha_bits = 0;
 776
 777     if (ctx->pictures_per_frame == 1)
 778         line_add = 0;
 779     else
 780         line_add = ctx->cur_picture_idx ^ !ctx->pic->top_field_first;
 781     mbs = x + mbs_per_slice;
 782
 783     for (i = 0; i < ctx->num_planes; i++) {
 784         is_chroma[i]    = (i == 1 || i == 2);
 785         plane_factor[i] = slice_width_factor + 2;
 786         if (is_chroma[i])
 787             plane_factor[i] += ctx->chroma_factor - 3;
 788         if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
 789             xp             = x << 4;
 790             yp             = y << 4;
 791             num_cblocks[i] = 4;
 792             pwidth         = avctx->width;
 793         } else {
 794             xp             = x << 3;
 795             yp             = y << 4;
 796             num_cblocks[i] = 2;
 797             pwidth         = avctx->width >> 1;
 798         }
 799
 800         linesize[i] = ctx->pic->linesize[i] * ctx->pictures_per_frame;
 801         src = (const uint16_t *)(ctx->pic->data[i] + yp * linesize[i] +
 802                                  line_add * ctx->pic->linesize[i]) + xp;
 803
 804         if (i < 3) {
 805             get_slice_data(ctx, src, linesize[i], xp, yp,
 806                            pwidth, avctx->height / ctx->pictures_per_frame,
 807                            td->blocks[i], td->emu_buf,
 808                            mbs_per_slice, num_cblocks[i], is_chroma[i]);
 809         } else {
 810             get_alpha_data(ctx, src, linesize[i], xp, yp,
 811                            pwidth, avctx->height / ctx->pictures_per_frame,
 812                            td->blocks[i], mbs_per_slice, ctx->alpha_bits);
 813         }
 814     }
 815
 816     for (q = min_quant; q < max_quant + 2; q++) {
 817         td->nodes[trellis_node + q].prev_node = -1;
 818         td->nodes[trellis_node + q].quant     = q;
 819     }
 820
 821     if (ctx->alpha_bits)
 822         alpha_bits = estimate_alpha_plane(ctx, src, linesize[3],
 823                                           mbs_per_slice, td->blocks[3]);
 824     // todo: maybe perform coarser quantising to fit into frame size when needed
 825     for (q = min_quant; q <= max_quant; q++) {
 826         bits  = alpha_bits;
 827         error = 0;
 828         for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
 829             bits += estimate_slice_plane(ctx, &error, i,
 830                                          src, linesize[i],
 831                                          mbs_per_slice,
 832                                          num_cblocks[i], plane_factor[i],
 833                                          ctx->quants[q], td);
 834         }
 835         if (bits > 65000 * 8)
 836             error = SCORE_LIMIT;
 837
 838         slice_bits[q]  = bits;
 839         slice_score[q] = error;
 840     }
 841     if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
 842         slice_bits[max_quant + 1]  = slice_bits[max_quant];
 843         slice_score[max_quant + 1] = slice_score[max_quant] + 1;
 844         overquant = max_quant;
 845     } else {
 846         for (q = max_quant + 1; q < 128; q++) {
 847             bits  = alpha_bits;
 848             error = 0;
 849             if (q < MAX_STORED_Q) {
 850                 qmat = ctx->quants[q];
 851             } else {
 852                 qmat = td->custom_q;
 853                 for (i = 0; i < 64; i++)
 854                     qmat[i] = ctx->quant_mat[i] * q;
 855             }
 856             for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
 857                 bits += estimate_slice_plane(ctx, &error, i,
 858                                              src, linesize[i],
 859                                              mbs_per_slice,
 860                                              num_cblocks[i], plane_factor[i],
 861                                              qmat, td);
 862             }
 863             if (bits <= ctx->bits_per_mb * mbs_per_slice)
 864                 break;
 865         }
 866
 867         slice_bits[max_quant + 1]  = bits;
 868         slice_score[max_quant + 1] = error;
 869         overquant = q;
 870     }
 871     td->nodes[trellis_node + max_quant + 1].quant = overquant;
 872
 873     bits_limit = mbs * ctx->bits_per_mb;
 874     for (pq = min_quant; pq < max_quant + 2; pq++) {
 875         prev = trellis_node - TRELLIS_WIDTH + pq;
 876
 877         for (q = min_quant; q < max_quant + 2; q++) {
 878             cur = trellis_node + q;
 879
 880             bits  = td->nodes[prev].bits + slice_bits[q];
 881             error = slice_score[q];
 882             if (bits > bits_limit)
 883                 error = SCORE_LIMIT;
 884
 885             if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
 886                 new_score = td->nodes[prev].score + error;
 887             else
 888                 new_score = SCORE_LIMIT;
 889             if (td->nodes[cur].prev_node == -1 ||
 890                 td->nodes[cur].score >= new_score) {
 891
 892                 td->nodes[cur].bits      = bits;
 893                 td->nodes[cur].score     = new_score;
 894                 td->nodes[cur].prev_node = prev;
 895             }
 896         }
 897     }
 898
 899     error = td->nodes[trellis_node + min_quant].score;
 900     pq    = trellis_node + min_quant;
 901     for (q = min_quant + 1; q < max_quant + 2; q++) {
 902         if (td->nodes[trellis_node + q].score <= error) {
 903             error = td->nodes[trellis_node + q].score;
 904             pq    = trellis_node + q;
 905         }
 906     }
 907
 908     return pq;
 909 }
 910
 911 static int find_quant_thread(AVCodecContext *avctx, void *arg,
 912                              int jobnr, int threadnr)
 913 {
 914     ProresContext *ctx = avctx->priv_data;
 915     ProresThreadData *td = ctx->tdata + threadnr;
 916     int mbs_per_slice = ctx->mbs_per_slice;
 917     int x, y = jobnr, mb, q = 0;
 918
 919     for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
 920         while (ctx->mb_width - x < mbs_per_slice)
 921             mbs_per_slice >>= 1;
 922         q = find_slice_quant(avctx,
 923                              (mb + 1) * TRELLIS_WIDTH, x, y,
 924                              mbs_per_slice, td);
 925     }
 926
 927     for (x = ctx->slices_width - 1; x >= 0; x--) {
 928         ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
 929         q = td->nodes[q].prev_node;
 930     }
 931
 932     return 0;
 933 }
 934
 935 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 936                         const AVFrame *pic, int *got_packet)
 937 {
 938     ProresContext *ctx = avctx->priv_data;
 939     uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
 940     uint8_t *picture_size_pos;
 941     PutBitContext pb;
 942     int x, y, i, mb, q = 0;
 943     int sizes[4] = { 0 };
 944     int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
 945     int frame_size, picture_size, slice_size;
 946     int pkt_size, ret;
 947     int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1);
 948     uint8_t frame_flags;
 949
 950     ctx->pic = pic;
 951     pkt_size = ctx->frame_size_upper_bound;
 952
 953     if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
 954         return ret;
 955
 956     orig_buf = pkt->data;
 957
 958     // frame atom
 959     orig_buf += 4;                              // frame size
 960     bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
 961     buf = orig_buf;
 962
 963     // frame header
 964     tmp = buf;
 965     buf += 2;                                   // frame header size will be stored here
 966     bytestream_put_be16  (&buf, 0);             // version 1
 967     bytestream_put_buffer(&buf, ctx->vendor, 4);
 968     bytestream_put_be16  (&buf, avctx->width);
 969     bytestream_put_be16  (&buf, avctx->height);
 970
 971     frame_flags = ctx->chroma_factor << 6;
 972     if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT)
 973         frame_flags |= pic->top_field_first ? 0x04 : 0x08;
 974     bytestream_put_byte  (&buf, frame_flags);
 975
 976     bytestream_put_byte  (&buf, 0);             // reserved
 977     bytestream_put_byte  (&buf, pic->color_primaries);
 978     bytestream_put_byte  (&buf, pic->color_trc);
 979     bytestream_put_byte  (&buf, pic->colorspace);
 980     bytestream_put_byte  (&buf, 0x40 | (ctx->alpha_bits >> 3));
 981     bytestream_put_byte  (&buf, 0);             // reserved
 982     if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
 983         bytestream_put_byte  (&buf, 0x03);      // matrix flags - both matrices are present
 984         // luma quantisation matrix
 985         for (i = 0; i < 64; i++)
 986             bytestream_put_byte(&buf, ctx->quant_mat[i]);
 987         // chroma quantisation matrix
 988         for (i = 0; i < 64; i++)
 989             bytestream_put_byte(&buf, ctx->quant_mat[i]);
 990     } else {
 991         bytestream_put_byte  (&buf, 0x00);      // matrix flags - default matrices are used
 992     }
 993     bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
 994
 995     for (ctx->cur_picture_idx = 0;
 996          ctx->cur_picture_idx < ctx->pictures_per_frame;
 997          ctx->cur_picture_idx++) {
 998         // picture header
 999         picture_size_pos = buf + 1;
1000         bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
1001         buf += 4;                                   // picture data size will be stored here
1002         bytestream_put_be16  (&buf, ctx->slices_per_picture);
1003         bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
1004
1005         // seek table - will be filled during slice encoding
1006         slice_sizes = buf;
1007         buf += ctx->slices_per_picture * 2;
1008
1009         // slices
1010         if (!ctx->force_quant) {
1011             ret = avctx->execute2(avctx, find_quant_thread, (void*)pic, NULL,
1012                                   ctx->mb_height);
1013             if (ret)
1014                 return ret;
1015         }
1016
1017         for (y = 0; y < ctx->mb_height; y++) {
1018             int mbs_per_slice = ctx->mbs_per_slice;
1019             for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1020                 q = ctx->force_quant ? ctx->force_quant
1021                                      : ctx->slice_q[mb + y * ctx->slices_width];
1022
1023                 while (ctx->mb_width - x < mbs_per_slice)
1024                     mbs_per_slice >>= 1;
1025
1026                 bytestream_put_byte(&buf, slice_hdr_size << 3);
1027                 slice_hdr = buf;
1028                 buf += slice_hdr_size - 1;
1029                 if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1030                     uint8_t *start = pkt->data;
1031                     // Recompute new size according to max_slice_size
1032                     // and deduce delta
1033                     int delta = 200 + (ctx->pictures_per_frame *
1034                                 ctx->slices_per_picture + 1) *
1035                                 max_slice_size - pkt_size;
1036
1037                     delta = FFMAX(delta, 2 * max_slice_size);
1038                     ctx->frame_size_upper_bound += delta;
1039
1040                     if (!ctx->warn) {
1041                         avpriv_request_sample(avctx,
1042                                               "Packet too small: is %i,"
1043                                               " needs %i (slice: %i). "
1044                                               "Correct allocation",
1045                                               pkt_size, delta, max_slice_size);
1046                         ctx->warn = 1;
1047                     }
1048
1049                     ret = av_grow_packet(pkt, delta);
1050                     if (ret < 0)
1051                         return ret;
1052
1053                     pkt_size += delta;
1054                     // restore pointers
1055                     orig_buf         = pkt->data + (orig_buf         - start);
1056                     buf              = pkt->data + (buf              - start);
1057                     picture_size_pos = pkt->data + (picture_size_pos - start);
1058                     slice_sizes      = pkt->data + (slice_sizes      - start);
1059                     slice_hdr        = pkt->data + (slice_hdr        - start);
1060                     tmp              = pkt->data + (tmp              - start);
1061                 }
1062                 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)));
1063                 ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
1064                                    mbs_per_slice);
1065                 if (ret < 0)
1066                     return ret;
1067
1068                 bytestream_put_byte(&slice_hdr, q);
1069                 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1070                 for (i = 0; i < ctx->num_planes - 1; i++) {
1071                     bytestream_put_be16(&slice_hdr, sizes[i]);
1072                     slice_size += sizes[i];
1073                 }
1074                 bytestream_put_be16(&slice_sizes, slice_size);
1075                 buf += slice_size - slice_hdr_size;
1076                 if (max_slice_size < slice_size)
1077                     max_slice_size = slice_size;
1078             }
1079         }
1080
1081         picture_size = buf - (picture_size_pos - 1);
1082         bytestream_put_be32(&picture_size_pos, picture_size);
1083     }
1084
1085     orig_buf -= 8;
1086     frame_size = buf - orig_buf;
1087     bytestream_put_be32(&orig_buf, frame_size);
1088
1089     pkt->size   = frame_size;
1090     pkt->flags |= AV_PKT_FLAG_KEY;
1091     *got_packet = 1;
1092
1093     return 0;
1094 }
1095
1096 static av_cold int encode_close(AVCodecContext *avctx)
1097 {
1098     ProresContext *ctx = avctx->priv_data;
1099     int i;
1100
1101     if (ctx->tdata) {
1102         for (i = 0; i < avctx->thread_count; i++)
1103             av_freep(&ctx->tdata[i].nodes);
1104     }
1105     av_freep(&ctx->tdata);
1106     av_freep(&ctx->slice_q);
1107
1108     return 0;
1109 }
1110
1111 static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
1112                         ptrdiff_t linesize, int16_t *block)
1113 {
1114     int x, y;
1115     const uint16_t *tsrc = src;
1116
1117     for (y = 0; y < 8; y++) {
1118         for (x = 0; x < 8; x++)
1119             block[y * 8 + x] = tsrc[x];
1120         tsrc += linesize >> 1;
1121     }
1122     fdsp->fdct(block);
1123 }
1124
1125 static av_cold int encode_init(AVCodecContext *avctx)
1126 {
1127     ProresContext *ctx = avctx->priv_data;
1128     int mps;
1129     int i, j;
1130     int min_quant, max_quant;
1131     int interlaced = !!(avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT);
1132
1133     avctx->bits_per_raw_sample = 10;
1134 #if FF_API_CODED_FRAME
1135 FF_DISABLE_DEPRECATION_WARNINGS
1136     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
1137     avctx->coded_frame->key_frame = 1;
1138 FF_ENABLE_DEPRECATION_WARNINGS
1139 #endif
1140
1141     ctx->fdct      = prores_fdct;
1142     ctx->scantable = interlaced ? ff_prores_interlaced_scan
1143                                 : ff_prores_progressive_scan;
1144     ff_fdctdsp_init(&ctx->fdsp, avctx);
1145
1146     mps = ctx->mbs_per_slice;
1147     if (mps & (mps - 1)) {
1148         av_log(avctx, AV_LOG_ERROR,
1149                "there should be an integer power of two MBs per slice\n");
1150         return AVERROR(EINVAL);
1151     }
1152     if (ctx->profile == PRORES_PROFILE_AUTO) {
1153         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
1154         ctx->profile = (desc->flags & AV_PIX_FMT_FLAG_ALPHA ||
1155                         !(desc->log2_chroma_w + desc->log2_chroma_h))
1156                      ? PRORES_PROFILE_4444 : PRORES_PROFILE_HQ;
1157         av_log(avctx, AV_LOG_INFO, "Autoselected %s. It can be overridden "
1158                "through -profile option.\n", ctx->profile == PRORES_PROFILE_4444
1159                ? "4:4:4:4 profile because of the used input colorspace"
1160                : "HQ profile to keep best quality");
1161     }
1162     if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1163         if (ctx->profile != PRORES_PROFILE_4444 &&
1164             ctx->profile != PRORES_PROFILE_4444XQ) {
1165             // force alpha and warn
1166             av_log(avctx, AV_LOG_WARNING, "Profile selected will not "
1167                    "encode alpha. Override with -profile if needed.\n");
1168             ctx->alpha_bits = 0;
1169         }
1170         if (ctx->alpha_bits & 7) {
1171             av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1172             return AVERROR(EINVAL);
1173         }
1174         avctx->bits_per_coded_sample = 32;
1175     } else {
1176         ctx->alpha_bits = 0;
1177     }
1178
1179     ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1180                          ? CFACTOR_Y422
1181                          : CFACTOR_Y444;
1182     ctx->profile_info  = prores_profile_info + ctx->profile;
1183     ctx->num_planes    = 3 + !!ctx->alpha_bits;
1184
1185     ctx->mb_width      = FFALIGN(avctx->width,  16) >> 4;
1186
1187     if (interlaced)
1188         ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1189     else
1190         ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
1191
1192     ctx->slices_width  = ctx->mb_width / mps;
1193     ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1194     ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1195     ctx->pictures_per_frame = 1 + interlaced;
1196
1197     if (ctx->quant_sel == -1)
1198         ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1199     else
1200         ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
1201
1202     if (strlen(ctx->vendor) != 4) {
1203         av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1204         return AVERROR_INVALIDDATA;
1205     }
1206
1207     ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1208     if (!ctx->force_quant) {
1209         if (!ctx->bits_per_mb) {
1210             for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1211                 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1212                                            ctx->pictures_per_frame)
1213                     break;
1214             ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
1215             if (ctx->alpha_bits)
1216                 ctx->bits_per_mb *= 20;
1217         } else if (ctx->bits_per_mb < 128) {
1218             av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1219             return AVERROR_INVALIDDATA;
1220         }
1221
1222         min_quant = ctx->profile_info->min_quant;
1223         max_quant = ctx->profile_info->max_quant;
1224         for (i = min_quant; i < MAX_STORED_Q; i++) {
1225             for (j = 0; j < 64; j++)
1226                 ctx->quants[i][j] = ctx->quant_mat[j] * i;
1227         }
1228
1229         ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1230         if (!ctx->slice_q) {
1231             encode_close(avctx);
1232             return AVERROR(ENOMEM);
1233         }
1234
1235         ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1236         if (!ctx->tdata) {
1237             encode_close(avctx);
1238             return AVERROR(ENOMEM);
1239         }
1240
1241         for (j = 0; j < avctx->thread_count; j++) {
1242             ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1243                                             * TRELLIS_WIDTH
1244                                             * sizeof(*ctx->tdata->nodes));
1245             if (!ctx->tdata[j].nodes) {
1246                 encode_close(avctx);
1247                 return AVERROR(ENOMEM);
1248             }
1249             for (i = min_quant; i < max_quant + 2; i++) {
1250                 ctx->tdata[j].nodes[i].prev_node = -1;
1251                 ctx->tdata[j].nodes[i].bits      = 0;
1252                 ctx->tdata[j].nodes[i].score     = 0;
1253             }
1254         }
1255     } else {
1256         int ls = 0;
1257
1258         if (ctx->force_quant > 64) {
1259             av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1260             return AVERROR_INVALIDDATA;
1261         }
1262
1263         for (j = 0; j < 64; j++) {
1264             ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1265             ls += av_log2((1 << 11)  / ctx->quants[0][j]) * 2 + 1;
1266         }
1267
1268         ctx->bits_per_mb = ls * 8;
1269         if (ctx->chroma_factor == CFACTOR_Y444)
1270             ctx->bits_per_mb += ls * 4;
1271     }
1272
1273     ctx->frame_size_upper_bound = (ctx->pictures_per_frame *
1274                                    ctx->slices_per_picture + 1) *
1275                                   (2 + 2 * ctx->num_planes +
1276                                    (mps * ctx->bits_per_mb) / 8)
1277                                   + 200;
1278
1279     if (ctx->alpha_bits) {
1280          // The alpha plane is run-coded and might exceed the bit budget.
1281          ctx->frame_size_upper_bound += (ctx->pictures_per_frame *
1282                                          ctx->slices_per_picture + 1) *
1283          /* num pixels per slice */     (ctx->mbs_per_slice * 256 *
1284          /* bits per pixel */            (1 + ctx->alpha_bits + 1) + 7 >> 3);
1285     }
1286
1287     avctx->codec_tag   = ctx->profile_info->tag;
1288
1289     av_log(avctx, AV_LOG_DEBUG,
1290            "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1291            ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1292            interlaced ? "yes" : "no", ctx->bits_per_mb);
1293     av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1294            ctx->frame_size_upper_bound);
1295
1296     return 0;
1297 }
1298
/* Byte offset of a ProresContext member, for use in AVOption entries. */
#define OFFSET(x) offsetof(ProresContext, x)
/* Option flags: video + encoding parameter. The expansion is parenthesised
 * so that it stays a single operand in any expression it is used in. */
#define VE     (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
1301
1302 static const AVOption options[] = {
1303     { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1304         AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1305     { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1306         { .i64 = PRORES_PROFILE_AUTO },
1307         PRORES_PROFILE_AUTO, PRORES_PROFILE_4444XQ, VE, "profile" },
1308     { "auto",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_AUTO },
1309         0, 0, VE, "profile" },
1310     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1311         0, 0, VE, "profile" },
1312     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1313         0, 0, VE, "profile" },
1314     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1315         0, 0, VE, "profile" },
1316     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1317         0, 0, VE, "profile" },
1318     { "4444",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1319         0, 0, VE, "profile" },
1320     { "4444xq",        NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444XQ },
1321         0, 0, VE, "profile" },
1322     { "vendor", "vendor ID", OFFSET(vendor),
1323         AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
1324     { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1325         AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
1326     { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1327         { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1328     { "auto",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1329         0, 0, VE, "quant_mat" },
1330     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1331         0, 0, VE, "quant_mat" },
1332     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1333         0, 0, VE, "quant_mat" },
1334     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1335         0, 0, VE, "quant_mat" },
1336     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1337         0, 0, VE, "quant_mat" },
1338     { "default",       NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1339         0, 0, VE, "quant_mat" },
1340     { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1341         { .i64 = 16 }, 0, 16, VE },
1342     { NULL }
1343 };
1344
1345 static const AVClass proresenc_class = {
1346     .class_name = "ProRes encoder",
1347     .item_name  = av_default_item_name,
1348     .option     = options,
1349     .version    = LIBAVUTIL_VERSION_INT,
1350 };
1351
1352 AVCodec ff_prores_ks_encoder = {
1353     .name           = "prores_ks",
1354     .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1355     .type           = AVMEDIA_TYPE_VIDEO,
1356     .id             = AV_CODEC_ID_PRORES,
1357     .priv_data_size = sizeof(ProresContext),
1358     .init           = encode_init,
1359     .close          = encode_close,
1360     .encode2        = encode_frame,
1361     .capabilities   = AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
1362     .pix_fmts       = (const enum AVPixelFormat[]) {
1363                           AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1364                           AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1365                       },
1366     .priv_class     = &proresenc_class,
1367 };