git.sesse.net Git - ffmpeg/blob - libavcodec/proresenc.c

   1 /*
   2  * Apple ProRes encoder
   3  *
   4  * Copyright (c) 2012 Konstantin Shishkov
   5  *
   6  * This file is part of Libav.
   7  *
   8  * Libav is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * Libav is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with Libav; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 #include "libavutil/opt.h"
  24 #include "libavutil/pixdesc.h"
  25 #include "avcodec.h"
  26 #include "fdctdsp.h"
  27 #include "put_bits.h"
  28 #include "bytestream.h"
  29 #include "internal.h"
  30 #include "proresdata.h"
  31
  32 #define CFACTOR_Y422 2
  33 #define CFACTOR_Y444 3
  34
  35 #define MAX_MBS_PER_SLICE 8
  36
  37 #define MAX_PLANES 4
  38
  39 enum {
  40     PRORES_PROFILE_PROXY = 0,
  41     PRORES_PROFILE_LT,
  42     PRORES_PROFILE_STANDARD,
  43     PRORES_PROFILE_HQ,
  44     PRORES_PROFILE_4444,
  45 };
  46
  47 enum {
  48     QUANT_MAT_PROXY = 0,
  49     QUANT_MAT_LT,
  50     QUANT_MAT_STANDARD,
  51     QUANT_MAT_HQ,
  52     QUANT_MAT_DEFAULT,
  53 };
  54
  55 static const uint8_t prores_quant_matrices[][64] = {
  56     { // proxy
  57          4,  7,  9, 11, 13, 14, 15, 63,
  58          7,  7, 11, 12, 14, 15, 63, 63,
  59          9, 11, 13, 14, 15, 63, 63, 63,
  60         11, 11, 13, 14, 63, 63, 63, 63,
  61         11, 13, 14, 63, 63, 63, 63, 63,
  62         13, 14, 63, 63, 63, 63, 63, 63,
  63         13, 63, 63, 63, 63, 63, 63, 63,
  64         63, 63, 63, 63, 63, 63, 63, 63,
  65     },
  66     { // LT
  67          4,  5,  6,  7,  9, 11, 13, 15,
  68          5,  5,  7,  8, 11, 13, 15, 17,
  69          6,  7,  9, 11, 13, 15, 15, 17,
  70          7,  7,  9, 11, 13, 15, 17, 19,
  71          7,  9, 11, 13, 14, 16, 19, 23,
  72          9, 11, 13, 14, 16, 19, 23, 29,
  73          9, 11, 13, 15, 17, 21, 28, 35,
  74         11, 13, 16, 17, 21, 28, 35, 41,
  75     },
  76     { // standard
  77          4,  4,  5,  5,  6,  7,  7,  9,
  78          4,  4,  5,  6,  7,  7,  9,  9,
  79          5,  5,  6,  7,  7,  9,  9, 10,
  80          5,  5,  6,  7,  7,  9,  9, 10,
  81          5,  6,  7,  7,  8,  9, 10, 12,
  82          6,  7,  7,  8,  9, 10, 12, 15,
  83          6,  7,  7,  9, 10, 11, 14, 17,
  84          7,  7,  9, 10, 11, 14, 17, 21,
  85     },
  86     { // high quality
  87          4,  4,  4,  4,  4,  4,  4,  4,
  88          4,  4,  4,  4,  4,  4,  4,  4,
  89          4,  4,  4,  4,  4,  4,  4,  4,
  90          4,  4,  4,  4,  4,  4,  4,  5,
  91          4,  4,  4,  4,  4,  4,  5,  5,
  92          4,  4,  4,  4,  4,  5,  5,  6,
  93          4,  4,  4,  4,  5,  5,  6,  7,
  94          4,  4,  4,  4,  5,  6,  7,  7,
  95     },
  96     { // codec default
  97          4,  4,  4,  4,  4,  4,  4,  4,
  98          4,  4,  4,  4,  4,  4,  4,  4,
  99          4,  4,  4,  4,  4,  4,  4,  4,
 100          4,  4,  4,  4,  4,  4,  4,  4,
 101          4,  4,  4,  4,  4,  4,  4,  4,
 102          4,  4,  4,  4,  4,  4,  4,  4,
 103          4,  4,  4,  4,  4,  4,  4,  4,
 104          4,  4,  4,  4,  4,  4,  4,  4,
 105     },
 106 };
 107
 108 #define NUM_MB_LIMITS 4
 109 static const int prores_mb_limits[NUM_MB_LIMITS] = {
 110     1620, // up to 720x576
 111     2700, // up to 960x720
 112     6075, // up to 1440x1080
 113     9216, // up to 2048x1152
 114 };
 115
 116 static const struct prores_profile {
 117     const char *full_name;
 118     uint32_t    tag;
 119     int         min_quant;
 120     int         max_quant;
 121     int         br_tab[NUM_MB_LIMITS];
 122     int         quant;
 123 } prores_profile_info[5] = {
 124     {
 125         .full_name = "proxy",
 126         .tag       = MKTAG('a', 'p', 'c', 'o'),
 127         .min_quant = 4,
 128         .max_quant = 8,
 129         .br_tab    = { 300, 242, 220, 194 },
 130         .quant     = QUANT_MAT_PROXY,
 131     },
 132     {
 133         .full_name = "LT",
 134         .tag       = MKTAG('a', 'p', 'c', 's'),
 135         .min_quant = 1,
 136         .max_quant = 9,
 137         .br_tab    = { 720, 560, 490, 440 },
 138         .quant     = QUANT_MAT_LT,
 139     },
 140     {
 141         .full_name = "standard",
 142         .tag       = MKTAG('a', 'p', 'c', 'n'),
 143         .min_quant = 1,
 144         .max_quant = 6,
 145         .br_tab    = { 1050, 808, 710, 632 },
 146         .quant     = QUANT_MAT_STANDARD,
 147     },
 148     {
 149         .full_name = "high quality",
 150         .tag       = MKTAG('a', 'p', 'c', 'h'),
 151         .min_quant = 1,
 152         .max_quant = 6,
 153         .br_tab    = { 1566, 1216, 1070, 950 },
 154         .quant     = QUANT_MAT_HQ,
 155     },
 156     {
 157         .full_name = "4444",
 158         .tag       = MKTAG('a', 'p', '4', 'h'),
 159         .min_quant = 1,
 160         .max_quant = 6,
 161         .br_tab    = { 2350, 1828, 1600, 1425 },
 162         .quant     = QUANT_MAT_HQ,
 163     }
 164 };
 165
 166 #define TRELLIS_WIDTH 16
 167 #define SCORE_LIMIT   INT_MAX / 2
 168
 169 struct TrellisNode {
 170     int prev_node;
 171     int quant;
 172     int bits;
 173     int score;
 174 };
 175
 176 #define MAX_STORED_Q 16
 177
 178 typedef struct ProresThreadData {
 179     DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
 180     DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
 181     int16_t custom_q[64];
 182     struct TrellisNode *nodes;
 183 } ProresThreadData;
 184
 185 typedef struct ProresContext {
 186     AVClass *class;
 187     DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
 188     DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
 189     int16_t quants[MAX_STORED_Q][64];
 190     int16_t custom_q[64];
 191     const uint8_t *quant_mat;
 192     const uint8_t *scantable;
 193
 194     void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
 195                  int linesize, int16_t *block);
 196     FDCTDSPContext fdsp;
 197
 198     const AVFrame *pic;
 199     int mb_width, mb_height;
 200     int mbs_per_slice;
 201     int num_chroma_blocks, chroma_factor;
 202     int slices_width;
 203     int slices_per_picture;
 204     int pictures_per_frame; // 1 for progressive, 2 for interlaced
 205     int cur_picture_idx;
 206     int num_planes;
 207     int bits_per_mb;
 208     int force_quant;
 209     int alpha_bits;
 210     int warn;
 211
 212     char *vendor;
 213     int quant_sel;
 214
 215     int frame_size_upper_bound;
 216
 217     int profile;
 218     const struct prores_profile *profile_info;
 219
 220     int *slice_q;
 221
 222     ProresThreadData *tdata;
 223 } ProresContext;
 224
 225 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
 226                            int linesize, int x, int y, int w, int h,
 227                            int16_t *blocks, uint16_t *emu_buf,
 228                            int mbs_per_slice, int blocks_per_mb, int is_chroma)
 229 {
 230     const uint16_t *esrc;
 231     const int mb_width = 4 * blocks_per_mb;
 232     int elinesize;
 233     int i, j, k;
 234
 235     for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
 236         if (x >= w) {
 237             memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
 238                               * sizeof(*blocks));
 239             return;
 240         }
 241         if (x + mb_width <= w && y + 16 <= h) {
 242             esrc      = src;
 243             elinesize = linesize;
 244         } else {
 245             int bw, bh, pix;
 246
 247             esrc      = emu_buf;
 248             elinesize = 16 * sizeof(*emu_buf);
 249
 250             bw = FFMIN(w - x, mb_width);
 251             bh = FFMIN(h - y, 16);
 252
 253             for (j = 0; j < bh; j++) {
 254                 memcpy(emu_buf + j * 16,
 255                        (const uint8_t*)src + j * linesize,
 256                        bw * sizeof(*src));
 257                 pix = emu_buf[j * 16 + bw - 1];
 258                 for (k = bw; k < mb_width; k++)
 259                     emu_buf[j * 16 + k] = pix;
 260             }
 261             for (; j < 16; j++)
 262                 memcpy(emu_buf + j * 16,
 263                        emu_buf + (bh - 1) * 16,
 264                        mb_width * sizeof(*emu_buf));
 265         }
 266         if (!is_chroma) {
 267             ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
 268             blocks += 64;
 269             if (blocks_per_mb > 2) {
 270                 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
 271                 blocks += 64;
 272             }
 273             ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
 274             blocks += 64;
 275             if (blocks_per_mb > 2) {
 276                 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
 277                 blocks += 64;
 278             }
 279         } else {
 280             ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
 281             blocks += 64;
 282             ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
 283             blocks += 64;
 284             if (blocks_per_mb > 2) {
 285                 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
 286                 blocks += 64;
 287                 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
 288                 blocks += 64;
 289             }
 290         }
 291
 292         x += mb_width;
 293     }
 294 }
 295
 296 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
 297                            int linesize, int x, int y, int w, int h,
 298                            int16_t *blocks, int mbs_per_slice, int abits)
 299 {
 300     const int slice_width = 16 * mbs_per_slice;
 301     int i, j, copy_w, copy_h;
 302
 303     copy_w = FFMIN(w - x, slice_width);
 304     copy_h = FFMIN(h - y, 16);
 305     for (i = 0; i < copy_h; i++) {
 306         memcpy(blocks, src, copy_w * sizeof(*src));
 307         if (abits == 8)
 308             for (j = 0; j < copy_w; j++)
 309                 blocks[j] >>= 2;
 310         else
 311             for (j = 0; j < copy_w; j++)
 312                 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
 313         for (j = copy_w; j < slice_width; j++)
 314             blocks[j] = blocks[copy_w - 1];
 315         blocks += slice_width;
 316         src    += linesize >> 1;
 317     }
 318     for (; i < 16; i++) {
 319         memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
 320         blocks += slice_width;
 321     }
 322 }
 323
 324 /**
 325  * Write an unsigned rice/exp golomb codeword.
 326  */
 327 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
 328 {
 329     unsigned int rice_order, exp_order, switch_bits, switch_val;
 330     int exponent;
 331
 332     /* number of prefix bits to switch between Rice and expGolomb */
 333     switch_bits = (codebook & 3) + 1;
 334     rice_order  =  codebook >> 5;       /* rice code order */
 335     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 336
 337     switch_val  = switch_bits << rice_order;
 338
 339     if (val >= switch_val) {
 340         val -= switch_val - (1 << exp_order);
 341         exponent = av_log2(val);
 342
 343         put_bits(pb, exponent - exp_order + switch_bits, 0);
 344         put_bits(pb, exponent + 1, val);
 345     } else {
 346         exponent = val >> rice_order;
 347
 348         if (exponent)
 349             put_bits(pb, exponent, 0);
 350         put_bits(pb, 1, 1);
 351         if (rice_order)
 352             put_sbits(pb, rice_order, val);
 353     }
 354 }
 355
 356 #define GET_SIGN(x)  ((x) >> 31)
 357 #define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
 358
 359 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
 360                        int blocks_per_slice, int scale)
 361 {
 362     int i;
 363     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 364
 365     prev_dc = (blocks[0] - 0x4000) / scale;
 366     encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
 367     sign     = 0;
 368     codebook = 3;
 369     blocks  += 64;
 370
 371     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 372         dc       = (blocks[0] - 0x4000) / scale;
 373         delta    = dc - prev_dc;
 374         new_sign = GET_SIGN(delta);
 375         delta    = (delta ^ sign) - sign;
 376         code     = MAKE_CODE(delta);
 377         encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
 378         codebook = (code + (code & 1)) >> 1;
 379         codebook = FFMIN(codebook, 3);
 380         sign     = new_sign;
 381         prev_dc  = dc;
 382     }
 383 }
 384
 385 static void encode_acs(PutBitContext *pb, int16_t *blocks,
 386                        int blocks_per_slice,
 387                        int plane_size_factor,
 388                        const uint8_t *scan, const int16_t *qmat)
 389 {
 390     int idx, i;
 391     int run, level, run_cb, lev_cb;
 392     int max_coeffs, abs_level;
 393
 394     max_coeffs = blocks_per_slice << 6;
 395     run_cb     = ff_prores_run_to_cb_index[4];
 396     lev_cb     = ff_prores_lev_to_cb_index[2];
 397     run        = 0;
 398
 399     for (i = 1; i < 64; i++) {
 400         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 401             level = blocks[idx] / qmat[scan[i]];
 402             if (level) {
 403                 abs_level = FFABS(level);
 404                 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
 405                 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
 406                                     abs_level - 1);
 407                 put_sbits(pb, 1, GET_SIGN(level));
 408
 409                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 410                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 411                 run    = 0;
 412             } else {
 413                 run++;
 414             }
 415         }
 416     }
 417 }
 418
 419 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
 420                               const uint16_t *src, int linesize,
 421                               int mbs_per_slice, int16_t *blocks,
 422                               int blocks_per_mb, int plane_size_factor,
 423                               const int16_t *qmat)
 424 {
 425     int blocks_per_slice, saved_pos;
 426
 427     saved_pos = put_bits_count(pb);
 428     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 429
 430     encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
 431     encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
 432                ctx->scantable, qmat);
 433     flush_put_bits(pb);
 434
 435     return (put_bits_count(pb) - saved_pos) >> 3;
 436 }
 437
 438 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
 439 {
 440     const int mask  = (1 << abits) - 1;
 441     const int dbits = (abits == 8) ? 4 : 7;
 442     const int dsize = 1 << dbits - 1;
 443     int diff = cur - prev;
 444
 445     diff &= mask;
 446     if (diff >= (1 << abits) - dsize)
 447         diff -= 1 << abits;
 448     if (diff < -dsize || diff > dsize || !diff) {
 449         put_bits(pb, 1, 1);
 450         put_bits(pb, abits, diff);
 451     } else {
 452         put_bits(pb, 1, 0);
 453         put_bits(pb, dbits - 1, FFABS(diff) - 1);
 454         put_bits(pb, 1, diff < 0);
 455     }
 456 }
 457
 458 static void put_alpha_run(PutBitContext *pb, int run)
 459 {
 460     if (run) {
 461         put_bits(pb, 1, 0);
 462         if (run < 0x10)
 463             put_bits(pb, 4, run);
 464         else
 465             put_bits(pb, 15, run);
 466     } else {
 467         put_bits(pb, 1, 1);
 468     }
 469 }
 470
 471 // todo alpha quantisation for high quants
 472 static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
 473                               int mbs_per_slice, uint16_t *blocks,
 474                               int quant)
 475 {
 476     const int abits = ctx->alpha_bits;
 477     const int mask  = (1 << abits) - 1;
 478     const int num_coeffs = mbs_per_slice * 256;
 479     int saved_pos = put_bits_count(pb);
 480     int prev = mask, cur;
 481     int idx = 0;
 482     int run = 0;
 483
 484     cur = blocks[idx++];
 485     put_alpha_diff(pb, cur, prev, abits);
 486     prev = cur;
 487     do {
 488         cur = blocks[idx++];
 489         if (cur != prev) {
 490             put_alpha_run (pb, run);
 491             put_alpha_diff(pb, cur, prev, abits);
 492             prev = cur;
 493             run  = 0;
 494         } else {
 495             run++;
 496         }
 497     } while (idx < num_coeffs);
 498     if (run)
 499         put_alpha_run(pb, run);
 500     flush_put_bits(pb);
 501     return (put_bits_count(pb) - saved_pos) >> 3;
 502 }
 503
 504 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
 505                         PutBitContext *pb,
 506                         int sizes[4], int x, int y, int quant,
 507                         int mbs_per_slice)
 508 {
 509     ProresContext *ctx = avctx->priv_data;
 510     int i, xp, yp;
 511     int total_size = 0;
 512     const uint16_t *src;
 513     int slice_width_factor = av_log2(mbs_per_slice);
 514     int num_cblocks, pwidth, linesize, line_add;
 515     int plane_factor, is_chroma;
 516     uint16_t *qmat;
 517
 518     if (ctx->pictures_per_frame == 1)
 519         line_add = 0;
 520     else
 521         line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
 522
 523     if (ctx->force_quant) {
 524         qmat = ctx->quants[0];
 525     } else if (quant < MAX_STORED_Q) {
 526         qmat = ctx->quants[quant];
 527     } else {
 528         qmat = ctx->custom_q;
 529         for (i = 0; i < 64; i++)
 530             qmat[i] = ctx->quant_mat[i] * quant;
 531     }
 532
 533     for (i = 0; i < ctx->num_planes; i++) {
 534         is_chroma    = (i == 1 || i == 2);
 535         plane_factor = slice_width_factor + 2;
 536         if (is_chroma)
 537             plane_factor += ctx->chroma_factor - 3;
 538         if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
 539             xp          = x << 4;
 540             yp          = y << 4;
 541             num_cblocks = 4;
 542             pwidth      = avctx->width;
 543         } else {
 544             xp          = x << 3;
 545             yp          = y << 4;
 546             num_cblocks = 2;
 547             pwidth      = avctx->width >> 1;
 548         }
 549
 550         linesize = pic->linesize[i] * ctx->pictures_per_frame;
 551         src = (const uint16_t*)(pic->data[i] + yp * linesize +
 552                                 line_add * pic->linesize[i]) + xp;
 553
 554         if (i < 3) {
 555             get_slice_data(ctx, src, linesize, xp, yp,
 556                            pwidth, avctx->height / ctx->pictures_per_frame,
 557                            ctx->blocks[0], ctx->emu_buf,
 558                            mbs_per_slice, num_cblocks, is_chroma);
 559             sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
 560                                           mbs_per_slice, ctx->blocks[0],
 561                                           num_cblocks, plane_factor,
 562                                           qmat);
 563         } else {
 564             get_alpha_data(ctx, src, linesize, xp, yp,
 565                            pwidth, avctx->height / ctx->pictures_per_frame,
 566                            ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
 567             sizes[i] = encode_alpha_plane(ctx, pb, mbs_per_slice,
 568                                           ctx->blocks[0], quant);
 569         }
 570         total_size += sizes[i];
 571         if (put_bits_left(pb) < 0) {
 572             av_log(avctx, AV_LOG_ERROR,
 573                    "Underestimated required buffer size.\n");
 574             return AVERROR_BUG;
 575         }
 576     }
 577     return total_size;
 578 }
 579
 580 static inline int estimate_vlc(unsigned codebook, int val)
 581 {
 582     unsigned int rice_order, exp_order, switch_bits, switch_val;
 583     int exponent;
 584
 585     /* number of prefix bits to switch between Rice and expGolomb */
 586     switch_bits = (codebook & 3) + 1;
 587     rice_order  =  codebook >> 5;       /* rice code order */
 588     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 589
 590     switch_val  = switch_bits << rice_order;
 591
 592     if (val >= switch_val) {
 593         val -= switch_val - (1 << exp_order);
 594         exponent = av_log2(val);
 595
 596         return exponent * 2 - exp_order + switch_bits + 1;
 597     } else {
 598         return (val >> rice_order) + rice_order + 1;
 599     }
 600 }
 601
 602 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
 603                         int scale)
 604 {
 605     int i;
 606     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 607     int bits;
 608
 609     prev_dc  = (blocks[0] - 0x4000) / scale;
 610     bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
 611     sign     = 0;
 612     codebook = 3;
 613     blocks  += 64;
 614     *error  += FFABS(blocks[0] - 0x4000) % scale;
 615
 616     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 617         dc       = (blocks[0] - 0x4000) / scale;
 618         *error  += FFABS(blocks[0] - 0x4000) % scale;
 619         delta    = dc - prev_dc;
 620         new_sign = GET_SIGN(delta);
 621         delta    = (delta ^ sign) - sign;
 622         code     = MAKE_CODE(delta);
 623         bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
 624         codebook = (code + (code & 1)) >> 1;
 625         codebook = FFMIN(codebook, 3);
 626         sign     = new_sign;
 627         prev_dc  = dc;
 628     }
 629
 630     return bits;
 631 }
 632
 633 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
 634                         int plane_size_factor,
 635                         const uint8_t *scan, const int16_t *qmat)
 636 {
 637     int idx, i;
 638     int run, level, run_cb, lev_cb;
 639     int max_coeffs, abs_level;
 640     int bits = 0;
 641
 642     max_coeffs = blocks_per_slice << 6;
 643     run_cb     = ff_prores_run_to_cb_index[4];
 644     lev_cb     = ff_prores_lev_to_cb_index[2];
 645     run        = 0;
 646
 647     for (i = 1; i < 64; i++) {
 648         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 649             level   = blocks[idx] / qmat[scan[i]];
 650             *error += FFABS(blocks[idx]) % qmat[scan[i]];
 651             if (level) {
 652                 abs_level = FFABS(level);
 653                 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
 654                 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
 655                                      abs_level - 1) + 1;
 656
 657                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 658                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 659                 run    = 0;
 660             } else {
 661                 run++;
 662             }
 663         }
 664     }
 665
 666     return bits;
 667 }
 668
 669 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
 670                                 const uint16_t *src, int linesize,
 671                                 int mbs_per_slice,
 672                                 int blocks_per_mb, int plane_size_factor,
 673                                 const int16_t *qmat, ProresThreadData *td)
 674 {
 675     int blocks_per_slice;
 676     int bits;
 677
 678     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 679
 680     bits  = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
 681     bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
 682                          plane_size_factor, ctx->scantable, qmat);
 683
 684     return FFALIGN(bits, 8);
 685 }
 686
 687 static int est_alpha_diff(int cur, int prev, int abits)
 688 {
 689     const int mask  = (1 << abits) - 1;
 690     const int dbits = (abits == 8) ? 4 : 7;
 691     const int dsize = 1 << dbits - 1;
 692     int diff = cur - prev;
 693
 694     diff &= mask;
 695     if (diff >= (1 << abits) - dsize)
 696         diff -= 1 << abits;
 697     if (diff < -dsize || diff > dsize || !diff)
 698         return abits + 1;
 699     else
 700         return dbits + 1;
 701 }
 702
 703 static int estimate_alpha_plane(ProresContext *ctx, int *error,
 704                                 const uint16_t *src, int linesize,
 705                                 int mbs_per_slice, int quant,
 706                                 int16_t *blocks)
 707 {
 708     const int abits = ctx->alpha_bits;
 709     const int mask  = (1 << abits) - 1;
 710     const int num_coeffs = mbs_per_slice * 256;
 711     int prev = mask, cur;
 712     int idx = 0;
 713     int run = 0;
 714     int bits;
 715
 716     *error = 0;
 717     cur = blocks[idx++];
 718     bits = est_alpha_diff(cur, prev, abits);
 719     prev = cur;
 720     do {
 721         cur = blocks[idx++];
 722         if (cur != prev) {
 723             if (!run)
 724                 bits++;
 725             else if (run < 0x10)
 726                 bits += 4;
 727             else
 728                 bits += 15;
 729             bits += est_alpha_diff(cur, prev, abits);
 730             prev = cur;
 731             run  = 0;
 732         } else {
 733             run++;
 734         }
 735     } while (idx < num_coeffs);
 736
 737     if (run) {
 738         if (run < 0x10)
 739             bits += 4;
 740         else
 741             bits += 15;
 742     }
 743
 744     return bits;
 745 }
 746
 747 static int find_slice_quant(AVCodecContext *avctx,
 748                             int trellis_node, int x, int y, int mbs_per_slice,
 749                             ProresThreadData *td)
 750 {
 751     ProresContext *ctx = avctx->priv_data;
 752     int i, q, pq, xp, yp;
 753     const uint16_t *src;
 754     int slice_width_factor = av_log2(mbs_per_slice);
 755     int num_cblocks[MAX_PLANES], pwidth;
 756     int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
 757     const int min_quant = ctx->profile_info->min_quant;
 758     const int max_quant = ctx->profile_info->max_quant;
 759     int error, bits, bits_limit;
 760     int mbs, prev, cur, new_score;
 761     int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
 762     int overquant;
 763     uint16_t *qmat;
 764     int linesize[4], line_add;
 765
 766     if (ctx->pictures_per_frame == 1)
 767         line_add = 0;
 768     else
 769         line_add = ctx->cur_picture_idx ^ !ctx->pic->top_field_first;
 770     mbs = x + mbs_per_slice;
 771
 772     for (i = 0; i < ctx->num_planes; i++) {
 773         is_chroma[i]    = (i == 1 || i == 2);
 774         plane_factor[i] = slice_width_factor + 2;
 775         if (is_chroma[i])
 776             plane_factor[i] += ctx->chroma_factor - 3;
 777         if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
 778             xp             = x << 4;
 779             yp             = y << 4;
 780             num_cblocks[i] = 4;
 781             pwidth         = avctx->width;
 782         } else {
 783             xp             = x << 3;
 784             yp             = y << 4;
 785             num_cblocks[i] = 2;
 786             pwidth         = avctx->width >> 1;
 787         }
 788
 789         linesize[i] = ctx->pic->linesize[i] * ctx->pictures_per_frame;
 790         src = (const uint16_t *)(ctx->pic->data[i] + yp * linesize[i] +
 791                                  line_add * ctx->pic->linesize[i]) + xp;
 792
 793         if (i < 3) {
 794             get_slice_data(ctx, src, linesize[i], xp, yp,
 795                            pwidth, avctx->height / ctx->pictures_per_frame,
 796                            td->blocks[i], td->emu_buf,
 797                            mbs_per_slice, num_cblocks[i], is_chroma[i]);
 798         } else {
 799             get_alpha_data(ctx, src, linesize[i], xp, yp,
 800                            pwidth, avctx->height / ctx->pictures_per_frame,
 801                            td->blocks[i], mbs_per_slice, ctx->alpha_bits);
 802         }
 803     }
 804
 805     for (q = min_quant; q < max_quant + 2; q++) {
 806         td->nodes[trellis_node + q].prev_node = -1;
 807         td->nodes[trellis_node + q].quant     = q;
 808     }
 809
 810     // todo: maybe perform coarser quantising to fit into frame size when needed
 811     for (q = min_quant; q <= max_quant; q++) {
 812         bits  = 0;
 813         error = 0;
 814         for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
 815             bits += estimate_slice_plane(ctx, &error, i,
 816                                          src, linesize[i],
 817                                          mbs_per_slice,
 818                                          num_cblocks[i], plane_factor[i],
 819                                          ctx->quants[q], td);
 820         }
 821         if (ctx->alpha_bits)
 822             bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
 823                                          mbs_per_slice, q, td->blocks[3]);
 824         if (bits > 65000 * 8)
 825             error = SCORE_LIMIT;
 826
 827         slice_bits[q]  = bits;
 828         slice_score[q] = error;
 829     }
 830     if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
 831         slice_bits[max_quant + 1]  = slice_bits[max_quant];
 832         slice_score[max_quant + 1] = slice_score[max_quant] + 1;
 833         overquant = max_quant;
 834     } else {
 835         for (q = max_quant + 1; q < 128; q++) {
 836             bits  = 0;
 837             error = 0;
 838             if (q < MAX_STORED_Q) {
 839                 qmat = ctx->quants[q];
 840             } else {
 841                 qmat = td->custom_q;
 842                 for (i = 0; i < 64; i++)
 843                     qmat[i] = ctx->quant_mat[i] * q;
 844             }
 845             for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
 846                 bits += estimate_slice_plane(ctx, &error, i,
 847                                              src, linesize[i],
 848                                              mbs_per_slice,
 849                                              num_cblocks[i], plane_factor[i],
 850                                              qmat, td);
 851             }
 852             if (ctx->alpha_bits)
 853                 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
 854                                              mbs_per_slice, q, td->blocks[3]);
 855             if (bits <= ctx->bits_per_mb * mbs_per_slice)
 856                 break;
 857         }
 858
 859         slice_bits[max_quant + 1]  = bits;
 860         slice_score[max_quant + 1] = error;
 861         overquant = q;
 862     }
 863     td->nodes[trellis_node + max_quant + 1].quant = overquant;
 864
 865     bits_limit = mbs * ctx->bits_per_mb;
 866     for (pq = min_quant; pq < max_quant + 2; pq++) {
 867         prev = trellis_node - TRELLIS_WIDTH + pq;
 868
 869         for (q = min_quant; q < max_quant + 2; q++) {
 870             cur = trellis_node + q;
 871
 872             bits  = td->nodes[prev].bits + slice_bits[q];
 873             error = slice_score[q];
 874             if (bits > bits_limit)
 875                 error = SCORE_LIMIT;
 876
 877             if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
 878                 new_score = td->nodes[prev].score + error;
 879             else
 880                 new_score = SCORE_LIMIT;
 881             if (td->nodes[cur].prev_node == -1 ||
 882                 td->nodes[cur].score >= new_score) {
 883
 884                 td->nodes[cur].bits      = bits;
 885                 td->nodes[cur].score     = new_score;
 886                 td->nodes[cur].prev_node = prev;
 887             }
 888         }
 889     }
 890
 891     error = td->nodes[trellis_node + min_quant].score;
 892     pq    = trellis_node + min_quant;
 893     for (q = min_quant + 1; q < max_quant + 2; q++) {
 894         if (td->nodes[trellis_node + q].score <= error) {
 895             error = td->nodes[trellis_node + q].score;
 896             pq    = trellis_node + q;
 897         }
 898     }
 899
 900     return pq;
 901 }
 902
 903 static int find_quant_thread(AVCodecContext *avctx, void *arg,
 904                              int jobnr, int threadnr)
 905 {
 906     ProresContext *ctx = avctx->priv_data;
 907     ProresThreadData *td = ctx->tdata + threadnr;
 908     int mbs_per_slice = ctx->mbs_per_slice;
 909     int x, y = jobnr, mb, q = 0;
 910
 911     for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
 912         while (ctx->mb_width - x < mbs_per_slice)
 913             mbs_per_slice >>= 1;
 914         q = find_slice_quant(avctx,
 915                              (mb + 1) * TRELLIS_WIDTH, x, y,
 916                              mbs_per_slice, td);
 917     }
 918
 919     for (x = ctx->slices_width - 1; x >= 0; x--) {
 920         ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
 921         q = td->nodes[q].prev_node;
 922     }
 923
 924     return 0;
 925 }
 926
 927 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 928                         const AVFrame *pic, int *got_packet)
 929 {
 930     ProresContext *ctx = avctx->priv_data;
 931     uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
 932     uint8_t *picture_size_pos;
 933     PutBitContext pb;
 934     int x, y, i, mb, q = 0;
 935     int sizes[4] = { 0 };
 936     int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
 937     int frame_size, picture_size, slice_size;
 938     int pkt_size, ret, max_slice_size = 0;
 939     uint8_t frame_flags;
 940
 941     ctx->pic = pic;
 942     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
 943     avctx->coded_frame->key_frame = 1;
 944
 945     pkt_size = ctx->frame_size_upper_bound;
 946
 947     if ((ret = ff_alloc_packet(pkt, pkt_size + FF_MIN_BUFFER_SIZE)) < 0) {
 948         av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
 949         return ret;
 950     }
 951
 952     orig_buf = pkt->data;
 953
 954     // frame atom
 955     orig_buf += 4;                              // frame size
 956     bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
 957     buf = orig_buf;
 958
 959     // frame header
 960     tmp = buf;
 961     buf += 2;                                   // frame header size will be stored here
 962     bytestream_put_be16  (&buf, 0);             // version 1
 963     bytestream_put_buffer(&buf, ctx->vendor, 4);
 964     bytestream_put_be16  (&buf, avctx->width);
 965     bytestream_put_be16  (&buf, avctx->height);
 966
 967     frame_flags = ctx->chroma_factor << 6;
 968     if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
 969         frame_flags |= pic->top_field_first ? 0x04 : 0x08;
 970     bytestream_put_byte  (&buf, frame_flags);
 971
 972     bytestream_put_byte  (&buf, 0);             // reserved
 973     bytestream_put_byte  (&buf, avctx->color_primaries);
 974     bytestream_put_byte  (&buf, avctx->color_trc);
 975     bytestream_put_byte  (&buf, avctx->colorspace);
 976     bytestream_put_byte  (&buf, 0x40 | (ctx->alpha_bits >> 3));
 977     bytestream_put_byte  (&buf, 0);             // reserved
 978     if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
 979         bytestream_put_byte  (&buf, 0x03);      // matrix flags - both matrices are present
 980         // luma quantisation matrix
 981         for (i = 0; i < 64; i++)
 982             bytestream_put_byte(&buf, ctx->quant_mat[i]);
 983         // chroma quantisation matrix
 984         for (i = 0; i < 64; i++)
 985             bytestream_put_byte(&buf, ctx->quant_mat[i]);
 986     } else {
 987         bytestream_put_byte  (&buf, 0x00);      // matrix flags - default matrices are used
 988     }
 989     bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
 990
 991     for (ctx->cur_picture_idx = 0;
 992          ctx->cur_picture_idx < ctx->pictures_per_frame;
 993          ctx->cur_picture_idx++) {
 994         // picture header
 995         picture_size_pos = buf + 1;
 996         bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
 997         buf += 4;                                   // picture data size will be stored here
 998         bytestream_put_be16  (&buf, ctx->slices_per_picture);
 999         bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
1000
1001         // seek table - will be filled during slice encoding
1002         slice_sizes = buf;
1003         buf += ctx->slices_per_picture * 2;
1004
1005         // slices
1006         if (!ctx->force_quant) {
1007             ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
1008                                   ctx->mb_height);
1009             if (ret)
1010                 return ret;
1011         }
1012
1013         for (y = 0; y < ctx->mb_height; y++) {
1014             int mbs_per_slice = ctx->mbs_per_slice;
1015             for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1016                 q = ctx->force_quant ? ctx->force_quant
1017                                      : ctx->slice_q[mb + y * ctx->slices_width];
1018
1019                 while (ctx->mb_width - x < mbs_per_slice)
1020                     mbs_per_slice >>= 1;
1021
1022                 bytestream_put_byte(&buf, slice_hdr_size << 3);
1023                 slice_hdr = buf;
1024                 buf += slice_hdr_size - 1;
1025                 if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1026                     uint8_t *start = pkt->data;
1027                     // Recompute new size according to max_slice_size
1028                     // and deduce delta
1029                     int delta = 200 + ctx->pictures_per_frame *
1030                                 ctx->slices_per_picture * max_slice_size -
1031                                 pkt_size;
1032
1033                     delta = FFMAX(delta, 2 * max_slice_size);
1034                     ctx->frame_size_upper_bound += delta;
1035
1036                     if (!ctx->warn) {
1037                         avpriv_request_sample(avctx,
1038                                               "Packet too small: is %i,"
1039                                               " needs %i (slice: %i). "
1040                                               "Correct allocation",
1041                                               pkt_size, delta, max_slice_size);
1042                         ctx->warn = 1;
1043                     }
1044
1045                     ret = av_grow_packet(pkt, delta);
1046                     if (ret < 0)
1047                         return ret;
1048
1049                     pkt_size += delta;
1050                     // restore pointers
1051                     orig_buf         = pkt->data + (orig_buf         - start);
1052                     buf              = pkt->data + (buf              - start);
1053                     picture_size_pos = pkt->data + (picture_size_pos - start);
1054                     slice_sizes      = pkt->data + (slice_sizes      - start);
1055                     slice_hdr        = pkt->data + (slice_hdr        - start);
1056                     tmp              = pkt->data + (tmp              - start);
1057                 }
1058                 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
1059                 ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
1060                                    mbs_per_slice);
1061                 if (ret < 0)
1062                     return ret;
1063
1064                 bytestream_put_byte(&slice_hdr, q);
1065                 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1066                 for (i = 0; i < ctx->num_planes - 1; i++) {
1067                     bytestream_put_be16(&slice_hdr, sizes[i]);
1068                     slice_size += sizes[i];
1069                 }
1070                 bytestream_put_be16(&slice_sizes, slice_size);
1071                 buf += slice_size - slice_hdr_size;
1072                 if (max_slice_size < slice_size)
1073                     max_slice_size = slice_size;
1074             }
1075         }
1076
1077         if (ctx->pictures_per_frame == 1)
1078             picture_size = buf - picture_size_pos - 6;
1079         else
1080             picture_size = buf - picture_size_pos + 1;
1081         bytestream_put_be32(&picture_size_pos, picture_size);
1082     }
1083
1084     orig_buf -= 8;
1085     frame_size = buf - orig_buf;
1086     bytestream_put_be32(&orig_buf, frame_size);
1087
1088     pkt->size   = frame_size;
1089     pkt->flags |= AV_PKT_FLAG_KEY;
1090     *got_packet = 1;
1091
1092     return 0;
1093 }
1094
1095 static av_cold int encode_close(AVCodecContext *avctx)
1096 {
1097     ProresContext *ctx = avctx->priv_data;
1098     int i;
1099
1100     av_freep(&avctx->coded_frame);
1101
1102     if (ctx->tdata) {
1103         for (i = 0; i < avctx->thread_count; i++)
1104             av_free(ctx->tdata[i].nodes);
1105     }
1106     av_freep(&ctx->tdata);
1107     av_freep(&ctx->slice_q);
1108
1109     return 0;
1110 }
1111
1112 static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
1113                         int linesize, int16_t *block)
1114 {
1115     int x, y;
1116     const uint16_t *tsrc = src;
1117
1118     for (y = 0; y < 8; y++) {
1119         for (x = 0; x < 8; x++)
1120             block[y * 8 + x] = tsrc[x];
1121         tsrc += linesize >> 1;
1122     }
1123     fdsp->fdct(block);
1124 }
1125
1126 static av_cold int encode_init(AVCodecContext *avctx)
1127 {
1128     ProresContext *ctx = avctx->priv_data;
1129     int mps;
1130     int i, j;
1131     int min_quant, max_quant;
1132     int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
1133
1134     avctx->bits_per_raw_sample = 10;
1135     avctx->coded_frame = av_frame_alloc();
1136     if (!avctx->coded_frame)
1137         return AVERROR(ENOMEM);
1138
1139     ctx->fdct      = prores_fdct;
1140     ctx->scantable = interlaced ? ff_prores_interlaced_scan
1141                                 : ff_prores_progressive_scan;
1142     ff_fdctdsp_init(&ctx->fdsp, avctx);
1143
1144     mps = ctx->mbs_per_slice;
1145     if (mps & (mps - 1)) {
1146         av_log(avctx, AV_LOG_ERROR,
1147                "there should be an integer power of two MBs per slice\n");
1148         return AVERROR(EINVAL);
1149     }
1150     if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1151         if (ctx->alpha_bits & 7) {
1152             av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1153             return AVERROR(EINVAL);
1154         }
1155         avctx->bits_per_coded_sample = 32;
1156     } else {
1157         ctx->alpha_bits = 0;
1158     }
1159
1160     ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1161                          ? CFACTOR_Y422
1162                          : CFACTOR_Y444;
1163     ctx->profile_info  = prores_profile_info + ctx->profile;
1164     ctx->num_planes    = 3 + !!ctx->alpha_bits;
1165
1166     ctx->mb_width      = FFALIGN(avctx->width,  16) >> 4;
1167
1168     if (interlaced)
1169         ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1170     else
1171         ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
1172
1173     ctx->slices_width  = ctx->mb_width / mps;
1174     ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1175     ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1176     ctx->pictures_per_frame = 1 + interlaced;
1177
1178     if (ctx->quant_sel == -1)
1179         ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1180     else
1181         ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
1182
1183     if (strlen(ctx->vendor) != 4) {
1184         av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1185         return AVERROR_INVALIDDATA;
1186     }
1187
1188     ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1189     if (!ctx->force_quant) {
1190         if (!ctx->bits_per_mb) {
1191             for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1192                 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1193                                            ctx->pictures_per_frame)
1194                     break;
1195             ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
1196         } else if (ctx->bits_per_mb < 128) {
1197             av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1198             return AVERROR_INVALIDDATA;
1199         }
1200
1201         min_quant = ctx->profile_info->min_quant;
1202         max_quant = ctx->profile_info->max_quant;
1203         for (i = min_quant; i < MAX_STORED_Q; i++) {
1204             for (j = 0; j < 64; j++)
1205                 ctx->quants[i][j] = ctx->quant_mat[j] * i;
1206         }
1207
1208         ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1209         if (!ctx->slice_q) {
1210             encode_close(avctx);
1211             return AVERROR(ENOMEM);
1212         }
1213
1214         ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1215         if (!ctx->tdata) {
1216             encode_close(avctx);
1217             return AVERROR(ENOMEM);
1218         }
1219
1220         for (j = 0; j < avctx->thread_count; j++) {
1221             ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1222                                             * TRELLIS_WIDTH
1223                                             * sizeof(*ctx->tdata->nodes));
1224             if (!ctx->tdata[j].nodes) {
1225                 encode_close(avctx);
1226                 return AVERROR(ENOMEM);
1227             }
1228             for (i = min_quant; i < max_quant + 2; i++) {
1229                 ctx->tdata[j].nodes[i].prev_node = -1;
1230                 ctx->tdata[j].nodes[i].bits      = 0;
1231                 ctx->tdata[j].nodes[i].score     = 0;
1232             }
1233         }
1234     } else {
1235         int ls = 0;
1236
1237         if (ctx->force_quant > 64) {
1238             av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1239             return AVERROR_INVALIDDATA;
1240         }
1241
1242         for (j = 0; j < 64; j++) {
1243             ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1244             ls += av_log2((1 << 11)  / ctx->quants[0][j]) * 2 + 1;
1245         }
1246
1247         ctx->bits_per_mb = ls * 8;
1248         if (ctx->chroma_factor == CFACTOR_Y444)
1249             ctx->bits_per_mb += ls * 4;
1250     }
1251
1252     ctx->frame_size_upper_bound = ctx->pictures_per_frame *
1253                                   ctx->slices_per_picture *
1254                                   (2 + 2 * ctx->num_planes +
1255                                    (mps * ctx->bits_per_mb) / 8)
1256                                   + 200;
1257
1258     if (ctx->alpha_bits) {
1259          // The alpha plane is run-coded and might exceed the bit budget.
1260          ctx->frame_size_upper_bound += ctx->pictures_per_frame *
1261                                         ctx->slices_per_picture *
1262          /* num pixels per slice */     (ctx->mbs_per_slice * 256 *
1263          /* bits per pixel */            (1 + ctx->alpha_bits + 1) + 7 >> 3);
1264     }
1265
1266     avctx->codec_tag   = ctx->profile_info->tag;
1267
1268     av_log(avctx, AV_LOG_DEBUG,
1269            "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1270            ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1271            interlaced ? "yes" : "no", ctx->bits_per_mb);
1272     av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1273            ctx->frame_size_upper_bound);
1274
1275     return 0;
1276 }
1277
// Byte offset of a ProresContext member, for the option table below.
#define OFFSET(x) offsetof(ProresContext, x)
// Flags shared by every option: video, encoding.  Parenthesised so the
// expansion stays a single operand wherever it is used.
#define VE     (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
1280
1281 static const AVOption options[] = {
1282     { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1283         AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1284     { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1285         { .i64 = PRORES_PROFILE_STANDARD },
1286         PRORES_PROFILE_PROXY, PRORES_PROFILE_4444, VE, "profile" },
1287     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1288         0, 0, VE, "profile" },
1289     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1290         0, 0, VE, "profile" },
1291     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1292         0, 0, VE, "profile" },
1293     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1294         0, 0, VE, "profile" },
1295     { "4444",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1296         0, 0, VE, "profile" },
1297     { "vendor", "vendor ID", OFFSET(vendor),
1298         AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
1299     { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1300         AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
1301     { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1302         { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1303     { "auto",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1304         0, 0, VE, "quant_mat" },
1305     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1306         0, 0, VE, "quant_mat" },
1307     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1308         0, 0, VE, "quant_mat" },
1309     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1310         0, 0, VE, "quant_mat" },
1311     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1312         0, 0, VE, "quant_mat" },
1313     { "default",       NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1314         0, 0, VE, "quant_mat" },
1315     { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1316         { .i64 = 16 }, 0, 16, VE },
1317     { NULL }
1318 };
1319
1320 static const AVClass proresenc_class = {
1321     .class_name = "ProRes encoder",
1322     .item_name  = av_default_item_name,
1323     .option     = options,
1324     .version    = LIBAVUTIL_VERSION_INT,
1325 };
1326
1327 AVCodec ff_prores_encoder = {
1328     .name           = "prores",
1329     .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1330     .type           = AVMEDIA_TYPE_VIDEO,
1331     .id             = AV_CODEC_ID_PRORES,
1332     .priv_data_size = sizeof(ProresContext),
1333     .init           = encode_init,
1334     .close          = encode_close,
1335     .encode2        = encode_frame,
1336     .capabilities   = CODEC_CAP_SLICE_THREADS,
1337     .pix_fmts       = (const enum AVPixelFormat[]) {
1338                           AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1339                           AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1340                       },
1341     .priv_class     = &proresenc_class,
1342 };