git.sesse.net Git - ffmpeg/blob - libavcodec/proresenc_kostya.c

   1 /*
   2  * Apple ProRes encoder
   3  *
   4  * Copyright (c) 2012 Konstantin Shishkov
   5  *
 * This encoder appears to be based on Anatoliy Wasserman's encoder,
 * considering the similarities in the bugs.
   8  *
   9  * This file is part of FFmpeg.
  10  *
  11  * FFmpeg is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * FFmpeg is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with FFmpeg; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/opt.h"
  27 #include "libavutil/pixdesc.h"
  28 #include "avcodec.h"
  29 #include "fdctdsp.h"
  30 #include "put_bits.h"
  31 #include "bytestream.h"
  32 #include "internal.h"
  33 #include "proresdata.h"
  34
  35 #define CFACTOR_Y422 2
  36 #define CFACTOR_Y444 3
  37
  38 #define MAX_MBS_PER_SLICE 8
  39
  40 #define MAX_PLANES 4
  41
  42 enum {
  43     PRORES_PROFILE_PROXY = 0,
  44     PRORES_PROFILE_LT,
  45     PRORES_PROFILE_STANDARD,
  46     PRORES_PROFILE_HQ,
  47     PRORES_PROFILE_4444,
  48 };
  49
  50 enum {
  51     QUANT_MAT_PROXY = 0,
  52     QUANT_MAT_LT,
  53     QUANT_MAT_STANDARD,
  54     QUANT_MAT_HQ,
  55     QUANT_MAT_DEFAULT,
  56 };
  57
  58 static const uint8_t prores_quant_matrices[][64] = {
  59     { // proxy
  60          4,  7,  9, 11, 13, 14, 15, 63,
  61          7,  7, 11, 12, 14, 15, 63, 63,
  62          9, 11, 13, 14, 15, 63, 63, 63,
  63         11, 11, 13, 14, 63, 63, 63, 63,
  64         11, 13, 14, 63, 63, 63, 63, 63,
  65         13, 14, 63, 63, 63, 63, 63, 63,
  66         13, 63, 63, 63, 63, 63, 63, 63,
  67         63, 63, 63, 63, 63, 63, 63, 63,
  68     },
  69     { // LT
  70          4,  5,  6,  7,  9, 11, 13, 15,
  71          5,  5,  7,  8, 11, 13, 15, 17,
  72          6,  7,  9, 11, 13, 15, 15, 17,
  73          7,  7,  9, 11, 13, 15, 17, 19,
  74          7,  9, 11, 13, 14, 16, 19, 23,
  75          9, 11, 13, 14, 16, 19, 23, 29,
  76          9, 11, 13, 15, 17, 21, 28, 35,
  77         11, 13, 16, 17, 21, 28, 35, 41,
  78     },
  79     { // standard
  80          4,  4,  5,  5,  6,  7,  7,  9,
  81          4,  4,  5,  6,  7,  7,  9,  9,
  82          5,  5,  6,  7,  7,  9,  9, 10,
  83          5,  5,  6,  7,  7,  9,  9, 10,
  84          5,  6,  7,  7,  8,  9, 10, 12,
  85          6,  7,  7,  8,  9, 10, 12, 15,
  86          6,  7,  7,  9, 10, 11, 14, 17,
  87          7,  7,  9, 10, 11, 14, 17, 21,
  88     },
  89     { // high quality
  90          4,  4,  4,  4,  4,  4,  4,  4,
  91          4,  4,  4,  4,  4,  4,  4,  4,
  92          4,  4,  4,  4,  4,  4,  4,  4,
  93          4,  4,  4,  4,  4,  4,  4,  5,
  94          4,  4,  4,  4,  4,  4,  5,  5,
  95          4,  4,  4,  4,  4,  5,  5,  6,
  96          4,  4,  4,  4,  5,  5,  6,  7,
  97          4,  4,  4,  4,  5,  6,  7,  7,
  98     },
  99     { // codec default
 100          4,  4,  4,  4,  4,  4,  4,  4,
 101          4,  4,  4,  4,  4,  4,  4,  4,
 102          4,  4,  4,  4,  4,  4,  4,  4,
 103          4,  4,  4,  4,  4,  4,  4,  4,
 104          4,  4,  4,  4,  4,  4,  4,  4,
 105          4,  4,  4,  4,  4,  4,  4,  4,
 106          4,  4,  4,  4,  4,  4,  4,  4,
 107          4,  4,  4,  4,  4,  4,  4,  4,
 108     },
 109 };
 110
 111 #define NUM_MB_LIMITS 4
 112 static const int prores_mb_limits[NUM_MB_LIMITS] = {
 113     1620, // up to 720x576
 114     2700, // up to 960x720
 115     6075, // up to 1440x1080
 116     9216, // up to 2048x1152
 117 };
 118
 119 static const struct prores_profile {
 120     const char *full_name;
 121     uint32_t    tag;
 122     int         min_quant;
 123     int         max_quant;
 124     int         br_tab[NUM_MB_LIMITS];
 125     int         quant;
 126 } prores_profile_info[5] = {
 127     {
 128         .full_name = "proxy",
 129         .tag       = MKTAG('a', 'p', 'c', 'o'),
 130         .min_quant = 4,
 131         .max_quant = 8,
 132         .br_tab    = { 300, 242, 220, 194 },
 133         .quant     = QUANT_MAT_PROXY,
 134     },
 135     {
 136         .full_name = "LT",
 137         .tag       = MKTAG('a', 'p', 'c', 's'),
 138         .min_quant = 1,
 139         .max_quant = 9,
 140         .br_tab    = { 720, 560, 490, 440 },
 141         .quant     = QUANT_MAT_LT,
 142     },
 143     {
 144         .full_name = "standard",
 145         .tag       = MKTAG('a', 'p', 'c', 'n'),
 146         .min_quant = 1,
 147         .max_quant = 6,
 148         .br_tab    = { 1050, 808, 710, 632 },
 149         .quant     = QUANT_MAT_STANDARD,
 150     },
 151     {
 152         .full_name = "high quality",
 153         .tag       = MKTAG('a', 'p', 'c', 'h'),
 154         .min_quant = 1,
 155         .max_quant = 6,
 156         .br_tab    = { 1566, 1216, 1070, 950 },
 157         .quant     = QUANT_MAT_HQ,
 158     },
 159     {
 160         .full_name = "4444",
 161         .tag       = MKTAG('a', 'p', '4', 'h'),
 162         .min_quant = 1,
 163         .max_quant = 6,
 164         .br_tab    = { 2350, 1828, 1600, 1425 },
 165         .quant     = QUANT_MAT_HQ,
 166     }
 167 };
 168
 169 #define TRELLIS_WIDTH 16
 170 #define SCORE_LIMIT   INT_MAX / 2
 171
 172 struct TrellisNode {
 173     int prev_node;
 174     int quant;
 175     int bits;
 176     int score;
 177 };
 178
 179 #define MAX_STORED_Q 16
 180
 181 typedef struct ProresThreadData {
 182     DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
 183     DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
 184     int16_t custom_q[64];
 185     struct TrellisNode *nodes;
 186 } ProresThreadData;
 187
 188 typedef struct ProresContext {
 189     AVClass *class;
 190     DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
 191     DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
 192     int16_t quants[MAX_STORED_Q][64];
 193     int16_t custom_q[64];
 194     const uint8_t *quant_mat;
 195     const uint8_t *scantable;
 196
 197     void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
 198                  int linesize, int16_t *block);
 199     FDCTDSPContext fdsp;
 200
 201     int mb_width, mb_height;
 202     int mbs_per_slice;
 203     int num_chroma_blocks, chroma_factor;
 204     int slices_width;
 205     int slices_per_picture;
 206     int pictures_per_frame; // 1 for progressive, 2 for interlaced
 207     int cur_picture_idx;
 208     int num_planes;
 209     int bits_per_mb;
 210     int force_quant;
 211     int alpha_bits;
 212
 213     char *vendor;
 214     int quant_sel;
 215
 216     int frame_size_upper_bound;
 217
 218     int profile;
 219     const struct prores_profile *profile_info;
 220
 221     int *slice_q;
 222
 223     ProresThreadData *tdata;
 224 } ProresContext;
 225
 226 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
 227                            int linesize, int x, int y, int w, int h,
 228                            int16_t *blocks, uint16_t *emu_buf,
 229                            int mbs_per_slice, int blocks_per_mb, int is_chroma)
 230 {
 231     const uint16_t *esrc;
 232     const int mb_width = 4 * blocks_per_mb;
 233     int elinesize;
 234     int i, j, k;
 235
 236     for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
 237         if (x >= w) {
 238             memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
 239                               * sizeof(*blocks));
 240             return;
 241         }
 242         if (x + mb_width <= w && y + 16 <= h) {
 243             esrc      = src;
 244             elinesize = linesize;
 245         } else {
 246             int bw, bh, pix;
 247
 248             esrc      = emu_buf;
 249             elinesize = 16 * sizeof(*emu_buf);
 250
 251             bw = FFMIN(w - x, mb_width);
 252             bh = FFMIN(h - y, 16);
 253
 254             for (j = 0; j < bh; j++) {
 255                 memcpy(emu_buf + j * 16,
 256                        (const uint8_t*)src + j * linesize,
 257                        bw * sizeof(*src));
 258                 pix = emu_buf[j * 16 + bw - 1];
 259                 for (k = bw; k < mb_width; k++)
 260                     emu_buf[j * 16 + k] = pix;
 261             }
 262             for (; j < 16; j++)
 263                 memcpy(emu_buf + j * 16,
 264                        emu_buf + (bh - 1) * 16,
 265                        mb_width * sizeof(*emu_buf));
 266         }
 267         if (!is_chroma) {
 268             ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
 269             blocks += 64;
 270             if (blocks_per_mb > 2) {
 271                 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
 272                 blocks += 64;
 273             }
 274             ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
 275             blocks += 64;
 276             if (blocks_per_mb > 2) {
 277                 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
 278                 blocks += 64;
 279             }
 280         } else {
 281             ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
 282             blocks += 64;
 283             ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
 284             blocks += 64;
 285             if (blocks_per_mb > 2) {
 286                 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
 287                 blocks += 64;
 288                 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
 289                 blocks += 64;
 290             }
 291         }
 292
 293         x += mb_width;
 294     }
 295 }
 296
 297 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
 298                            int linesize, int x, int y, int w, int h,
 299                            int16_t *blocks, int mbs_per_slice, int abits)
 300 {
 301     const int slice_width = 16 * mbs_per_slice;
 302     int i, j, copy_w, copy_h;
 303
 304     copy_w = FFMIN(w - x, slice_width);
 305     copy_h = FFMIN(h - y, 16);
 306     for (i = 0; i < copy_h; i++) {
 307         memcpy(blocks, src, copy_w * sizeof(*src));
 308         if (abits == 8)
 309             for (j = 0; j < copy_w; j++)
 310                 blocks[j] >>= 2;
 311         else
 312             for (j = 0; j < copy_w; j++)
 313                 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
 314         for (j = copy_w; j < slice_width; j++)
 315             blocks[j] = blocks[copy_w - 1];
 316         blocks += slice_width;
 317         src    += linesize >> 1;
 318     }
 319     for (; i < 16; i++) {
 320         memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
 321         blocks += slice_width;
 322     }
 323 }
 324
 325 /**
 326  * Write an unsigned rice/exp golomb codeword.
 327  */
 328 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
 329 {
 330     unsigned int rice_order, exp_order, switch_bits, switch_val;
 331     int exponent;
 332
 333     /* number of prefix bits to switch between Rice and expGolomb */
 334     switch_bits = (codebook & 3) + 1;
 335     rice_order  =  codebook >> 5;       /* rice code order */
 336     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 337
 338     switch_val  = switch_bits << rice_order;
 339
 340     if (val >= switch_val) {
 341         val -= switch_val - (1 << exp_order);
 342         exponent = av_log2(val);
 343
 344         put_bits(pb, exponent - exp_order + switch_bits, 0);
 345         put_bits(pb, exponent + 1, val);
 346     } else {
 347         exponent = val >> rice_order;
 348
 349         if (exponent)
 350             put_bits(pb, exponent, 0);
 351         put_bits(pb, 1, 1);
 352         if (rice_order)
 353             put_sbits(pb, rice_order, val);
 354     }
 355 }
 356
 357 #define GET_SIGN(x)  ((x) >> 31)
 358 #define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
 359
 360 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
 361                        int blocks_per_slice, int scale)
 362 {
 363     int i;
 364     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 365
 366     prev_dc = (blocks[0] - 0x4000) / scale;
 367     encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
 368     sign     = 0;
 369     codebook = 3;
 370     blocks  += 64;
 371
 372     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 373         dc       = (blocks[0] - 0x4000) / scale;
 374         delta    = dc - prev_dc;
 375         new_sign = GET_SIGN(delta);
 376         delta    = (delta ^ sign) - sign;
 377         code     = MAKE_CODE(delta);
 378         encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
 379         codebook = (code + (code & 1)) >> 1;
 380         codebook = FFMIN(codebook, 3);
 381         sign     = new_sign;
 382         prev_dc  = dc;
 383     }
 384 }
 385
 386 static void encode_acs(PutBitContext *pb, int16_t *blocks,
 387                        int blocks_per_slice,
 388                        int plane_size_factor,
 389                        const uint8_t *scan, const int16_t *qmat)
 390 {
 391     int idx, i;
 392     int run, level, run_cb, lev_cb;
 393     int max_coeffs, abs_level;
 394
 395     max_coeffs = blocks_per_slice << 6;
 396     run_cb     = ff_prores_run_to_cb_index[4];
 397     lev_cb     = ff_prores_lev_to_cb_index[2];
 398     run        = 0;
 399
 400     for (i = 1; i < 64; i++) {
 401         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 402             level = blocks[idx] / qmat[scan[i]];
 403             if (level) {
 404                 abs_level = FFABS(level);
 405                 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
 406                 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
 407                                     abs_level - 1);
 408                 put_sbits(pb, 1, GET_SIGN(level));
 409
 410                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 411                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 412                 run    = 0;
 413             } else {
 414                 run++;
 415             }
 416         }
 417     }
 418 }
 419
 420 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
 421                               const uint16_t *src, int linesize,
 422                               int mbs_per_slice, int16_t *blocks,
 423                               int blocks_per_mb, int plane_size_factor,
 424                               const int16_t *qmat)
 425 {
 426     int blocks_per_slice, saved_pos;
 427
 428     saved_pos = put_bits_count(pb);
 429     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 430
 431     encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
 432     encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
 433                ctx->scantable, qmat);
 434     flush_put_bits(pb);
 435
 436     return (put_bits_count(pb) - saved_pos) >> 3;
 437 }
 438
 439 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
 440 {
 441     const int mask  = (1 << abits) - 1;
 442     const int dbits = (abits == 8) ? 4 : 7;
 443     const int dsize = 1 << dbits - 1;
 444     int diff = cur - prev;
 445
 446     diff &= mask;
 447     if (diff >= (1 << abits) - dsize)
 448         diff -= 1 << abits;
 449     if (diff < -dsize || diff > dsize || !diff) {
 450         put_bits(pb, 1, 1);
 451         put_bits(pb, abits, diff);
 452     } else {
 453         put_bits(pb, 1, 0);
 454         put_bits(pb, dbits - 1, FFABS(diff) - 1);
 455         put_bits(pb, 1, diff < 0);
 456     }
 457 }
 458
 459 static void put_alpha_run(PutBitContext *pb, int run)
 460 {
 461     if (run) {
 462         put_bits(pb, 1, 0);
 463         if (run < 0x10)
 464             put_bits(pb, 4, run);
 465         else
 466             put_bits(pb, 15, run);
 467     } else {
 468         put_bits(pb, 1, 1);
 469     }
 470 }
 471
 472 // todo alpha quantisation for high quants
 473 static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
 474                               int mbs_per_slice, uint16_t *blocks,
 475                               int quant)
 476 {
 477     const int abits = ctx->alpha_bits;
 478     const int mask  = (1 << abits) - 1;
 479     const int num_coeffs = mbs_per_slice * 256;
 480     int saved_pos = put_bits_count(pb);
 481     int prev = mask, cur;
 482     int idx = 0;
 483     int run = 0;
 484
 485     cur = blocks[idx++];
 486     put_alpha_diff(pb, cur, prev, abits);
 487     prev = cur;
 488     do {
 489         cur = blocks[idx++];
 490         if (cur != prev) {
 491             put_alpha_run (pb, run);
 492             put_alpha_diff(pb, cur, prev, abits);
 493             prev = cur;
 494             run  = 0;
 495         } else {
 496             run++;
 497         }
 498     } while (idx < num_coeffs);
 499     if (run)
 500         put_alpha_run(pb, run);
 501     flush_put_bits(pb);
 502     return (put_bits_count(pb) - saved_pos) >> 3;
 503 }
 504
 505 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
 506                         PutBitContext *pb,
 507                         int sizes[4], int x, int y, int quant,
 508                         int mbs_per_slice)
 509 {
 510     ProresContext *ctx = avctx->priv_data;
 511     int i, xp, yp;
 512     int total_size = 0;
 513     const uint16_t *src;
 514     int slice_width_factor = av_log2(mbs_per_slice);
 515     int num_cblocks, pwidth, linesize, line_add;
 516     int plane_factor, is_chroma;
 517     uint16_t *qmat;
 518
 519     if (ctx->pictures_per_frame == 1)
 520         line_add = 0;
 521     else
 522         line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
 523
 524     if (ctx->force_quant) {
 525         qmat = ctx->quants[0];
 526     } else if (quant < MAX_STORED_Q) {
 527         qmat = ctx->quants[quant];
 528     } else {
 529         qmat = ctx->custom_q;
 530         for (i = 0; i < 64; i++)
 531             qmat[i] = ctx->quant_mat[i] * quant;
 532     }
 533
 534     for (i = 0; i < ctx->num_planes; i++) {
 535         is_chroma    = (i == 1 || i == 2);
 536         plane_factor = slice_width_factor + 2;
 537         if (is_chroma)
 538             plane_factor += ctx->chroma_factor - 3;
 539         if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
 540             xp          = x << 4;
 541             yp          = y << 4;
 542             num_cblocks = 4;
 543             pwidth      = avctx->width;
 544         } else {
 545             xp          = x << 3;
 546             yp          = y << 4;
 547             num_cblocks = 2;
 548             pwidth      = avctx->width >> 1;
 549         }
 550
 551         linesize = pic->linesize[i] * ctx->pictures_per_frame;
 552         src = (const uint16_t*)(pic->data[i] + yp * linesize +
 553                                 line_add * pic->linesize[i]) + xp;
 554
 555         if (i < 3) {
 556             get_slice_data(ctx, src, linesize, xp, yp,
 557                            pwidth, avctx->height / ctx->pictures_per_frame,
 558                            ctx->blocks[0], ctx->emu_buf,
 559                            mbs_per_slice, num_cblocks, is_chroma);
 560             sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
 561                                           mbs_per_slice, ctx->blocks[0],
 562                                           num_cblocks, plane_factor,
 563                                           qmat);
 564         } else {
 565             get_alpha_data(ctx, src, linesize, xp, yp,
 566                            pwidth, avctx->height / ctx->pictures_per_frame,
 567                            ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
 568             sizes[i] = encode_alpha_plane(ctx, pb,
 569                                           mbs_per_slice, ctx->blocks[0],
 570                                           quant);
 571         }
 572         total_size += sizes[i];
 573         if (put_bits_left(pb) < 0) {
 574             av_log(avctx, AV_LOG_ERROR, "Serious underevaluation of"
 575                    "required buffer size");
 576             return AVERROR_BUFFER_TOO_SMALL;
 577         }
 578     }
 579     return total_size;
 580 }
 581
 582 static inline int estimate_vlc(unsigned codebook, int val)
 583 {
 584     unsigned int rice_order, exp_order, switch_bits, switch_val;
 585     int exponent;
 586
 587     /* number of prefix bits to switch between Rice and expGolomb */
 588     switch_bits = (codebook & 3) + 1;
 589     rice_order  =  codebook >> 5;       /* rice code order */
 590     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 591
 592     switch_val  = switch_bits << rice_order;
 593
 594     if (val >= switch_val) {
 595         val -= switch_val - (1 << exp_order);
 596         exponent = av_log2(val);
 597
 598         return exponent * 2 - exp_order + switch_bits + 1;
 599     } else {
 600         return (val >> rice_order) + rice_order + 1;
 601     }
 602 }
 603
 604 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
 605                         int scale)
 606 {
 607     int i;
 608     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 609     int bits;
 610
 611     prev_dc  = (blocks[0] - 0x4000) / scale;
 612     bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
 613     sign     = 0;
 614     codebook = 3;
 615     blocks  += 64;
 616     *error  += FFABS(blocks[0] - 0x4000) % scale;
 617
 618     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 619         dc       = (blocks[0] - 0x4000) / scale;
 620         *error  += FFABS(blocks[0] - 0x4000) % scale;
 621         delta    = dc - prev_dc;
 622         new_sign = GET_SIGN(delta);
 623         delta    = (delta ^ sign) - sign;
 624         code     = MAKE_CODE(delta);
 625         bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
 626         codebook = (code + (code & 1)) >> 1;
 627         codebook = FFMIN(codebook, 3);
 628         sign     = new_sign;
 629         prev_dc  = dc;
 630     }
 631
 632     return bits;
 633 }
 634
 635 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
 636                         int plane_size_factor,
 637                         const uint8_t *scan, const int16_t *qmat)
 638 {
 639     int idx, i;
 640     int run, level, run_cb, lev_cb;
 641     int max_coeffs, abs_level;
 642     int bits = 0;
 643
 644     max_coeffs = blocks_per_slice << 6;
 645     run_cb     = ff_prores_run_to_cb_index[4];
 646     lev_cb     = ff_prores_lev_to_cb_index[2];
 647     run        = 0;
 648
 649     for (i = 1; i < 64; i++) {
 650         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 651             level   = blocks[idx] / qmat[scan[i]];
 652             *error += FFABS(blocks[idx]) % qmat[scan[i]];
 653             if (level) {
 654                 abs_level = FFABS(level);
 655                 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
 656                 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
 657                                      abs_level - 1) + 1;
 658
 659                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 660                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 661                 run    = 0;
 662             } else {
 663                 run++;
 664             }
 665         }
 666     }
 667
 668     return bits;
 669 }
 670
 671 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
 672                                 const uint16_t *src, int linesize,
 673                                 int mbs_per_slice,
 674                                 int blocks_per_mb, int plane_size_factor,
 675                                 const int16_t *qmat, ProresThreadData *td)
 676 {
 677     int blocks_per_slice;
 678     int bits;
 679
 680     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 681
 682     bits  = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
 683     bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
 684                          plane_size_factor, ctx->scantable, qmat);
 685
 686     return FFALIGN(bits, 8);
 687 }
 688
 689 static int est_alpha_diff(int cur, int prev, int abits)
 690 {
 691     const int mask  = (1 << abits) - 1;
 692     const int dbits = (abits == 8) ? 4 : 7;
 693     const int dsize = 1 << dbits - 1;
 694     int diff = cur - prev;
 695
 696     diff &= mask;
 697     if (diff >= (1 << abits) - dsize)
 698         diff -= 1 << abits;
 699     if (diff < -dsize || diff > dsize || !diff)
 700         return abits + 1;
 701     else
 702         return dbits + 1;
 703 }
 704
 705 static int estimate_alpha_plane(ProresContext *ctx, int *error,
 706                                 const uint16_t *src, int linesize,
 707                                 int mbs_per_slice, int quant,
 708                                 int16_t *blocks)
 709 {
 710     const int abits = ctx->alpha_bits;
 711     const int mask  = (1 << abits) - 1;
 712     const int num_coeffs = mbs_per_slice * 256;
 713     int prev = mask, cur;
 714     int idx = 0;
 715     int run = 0;
 716     int bits;
 717
 718     *error = 0;
 719     cur = blocks[idx++];
 720     bits = est_alpha_diff(cur, prev, abits);
 721     prev = cur;
 722     do {
 723         cur = blocks[idx++];
 724         if (cur != prev) {
 725             if (!run)
 726                 bits++;
 727             else if (run < 0x10)
 728                 bits += 4;
 729             else
 730                 bits += 15;
 731             bits += est_alpha_diff(cur, prev, abits);
 732             prev = cur;
 733             run  = 0;
 734         } else {
 735             run++;
 736         }
 737     } while (idx < num_coeffs);
 738
 739     if (run) {
 740         if (run < 0x10)
 741             bits += 4;
 742         else
 743             bits += 15;
 744     }
 745
 746     return bits;
 747 }
 748
 749 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
 750                             int trellis_node, int x, int y, int mbs_per_slice,
 751                             ProresThreadData *td)
 752 {
 753     ProresContext *ctx = avctx->priv_data;
 754     int i, q, pq, xp, yp;
 755     const uint16_t *src;
 756     int slice_width_factor = av_log2(mbs_per_slice);
 757     int num_cblocks[MAX_PLANES], pwidth;
 758     int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
 759     const int min_quant = ctx->profile_info->min_quant;
 760     const int max_quant = ctx->profile_info->max_quant;
 761     int error, bits, bits_limit;
 762     int mbs, prev, cur, new_score;
 763     int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
 764     int overquant;
 765     uint16_t *qmat;
 766     int linesize[4], line_add;
 767
 768     if (ctx->pictures_per_frame == 1)
 769         line_add = 0;
 770     else
 771         line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
 772     mbs = x + mbs_per_slice;
 773
 774     for (i = 0; i < ctx->num_planes; i++) {
 775         is_chroma[i]    = (i == 1 || i == 2);
 776         plane_factor[i] = slice_width_factor + 2;
 777         if (is_chroma[i])
 778             plane_factor[i] += ctx->chroma_factor - 3;
 779         if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
 780             xp             = x << 4;
 781             yp             = y << 4;
 782             num_cblocks[i] = 4;
 783             pwidth         = avctx->width;
 784         } else {
 785             xp             = x << 3;
 786             yp             = y << 4;
 787             num_cblocks[i] = 2;
 788             pwidth         = avctx->width >> 1;
 789         }
 790
 791         linesize[i] = pic->linesize[i] * ctx->pictures_per_frame;
 792         src = (const uint16_t*)(pic->data[i] + yp * linesize[i] +
 793                                 line_add * pic->linesize[i]) + xp;
 794
 795         if (i < 3) {
 796             get_slice_data(ctx, src, linesize[i], xp, yp,
 797                            pwidth, avctx->height / ctx->pictures_per_frame,
 798                            td->blocks[i], td->emu_buf,
 799                            mbs_per_slice, num_cblocks[i], is_chroma[i]);
 800         } else {
 801             get_alpha_data(ctx, src, linesize[i], xp, yp,
 802                            pwidth, avctx->height / ctx->pictures_per_frame,
 803                            td->blocks[i], mbs_per_slice, ctx->alpha_bits);
 804         }
 805     }
 806
 807     for (q = min_quant; q < max_quant + 2; q++) {
 808         td->nodes[trellis_node + q].prev_node = -1;
 809         td->nodes[trellis_node + q].quant     = q;
 810     }
 811
 812     // todo: maybe perform coarser quantising to fit into frame size when needed
 813     for (q = min_quant; q <= max_quant; q++) {
 814         bits  = 0;
 815         error = 0;
 816         for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
 817             bits += estimate_slice_plane(ctx, &error, i,
 818                                          src, linesize[i],
 819                                          mbs_per_slice,
 820                                          num_cblocks[i], plane_factor[i],
 821                                          ctx->quants[q], td);
 822         }
 823         if (ctx->alpha_bits)
 824             bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
 825                                          mbs_per_slice, q, td->blocks[3]);
 826         if (bits > 65000 * 8) {
 827             error = SCORE_LIMIT;
 828             break;
 829         }
 830         slice_bits[q]  = bits;
 831         slice_score[q] = error;
 832     }
 833     if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
 834         slice_bits[max_quant + 1]  = slice_bits[max_quant];
 835         slice_score[max_quant + 1] = slice_score[max_quant] + 1;
 836         overquant = max_quant;
 837     } else {
 838         for (q = max_quant + 1; q < 128; q++) {
 839             bits  = 0;
 840             error = 0;
 841             if (q < MAX_STORED_Q) {
 842                 qmat = ctx->quants[q];
 843             } else {
 844                 qmat = td->custom_q;
 845                 for (i = 0; i < 64; i++)
 846                     qmat[i] = ctx->quant_mat[i] * q;
 847             }
 848             for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
 849                 bits += estimate_slice_plane(ctx, &error, i,
 850                                              src, linesize[i],
 851                                              mbs_per_slice,
 852                                              num_cblocks[i], plane_factor[i],
 853                                              qmat, td);
 854             }
 855             if (ctx->alpha_bits)
 856                 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
 857                                              mbs_per_slice, q, td->blocks[3]);
 858             if (bits <= ctx->bits_per_mb * mbs_per_slice)
 859                 break;
 860         }
 861
 862         slice_bits[max_quant + 1]  = bits;
 863         slice_score[max_quant + 1] = error;
 864         overquant = q;
 865     }
 866     td->nodes[trellis_node + max_quant + 1].quant = overquant;
 867
 868     bits_limit = mbs * ctx->bits_per_mb;
 869     for (pq = min_quant; pq < max_quant + 2; pq++) {
 870         prev = trellis_node - TRELLIS_WIDTH + pq;
 871
 872         for (q = min_quant; q < max_quant + 2; q++) {
 873             cur = trellis_node + q;
 874
 875             bits  = td->nodes[prev].bits + slice_bits[q];
 876             error = slice_score[q];
 877             if (bits > bits_limit)
 878                 error = SCORE_LIMIT;
 879
 880             if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
 881                 new_score = td->nodes[prev].score + error;
 882             else
 883                 new_score = SCORE_LIMIT;
 884             if (td->nodes[cur].prev_node == -1 ||
 885                 td->nodes[cur].score >= new_score) {
 886
 887                 td->nodes[cur].bits      = bits;
 888                 td->nodes[cur].score     = new_score;
 889                 td->nodes[cur].prev_node = prev;
 890             }
 891         }
 892     }
 893
 894     error = td->nodes[trellis_node + min_quant].score;
 895     pq    = trellis_node + min_quant;
 896     for (q = min_quant + 1; q < max_quant + 2; q++) {
 897         if (td->nodes[trellis_node + q].score <= error) {
 898             error = td->nodes[trellis_node + q].score;
 899             pq    = trellis_node + q;
 900         }
 901     }
 902
 903     return pq;
 904 }
 905
 906 static int find_quant_thread(AVCodecContext *avctx, void *arg,
 907                              int jobnr, int threadnr)
 908 {
 909     ProresContext *ctx = avctx->priv_data;
 910     ProresThreadData *td = ctx->tdata + threadnr;
 911     int mbs_per_slice = ctx->mbs_per_slice;
 912     int x, y = jobnr, mb, q = 0;
 913
 914     for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
 915         while (ctx->mb_width - x < mbs_per_slice)
 916             mbs_per_slice >>= 1;
 917         q = find_slice_quant(avctx, avctx->coded_frame,
 918                              (mb + 1) * TRELLIS_WIDTH, x, y,
 919                              mbs_per_slice, td);
 920     }
 921
 922     for (x = ctx->slices_width - 1; x >= 0; x--) {
 923         ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
 924         q = td->nodes[q].prev_node;
 925     }
 926
 927     return 0;
 928 }
 929
 930 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 931                         const AVFrame *pic, int *got_packet)
 932 {
 933     ProresContext *ctx = avctx->priv_data;
 934     uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
 935     uint8_t *picture_size_pos;
 936     PutBitContext pb;
 937     int x, y, i, mb, q = 0;
 938     int sizes[4] = { 0 };
 939     int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
 940     int frame_size, picture_size, slice_size;
 941     int pkt_size, ret;
 942     uint8_t frame_flags;
 943
 944     *avctx->coded_frame           = *pic;
 945     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
 946     avctx->coded_frame->key_frame = 1;
 947
 948     pkt_size = ctx->frame_size_upper_bound;
 949
 950     if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size + FF_MIN_BUFFER_SIZE)) < 0)
 951         return ret;
 952
 953     orig_buf = pkt->data;
 954
 955     // frame atom
 956     orig_buf += 4;                              // frame size
 957     bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
 958     buf = orig_buf;
 959
 960     // frame header
 961     tmp = buf;
 962     buf += 2;                                   // frame header size will be stored here
 963     bytestream_put_be16  (&buf, 0);             // version 1
 964     bytestream_put_buffer(&buf, ctx->vendor, 4);
 965     bytestream_put_be16  (&buf, avctx->width);
 966     bytestream_put_be16  (&buf, avctx->height);
 967
 968     frame_flags = ctx->chroma_factor << 6;
 969     if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
 970         frame_flags |= pic->top_field_first ? 0x04 : 0x08;
 971     bytestream_put_byte  (&buf, frame_flags);
 972
 973     bytestream_put_byte  (&buf, 0);             // reserved
 974     bytestream_put_byte  (&buf, avctx->color_primaries);
 975     bytestream_put_byte  (&buf, avctx->color_trc);
 976     bytestream_put_byte  (&buf, avctx->colorspace);
 977     bytestream_put_byte  (&buf, 0x40 | (ctx->alpha_bits >> 3));
 978     bytestream_put_byte  (&buf, 0);             // reserved
 979     if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
 980         bytestream_put_byte  (&buf, 0x03);      // matrix flags - both matrices are present
 981         // luma quantisation matrix
 982         for (i = 0; i < 64; i++)
 983             bytestream_put_byte(&buf, ctx->quant_mat[i]);
 984         // chroma quantisation matrix
 985         for (i = 0; i < 64; i++)
 986             bytestream_put_byte(&buf, ctx->quant_mat[i]);
 987     } else {
 988         bytestream_put_byte  (&buf, 0x00);      // matrix flags - default matrices are used
 989     }
 990     bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
 991
 992     for (ctx->cur_picture_idx = 0;
 993          ctx->cur_picture_idx < ctx->pictures_per_frame;
 994          ctx->cur_picture_idx++) {
 995         // picture header
 996         picture_size_pos = buf + 1;
 997         bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
 998         buf += 4;                                   // picture data size will be stored here
 999         bytestream_put_be16  (&buf, ctx->slices_per_picture);
1000         bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
1001
1002         // seek table - will be filled during slice encoding
1003         slice_sizes = buf;
1004         buf += ctx->slices_per_picture * 2;
1005
1006         // slices
1007         if (!ctx->force_quant) {
1008             ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
1009                                   ctx->mb_height);
1010             if (ret)
1011                 return ret;
1012         }
1013
1014         for (y = 0; y < ctx->mb_height; y++) {
1015             int mbs_per_slice = ctx->mbs_per_slice;
1016             for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1017                 q = ctx->force_quant ? ctx->force_quant
1018                                      : ctx->slice_q[mb + y * ctx->slices_width];
1019
1020                 while (ctx->mb_width - x < mbs_per_slice)
1021                     mbs_per_slice >>= 1;
1022
1023                 bytestream_put_byte(&buf, slice_hdr_size << 3);
1024                 slice_hdr = buf;
1025                 buf += slice_hdr_size - 1;
1026                 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
1027                 ret = encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
1028                 if (ret < 0)
1029                     return ret;
1030
1031                 bytestream_put_byte(&slice_hdr, q);
1032                 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1033                 for (i = 0; i < ctx->num_planes - 1; i++) {
1034                     bytestream_put_be16(&slice_hdr, sizes[i]);
1035                     slice_size += sizes[i];
1036                 }
1037                 bytestream_put_be16(&slice_sizes, slice_size);
1038                 buf += slice_size - slice_hdr_size;
1039             }
1040         }
1041
1042         picture_size = buf - (picture_size_pos - 1);
1043         bytestream_put_be32(&picture_size_pos, picture_size);
1044     }
1045
1046     orig_buf -= 8;
1047     frame_size = buf - orig_buf;
1048     bytestream_put_be32(&orig_buf, frame_size);
1049
1050     pkt->size   = frame_size;
1051     pkt->flags |= AV_PKT_FLAG_KEY;
1052     *got_packet = 1;
1053
1054     return 0;
1055 }
1056
1057 static av_cold int encode_close(AVCodecContext *avctx)
1058 {
1059     ProresContext *ctx = avctx->priv_data;
1060     int i;
1061
1062     av_freep(&avctx->coded_frame);
1063
1064     if (ctx->tdata) {
1065         for (i = 0; i < avctx->thread_count; i++)
1066             av_free(ctx->tdata[i].nodes);
1067     }
1068     av_freep(&ctx->tdata);
1069     av_freep(&ctx->slice_q);
1070
1071     return 0;
1072 }
1073
1074 static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
1075                         int linesize, int16_t *block)
1076 {
1077     int x, y;
1078     const uint16_t *tsrc = src;
1079
1080     for (y = 0; y < 8; y++) {
1081         for (x = 0; x < 8; x++)
1082             block[y * 8 + x] = tsrc[x];
1083         tsrc += linesize >> 1;
1084     }
1085     fdsp->fdct(block);
1086 }
1087
1088 static av_cold int encode_init(AVCodecContext *avctx)
1089 {
1090     ProresContext *ctx = avctx->priv_data;
1091     int mps;
1092     int i, j;
1093     int min_quant, max_quant;
1094     int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
1095
1096     avctx->bits_per_raw_sample = 10;
1097     avctx->coded_frame = av_frame_alloc();
1098     if (!avctx->coded_frame)
1099         return AVERROR(ENOMEM);
1100
1101     ctx->fdct      = prores_fdct;
1102     ctx->scantable = interlaced ? ff_prores_interlaced_scan
1103                                 : ff_prores_progressive_scan;
1104     ff_fdctdsp_init(&ctx->fdsp, avctx);
1105
1106     mps = ctx->mbs_per_slice;
1107     if (mps & (mps - 1)) {
1108         av_log(avctx, AV_LOG_ERROR,
1109                "there should be an integer power of two MBs per slice\n");
1110         return AVERROR(EINVAL);
1111     }
1112     if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1113         if (ctx->alpha_bits & 7) {
1114             av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1115             return AVERROR(EINVAL);
1116         }
1117     } else {
1118         ctx->alpha_bits = 0;
1119     }
1120
1121     ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1122                          ? CFACTOR_Y422
1123                          : CFACTOR_Y444;
1124     ctx->profile_info  = prores_profile_info + ctx->profile;
1125     ctx->num_planes    = 3 + !!ctx->alpha_bits;
1126
1127     ctx->mb_width      = FFALIGN(avctx->width,  16) >> 4;
1128
1129     if (interlaced)
1130         ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1131     else
1132         ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
1133
1134     ctx->slices_width  = ctx->mb_width / mps;
1135     ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1136     ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1137     ctx->pictures_per_frame = 1 + interlaced;
1138
1139     if (ctx->quant_sel == -1)
1140         ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1141     else
1142         ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
1143
1144     if (strlen(ctx->vendor) != 4) {
1145         av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1146         return AVERROR_INVALIDDATA;
1147     }
1148
1149     ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1150     if (!ctx->force_quant) {
1151         if (!ctx->bits_per_mb) {
1152             for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1153                 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1154                                            ctx->pictures_per_frame)
1155                     break;
1156             ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
1157         } else if (ctx->bits_per_mb < 128) {
1158             av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1159             return AVERROR_INVALIDDATA;
1160         }
1161
1162         min_quant = ctx->profile_info->min_quant;
1163         max_quant = ctx->profile_info->max_quant;
1164         for (i = min_quant; i < MAX_STORED_Q; i++) {
1165             for (j = 0; j < 64; j++)
1166                 ctx->quants[i][j] = ctx->quant_mat[j] * i;
1167         }
1168
1169         ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1170         if (!ctx->slice_q) {
1171             encode_close(avctx);
1172             return AVERROR(ENOMEM);
1173         }
1174
1175         ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1176         if (!ctx->tdata) {
1177             encode_close(avctx);
1178             return AVERROR(ENOMEM);
1179         }
1180
1181         for (j = 0; j < avctx->thread_count; j++) {
1182             ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1183                                             * TRELLIS_WIDTH
1184                                             * sizeof(*ctx->tdata->nodes));
1185             if (!ctx->tdata[j].nodes) {
1186                 encode_close(avctx);
1187                 return AVERROR(ENOMEM);
1188             }
1189             for (i = min_quant; i < max_quant + 2; i++) {
1190                 ctx->tdata[j].nodes[i].prev_node = -1;
1191                 ctx->tdata[j].nodes[i].bits      = 0;
1192                 ctx->tdata[j].nodes[i].score     = 0;
1193             }
1194         }
1195     } else {
1196         int ls = 0;
1197
1198         if (ctx->force_quant > 64) {
1199             av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1200             return AVERROR_INVALIDDATA;
1201         }
1202
1203         for (j = 0; j < 64; j++) {
1204             ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1205             ls += av_log2((1 << 11)  / ctx->quants[0][j]) * 2 + 1;
1206         }
1207
1208         ctx->bits_per_mb = ls * 8;
1209         if (ctx->chroma_factor == CFACTOR_Y444)
1210             ctx->bits_per_mb += ls * 4;
1211         if (ctx->num_planes == 4)
1212             ctx->bits_per_mb += ls * 4;
1213     }
1214
1215     ctx->frame_size_upper_bound = ctx->pictures_per_frame *
1216                                   ctx->slices_per_picture *
1217                                   (2 + 2 * ctx->num_planes +
1218                                    (mps * ctx->bits_per_mb) / 8)
1219                                   + 200;
1220
1221     avctx->codec_tag   = ctx->profile_info->tag;
1222
1223     av_log(avctx, AV_LOG_DEBUG,
1224            "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1225            ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1226            interlaced ? "yes" : "no", ctx->bits_per_mb);
1227     av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1228            ctx->frame_size_upper_bound);
1229
1230     return 0;
1231 }
1232
/* Byte offset of member x inside ProresContext, used by the option table. */
#define OFFSET(x) offsetof(ProresContext, x)
/* Flags shared by the encoder options; parenthesized so the expansion
 * stays a single operand wherever the macro is used. */
#define VE     (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
1235
1236 static const AVOption options[] = {
1237     { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1238         AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1239     { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1240         { .i64 = PRORES_PROFILE_STANDARD },
1241         PRORES_PROFILE_PROXY, PRORES_PROFILE_4444, VE, "profile" },
1242     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1243         0, 0, VE, "profile" },
1244     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1245         0, 0, VE, "profile" },
1246     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1247         0, 0, VE, "profile" },
1248     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1249         0, 0, VE, "profile" },
1250     { "4444",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1251         0, 0, VE, "profile" },
1252     { "vendor", "vendor ID", OFFSET(vendor),
1253         AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
1254     { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1255         AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
1256     { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1257         { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1258     { "auto",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1259         0, 0, VE, "quant_mat" },
1260     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1261         0, 0, VE, "quant_mat" },
1262     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1263         0, 0, VE, "quant_mat" },
1264     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1265         0, 0, VE, "quant_mat" },
1266     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1267         0, 0, VE, "quant_mat" },
1268     { "default",       NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1269         0, 0, VE, "quant_mat" },
1270     { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1271         { .i64 = 16 }, 0, 16, VE },
1272     { NULL }
1273 };
1274
1275 static const AVClass proresenc_class = {
1276     .class_name = "ProRes encoder",
1277     .item_name  = av_default_item_name,
1278     .option     = options,
1279     .version    = LIBAVUTIL_VERSION_INT,
1280 };
1281
1282 AVCodec ff_prores_ks_encoder = {
1283     .name           = "prores_ks",
1284     .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1285     .type           = AVMEDIA_TYPE_VIDEO,
1286     .id             = AV_CODEC_ID_PRORES,
1287     .priv_data_size = sizeof(ProresContext),
1288     .init           = encode_init,
1289     .close          = encode_close,
1290     .encode2        = encode_frame,
1291     .capabilities   = CODEC_CAP_SLICE_THREADS,
1292     .pix_fmts       = (const enum AVPixelFormat[]) {
1293                           AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1294                           AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1295                       },
1296     .priv_class     = &proresenc_class,
1297 };