git.sesse.net Git - ffmpeg/blob - libavcodec/proresenc_kostya.c

   1 /*
   2  * Apple ProRes encoder
   3  *
   4  * Copyright (c) 2012 Konstantin Shishkov
   5  *
 * This encoder appears to be based on Anatoliy Wasserman's encoder,
 * considering the similarities in the bugs.
   8  *
   9  * This file is part of Libav.
  10  *
  11  * Libav is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * Libav is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with Libav; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/opt.h"
  27 #include "avcodec.h"
  28 #include "put_bits.h"
  29 #include "bytestream.h"
  30 #include "internal.h"
  31 #include "proresdsp.h"
  32 #include "proresdata.h"
  33
/* Chroma format codes; the selected one is written (shifted left by 6)
 * into the frame-flags byte of the frame header. */
#define CFACTOR_Y422 2
#define CFACTOR_Y444 3

/* Upper bound on macroblocks per slice; sizes the coefficient buffer
 * in ProresContext. */
#define MAX_MBS_PER_SLICE 8

#define MAX_PLANES 3 // should be increased to 4 when there's PIX_FMT_YUV444AP10
  40
/* Encoder profiles; the value is used as an index into prores_profile_info[]. */
enum {
    PRORES_PROFILE_PROXY = 0,
    PRORES_PROFILE_LT,
    PRORES_PROFILE_STANDARD,
    PRORES_PROFILE_HQ,
};

/* Quantisation matrix selectors; the value is used as an index into
 * prores_quant_matrices[]. */
enum {
    QUANT_MAT_PROXY = 0,
    QUANT_MAT_LT,
    QUANT_MAT_STANDARD,
    QUANT_MAT_HQ,
    QUANT_MAT_DEFAULT,
};
  55
/*
 * Base 8x8 quantisation matrices, one row per QUANT_MAT_* selector.
 * The encoder multiplies every entry by the slice quantiser to obtain the
 * divisor applied to the corresponding coefficient.
 */
static const uint8_t prores_quant_matrices[][64] = {
    { // proxy
         4,  7,  9, 11, 13, 14, 15, 63,
         7,  7, 11, 12, 14, 15, 63, 63,
         9, 11, 13, 14, 15, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63,
    },
    { // LT
         4,  5,  6,  7,  9, 11, 13, 15,
         5,  5,  7,  8, 11, 13, 15, 17,
         6,  7,  9, 11, 13, 15, 15, 17,
         7,  7,  9, 11, 13, 15, 17, 19,
         7,  9, 11, 13, 14, 16, 19, 23,
         9, 11, 13, 14, 16, 19, 23, 29,
         9, 11, 13, 15, 17, 21, 28, 35,
        11, 13, 16, 17, 21, 28, 35, 41,
    },
    { // standard
         4,  4,  5,  5,  6,  7,  7,  9,
         4,  4,  5,  6,  7,  7,  9,  9,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  6,  7,  7,  8,  9, 10, 12,
         6,  7,  7,  8,  9, 10, 12, 15,
         6,  7,  7,  9, 10, 11, 14, 17,
         7,  7,  9, 10, 11, 14, 17, 21,
    },
    { // high quality
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  5,
         4,  4,  4,  4,  4,  4,  5,  5,
         4,  4,  4,  4,  4,  5,  5,  6,
         4,  4,  4,  4,  5,  5,  6,  7,
         4,  4,  4,  4,  5,  6,  7,  7,
    },
    { // codec default
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
    },
};
 108
/*
 * Frame-size classes expressed as a macroblock count. encode_init() picks the
 * first entry that is >= mb_width * mb_height (falling back to the last one)
 * and uses its index to select a value from the profile's br_tab[].
 */
#define NUM_MB_LIMITS 4
static const int prores_mb_limits[NUM_MB_LIMITS] = {
    1620, // up to 720x576
    2700, // up to 960x720
    6075, // up to 1440x1080
    9216, // up to 2048x1152
};
 116
/*
 * Per-profile encoding parameters, indexed by PRORES_PROFILE_*.
 *
 *  full_name  human-readable profile name
 *  tag        four-character tag built with MKTAG
 *  min_quant  lowest quantiser tried by the per-slice rate search
 *  max_quant  highest regular quantiser tried by the per-slice rate search
 *  br_tab     default bits-per-macroblock budget, one per prores_mb_limits[] class
 *  quant      QUANT_MAT_* matrix used when no matrix is selected explicitly
 */
static const struct prores_profile {
    const char *full_name;
    uint32_t    tag;
    int         min_quant;
    int         max_quant;
    int         br_tab[NUM_MB_LIMITS];
    int         quant;
} prores_profile_info[4] = {
    {
        .full_name = "proxy",
        .tag       = MKTAG('a', 'p', 'c', 'o'),
        .min_quant = 4,
        .max_quant = 8,
        .br_tab    = { 300, 242, 220, 194 },
        .quant     = QUANT_MAT_PROXY,
    },
    {
        .full_name = "LT",
        .tag       = MKTAG('a', 'p', 'c', 's'),
        .min_quant = 1,
        .max_quant = 9,
        .br_tab    = { 720, 560, 490, 440 },
        .quant     = QUANT_MAT_LT,
    },
    {
        .full_name = "standard",
        .tag       = MKTAG('a', 'p', 'c', 'n'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 1050, 808, 710, 632 },
        .quant     = QUANT_MAT_STANDARD,
    },
    {
        .full_name = "high quality",
        .tag       = MKTAG('a', 'p', 'c', 'h'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 1566, 1216, 1070, 950 },
        .quant     = QUANT_MAT_HQ,
    }
// for 4444 profile bitrate numbers are { 2350, 1828, 1600, 1425 }
};
 159
 160 #define TRELLIS_WIDTH 16
 161 #define SCORE_LIMIT   INT_MAX / 2
 162
 163 struct TrellisNode {
 164     int prev_node;
 165     int quant;
 166     int bits;
 167     int score;
 168 };
 169
 170 #define MAX_STORED_Q 16
 171
/**
 * Private encoder state (accessed through AVCodecContext.priv_data).
 */
typedef struct ProresContext {
    AVClass *class;
    /* transform coefficients of the slice being processed, 64 per block, per plane */
    DECLARE_ALIGNED(16, DCTELEM, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
    /* 16x16 scratch in which get_slice_data() pads macroblocks crossing the picture edge */
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
    /* quant_mat scaled by each stored quantiser value */
    int16_t quants[MAX_STORED_Q][64];
    /* scratch matrix for quantisers >= MAX_STORED_Q */
    int16_t custom_q[64];
    /* base matrix selected in encode_init() */
    const uint8_t *quant_mat;

    ProresDSPContext dsp;
    ScanTable  scantable;

    /* picture dimensions in 16x16 macroblocks */
    int mb_width, mb_height;
    /* nominal slice width in macroblocks (must be a power of two) */
    int mbs_per_slice;
    int num_chroma_blocks, chroma_factor;
    /* number of slices per macroblock row */
    int slices_width;
    /* total number of slices in the picture */
    int num_slices;
    int num_planes;
    /* bit budget per macroblock used by the rate search */
    int bits_per_mb;
    /* fixed quantiser derived from global_quality; 0 enables the rate search */
    int force_quant;

    /* four-character vendor ID written into the frame header */
    char *vendor;
    /* user-selected QUANT_MAT_* value, or -1 to use the profile default */
    int quant_sel;

    /* used together with FF_MIN_BUFFER_SIZE to size the output packet */
    int frame_size;

    int profile;
    const struct prores_profile *profile_info;

    /* trellis storage: (slices_width + 1) * TRELLIS_WIDTH nodes */
    struct TrellisNode *nodes;
    /* quantiser chosen for each slice of the current macroblock row */
    int *slice_q;
} ProresContext;
 203
 204 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
 205                            int linesize, int x, int y, int w, int h,
 206                            DCTELEM *blocks,
 207                            int mbs_per_slice, int blocks_per_mb, int is_chroma)
 208 {
 209     const uint16_t *esrc;
 210     const int mb_width = 4 * blocks_per_mb;
 211     int elinesize;
 212     int i, j, k;
 213
 214     for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
 215         if (x >= w) {
 216             memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
 217                               * sizeof(*blocks));
 218             return;
 219         }
 220         if (x + mb_width <= w && y + 16 <= h) {
 221             esrc      = src;
 222             elinesize = linesize;
 223         } else {
 224             int bw, bh, pix;
 225
 226             esrc      = ctx->emu_buf;
 227             elinesize = 16 * sizeof(*ctx->emu_buf);
 228
 229             bw = FFMIN(w - x, mb_width);
 230             bh = FFMIN(h - y, 16);
 231
 232             for (j = 0; j < bh; j++) {
 233                 memcpy(ctx->emu_buf + j * 16,
 234                        (const uint8_t*)src + j * linesize,
 235                        bw * sizeof(*src));
 236                 pix = ctx->emu_buf[j * 16 + bw - 1];
 237                 for (k = bw; k < mb_width; k++)
 238                     ctx->emu_buf[j * 16 + k] = pix;
 239             }
 240             for (; j < 16; j++)
 241                 memcpy(ctx->emu_buf + j * 16,
 242                        ctx->emu_buf + (bh - 1) * 16,
 243                        mb_width * sizeof(*ctx->emu_buf));
 244         }
 245         if (!is_chroma) {
 246             ctx->dsp.fdct(esrc, elinesize, blocks);
 247             blocks += 64;
 248             if (blocks_per_mb > 2) {
 249                 ctx->dsp.fdct(src + 8, linesize, blocks);
 250                 blocks += 64;
 251             }
 252             ctx->dsp.fdct(src + linesize * 4, linesize, blocks);
 253             blocks += 64;
 254             if (blocks_per_mb > 2) {
 255                 ctx->dsp.fdct(src + linesize * 4 + 8, linesize, blocks);
 256                 blocks += 64;
 257             }
 258         } else {
 259             ctx->dsp.fdct(esrc, elinesize, blocks);
 260             blocks += 64;
 261             ctx->dsp.fdct(src + linesize * 4, linesize, blocks);
 262             blocks += 64;
 263             if (blocks_per_mb > 2) {
 264                 ctx->dsp.fdct(src + 8, linesize, blocks);
 265                 blocks += 64;
 266                 ctx->dsp.fdct(src + linesize * 4 + 8, linesize, blocks);
 267                 blocks += 64;
 268             }
 269         }
 270
 271         x += mb_width;
 272     }
 273 }
 274
 275 /**
 276  * Write an unsigned rice/exp golomb codeword.
 277  */
 278 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
 279 {
 280     unsigned int rice_order, exp_order, switch_bits, switch_val;
 281     int exponent;
 282
 283     /* number of prefix bits to switch between Rice and expGolomb */
 284     switch_bits = (codebook & 3) + 1;
 285     rice_order  =  codebook >> 5;       /* rice code order */
 286     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 287
 288     switch_val  = switch_bits << rice_order;
 289
 290     if (val >= switch_val) {
 291         val -= switch_val - (1 << exp_order);
 292         exponent = av_log2(val);
 293
 294         put_bits(pb, exponent - exp_order + switch_bits, 0);
 295         put_bits(pb, exponent + 1, val);
 296     } else {
 297         exponent = val >> rice_order;
 298
 299         if (exponent)
 300             put_bits(pb, exponent, 0);
 301         put_bits(pb, 1, 1);
 302         if (rice_order)
 303             put_sbits(pb, rice_order, val);
 304     }
 305 }
 306
 307 #define GET_SIGN(x)  ((x) >> 31)
 308 #define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
 309
 310 static void encode_dcs(PutBitContext *pb, DCTELEM *blocks,
 311                        int blocks_per_slice, int scale)
 312 {
 313     int i;
 314     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 315
 316     prev_dc = (blocks[0] - 0x4000) / scale;
 317     encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
 318     sign     = 0;
 319     codebook = 3;
 320     blocks  += 64;
 321
 322     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 323         dc       = (blocks[0] - 0x4000) / scale;
 324         delta    = dc - prev_dc;
 325         new_sign = GET_SIGN(delta);
 326         delta    = (delta ^ sign) - sign;
 327         code     = MAKE_CODE(delta);
 328         encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
 329         codebook = (code + (code & 1)) >> 1;
 330         codebook = FFMIN(codebook, 3);
 331         sign     = new_sign;
 332         prev_dc  = dc;
 333     }
 334 }
 335
 336 static void encode_acs(PutBitContext *pb, DCTELEM *blocks,
 337                        int blocks_per_slice,
 338                        int plane_size_factor,
 339                        const uint8_t *scan, const int16_t *qmat)
 340 {
 341     int idx, i;
 342     int run, level, run_cb, lev_cb;
 343     int max_coeffs, abs_level;
 344
 345     max_coeffs = blocks_per_slice << 6;
 346     run_cb     = ff_prores_run_to_cb_index[4];
 347     lev_cb     = ff_prores_lev_to_cb_index[2];
 348     run        = 0;
 349
 350     for (i = 1; i < 64; i++) {
 351         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 352             level = blocks[idx] / qmat[scan[i]];
 353             if (level) {
 354                 abs_level = FFABS(level);
 355                 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
 356                 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
 357                                     abs_level - 1);
 358                 put_sbits(pb, 1, GET_SIGN(level));
 359
 360                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 361                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 362                 run    = 0;
 363             } else {
 364                 run++;
 365             }
 366         }
 367     }
 368 }
 369
 370 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
 371                               const uint16_t *src, int linesize,
 372                               int mbs_per_slice, DCTELEM *blocks,
 373                               int blocks_per_mb, int plane_size_factor,
 374                               const int16_t *qmat)
 375 {
 376     int blocks_per_slice, saved_pos;
 377
 378     saved_pos = put_bits_count(pb);
 379     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 380
 381     encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
 382     encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
 383                ctx->scantable.permutated, qmat);
 384     flush_put_bits(pb);
 385
 386     return (put_bits_count(pb) - saved_pos) >> 3;
 387 }
 388
 389 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
 390                         PutBitContext *pb,
 391                         int sizes[4], int x, int y, int quant,
 392                         int mbs_per_slice)
 393 {
 394     ProresContext *ctx = avctx->priv_data;
 395     int i, xp, yp;
 396     int total_size = 0;
 397     const uint16_t *src;
 398     int slice_width_factor = av_log2(mbs_per_slice);
 399     int num_cblocks, pwidth;
 400     int plane_factor, is_chroma;
 401     uint16_t *qmat;
 402
 403     if (ctx->force_quant) {
 404         qmat = ctx->quants[0];
 405     } else if (quant < MAX_STORED_Q) {
 406         qmat = ctx->quants[quant];
 407     } else {
 408         qmat = ctx->custom_q;
 409         for (i = 0; i < 64; i++)
 410             qmat[i] = ctx->quant_mat[i] * quant;
 411     }
 412
 413     for (i = 0; i < ctx->num_planes; i++) {
 414         is_chroma    = (i == 1 || i == 2);
 415         plane_factor = slice_width_factor + 2;
 416         if (is_chroma)
 417             plane_factor += ctx->chroma_factor - 3;
 418         if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
 419             xp          = x << 4;
 420             yp          = y << 4;
 421             num_cblocks = 4;
 422             pwidth      = avctx->width;
 423         } else {
 424             xp          = x << 3;
 425             yp          = y << 4;
 426             num_cblocks = 2;
 427             pwidth      = avctx->width >> 1;
 428         }
 429         src = (const uint16_t*)(pic->data[i] + yp * pic->linesize[i]) + xp;
 430
 431         get_slice_data(ctx, src, pic->linesize[i], xp, yp,
 432                        pwidth, avctx->height, ctx->blocks[0],
 433                        mbs_per_slice, num_cblocks, is_chroma);
 434         sizes[i] = encode_slice_plane(ctx, pb, src, pic->linesize[i],
 435                                       mbs_per_slice, ctx->blocks[0],
 436                                       num_cblocks, plane_factor,
 437                                       qmat);
 438         total_size += sizes[i];
 439     }
 440     return total_size;
 441 }
 442
 443 static inline int estimate_vlc(unsigned codebook, int val)
 444 {
 445     unsigned int rice_order, exp_order, switch_bits, switch_val;
 446     int exponent;
 447
 448     /* number of prefix bits to switch between Rice and expGolomb */
 449     switch_bits = (codebook & 3) + 1;
 450     rice_order  =  codebook >> 5;       /* rice code order */
 451     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 452
 453     switch_val  = switch_bits << rice_order;
 454
 455     if (val >= switch_val) {
 456         val -= switch_val - (1 << exp_order);
 457         exponent = av_log2(val);
 458
 459         return exponent * 2 - exp_order + switch_bits + 1;
 460     } else {
 461         return (val >> rice_order) + rice_order + 1;
 462     }
 463 }
 464
 465 static int estimate_dcs(int *error, DCTELEM *blocks, int blocks_per_slice,
 466                         int scale)
 467 {
 468     int i;
 469     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 470     int bits;
 471
 472     prev_dc  = (blocks[0] - 0x4000) / scale;
 473     bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
 474     sign     = 0;
 475     codebook = 3;
 476     blocks  += 64;
 477     *error  += FFABS(blocks[0] - 0x4000) % scale;
 478
 479     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 480         dc       = (blocks[0] - 0x4000) / scale;
 481         *error  += FFABS(blocks[0] - 0x4000) % scale;
 482         delta    = dc - prev_dc;
 483         new_sign = GET_SIGN(delta);
 484         delta    = (delta ^ sign) - sign;
 485         code     = MAKE_CODE(delta);
 486         bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
 487         codebook = (code + (code & 1)) >> 1;
 488         codebook = FFMIN(codebook, 3);
 489         sign     = new_sign;
 490         prev_dc  = dc;
 491     }
 492
 493     return bits;
 494 }
 495
 496 static int estimate_acs(int *error, DCTELEM *blocks, int blocks_per_slice,
 497                         int plane_size_factor,
 498                         const uint8_t *scan, const int16_t *qmat)
 499 {
 500     int idx, i;
 501     int run, level, run_cb, lev_cb;
 502     int max_coeffs, abs_level;
 503     int bits = 0;
 504
 505     max_coeffs = blocks_per_slice << 6;
 506     run_cb     = ff_prores_run_to_cb_index[4];
 507     lev_cb     = ff_prores_lev_to_cb_index[2];
 508     run        = 0;
 509
 510     for (i = 1; i < 64; i++) {
 511         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 512             level   = blocks[idx] / qmat[scan[i]];
 513             *error += FFABS(blocks[idx]) % qmat[scan[i]];
 514             if (level) {
 515                 abs_level = FFABS(level);
 516                 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
 517                 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
 518                                      abs_level - 1) + 1;
 519
 520                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 521                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 522                 run    = 0;
 523             } else {
 524                 run++;
 525             }
 526         }
 527     }
 528
 529     return bits;
 530 }
 531
 532 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
 533                                 const uint16_t *src, int linesize,
 534                                 int mbs_per_slice,
 535                                 int blocks_per_mb, int plane_size_factor,
 536                                 const int16_t *qmat)
 537 {
 538     int blocks_per_slice;
 539     int bits;
 540
 541     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 542
 543     bits  = estimate_dcs(error, ctx->blocks[plane], blocks_per_slice, qmat[0]);
 544     bits += estimate_acs(error, ctx->blocks[plane], blocks_per_slice,
 545                          plane_size_factor, ctx->scantable.permutated, qmat);
 546
 547     return FFALIGN(bits, 8);
 548 }
 549
 550 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
 551                             int trellis_node, int x, int y, int mbs_per_slice)
 552 {
 553     ProresContext *ctx = avctx->priv_data;
 554     int i, q, pq, xp, yp;
 555     const uint16_t *src;
 556     int slice_width_factor = av_log2(mbs_per_slice);
 557     int num_cblocks[MAX_PLANES], pwidth;
 558     int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
 559     const int min_quant = ctx->profile_info->min_quant;
 560     const int max_quant = ctx->profile_info->max_quant;
 561     int error, bits, bits_limit;
 562     int mbs, prev, cur, new_score;
 563     int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
 564     int overquant;
 565     uint16_t *qmat;
 566
 567     mbs = x + mbs_per_slice;
 568
 569     for (i = 0; i < ctx->num_planes; i++) {
 570         is_chroma[i]    = (i == 1 || i == 2);
 571         plane_factor[i] = slice_width_factor + 2;
 572         if (is_chroma[i])
 573             plane_factor[i] += ctx->chroma_factor - 3;
 574         if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
 575             xp             = x << 4;
 576             yp             = y << 4;
 577             num_cblocks[i] = 4;
 578             pwidth         = avctx->width;
 579         } else {
 580             xp             = x << 3;
 581             yp             = y << 4;
 582             num_cblocks[i] = 2;
 583             pwidth         = avctx->width >> 1;
 584         }
 585         src = (const uint16_t*)(pic->data[i] + yp * pic->linesize[i]) + xp;
 586
 587         get_slice_data(ctx, src, pic->linesize[i], xp, yp,
 588                        pwidth, avctx->height, ctx->blocks[i],
 589                        mbs_per_slice, num_cblocks[i], is_chroma[i]);
 590     }
 591
 592     for (q = min_quant; q < max_quant + 2; q++) {
 593         ctx->nodes[trellis_node + q].prev_node = -1;
 594         ctx->nodes[trellis_node + q].quant     = q;
 595     }
 596
 597     // todo: maybe perform coarser quantising to fit into frame size when needed
 598     for (q = min_quant; q <= max_quant; q++) {
 599         bits  = 0;
 600         error = 0;
 601         for (i = 0; i < ctx->num_planes; i++) {
 602             bits += estimate_slice_plane(ctx, &error, i,
 603                                          src, pic->linesize[i],
 604                                          mbs_per_slice,
 605                                          num_cblocks[i], plane_factor[i],
 606                                          ctx->quants[q]);
 607         }
 608         if (bits > 65000 * 8) {
 609             error = SCORE_LIMIT;
 610             break;
 611         }
 612         slice_bits[q]  = bits;
 613         slice_score[q] = error;
 614     }
 615     if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
 616         slice_bits[max_quant + 1]  = slice_bits[max_quant];
 617         slice_score[max_quant + 1] = slice_score[max_quant] + 1;
 618         overquant = max_quant;
 619     } else {
 620         for (q = max_quant + 1; q < 128; q++) {
 621             bits  = 0;
 622             error = 0;
 623             if (q < MAX_STORED_Q) {
 624                 qmat = ctx->quants[q];
 625             } else {
 626                 qmat = ctx->custom_q;
 627                 for (i = 0; i < 64; i++)
 628                     qmat[i] = ctx->quant_mat[i] * q;
 629             }
 630             for (i = 0; i < ctx->num_planes; i++) {
 631                 bits += estimate_slice_plane(ctx, &error, i,
 632                                              src, pic->linesize[i],
 633                                              mbs_per_slice,
 634                                              num_cblocks[i], plane_factor[i],
 635                                              qmat);
 636             }
 637             if (bits <= ctx->bits_per_mb * mbs_per_slice)
 638                 break;
 639         }
 640
 641         slice_bits[max_quant + 1]  = bits;
 642         slice_score[max_quant + 1] = error;
 643         overquant = q;
 644     }
 645     ctx->nodes[trellis_node + max_quant + 1].quant = overquant;
 646
 647     bits_limit = mbs * ctx->bits_per_mb;
 648     for (pq = min_quant; pq < max_quant + 2; pq++) {
 649         prev = trellis_node - TRELLIS_WIDTH + pq;
 650
 651         for (q = min_quant; q < max_quant + 2; q++) {
 652             cur = trellis_node + q;
 653
 654             bits  = ctx->nodes[prev].bits + slice_bits[q];
 655             error = slice_score[q];
 656             if (bits > bits_limit)
 657                 error = SCORE_LIMIT;
 658
 659             if (ctx->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
 660                 new_score = ctx->nodes[prev].score + error;
 661             else
 662                 new_score = SCORE_LIMIT;
 663             if (ctx->nodes[cur].prev_node == -1 ||
 664                 ctx->nodes[cur].score >= new_score) {
 665
 666                 ctx->nodes[cur].bits      = bits;
 667                 ctx->nodes[cur].score     = new_score;
 668                 ctx->nodes[cur].prev_node = prev;
 669             }
 670         }
 671     }
 672
 673     error = ctx->nodes[trellis_node + min_quant].score;
 674     pq    = trellis_node + min_quant;
 675     for (q = min_quant + 1; q < max_quant + 2; q++) {
 676         if (ctx->nodes[trellis_node + q].score <= error) {
 677             error = ctx->nodes[trellis_node + q].score;
 678             pq    = trellis_node + q;
 679         }
 680     }
 681
 682     return pq;
 683 }
 684
 685 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 686                         const AVFrame *pic, int *got_packet)
 687 {
 688     ProresContext *ctx = avctx->priv_data;
 689     uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
 690     uint8_t *picture_size_pos;
 691     PutBitContext pb;
 692     int x, y, i, mb, q = 0;
 693     int sizes[4] = { 0 };
 694     int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
 695     int frame_size, picture_size, slice_size;
 696     int mbs_per_slice = ctx->mbs_per_slice;
 697     int pkt_size, ret;
 698
 699     *avctx->coded_frame           = *pic;
 700     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
 701     avctx->coded_frame->key_frame = 1;
 702
 703     pkt_size = ctx->frame_size + FF_MIN_BUFFER_SIZE;
 704
 705     if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
 706         return ret;
 707
 708     orig_buf = pkt->data;
 709
 710     // frame atom
 711     orig_buf += 4;                              // frame size
 712     bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
 713     buf = orig_buf;
 714
 715     // frame header
 716     tmp = buf;
 717     buf += 2;                                   // frame header size will be stored here
 718     bytestream_put_be16  (&buf, 0);             // version 1
 719     bytestream_put_buffer(&buf, ctx->vendor, 4);
 720     bytestream_put_be16  (&buf, avctx->width);
 721     bytestream_put_be16  (&buf, avctx->height);
 722     bytestream_put_byte  (&buf, ctx->chroma_factor << 6); // frame flags
 723     bytestream_put_byte  (&buf, 0);             // reserved
 724     bytestream_put_byte  (&buf, avctx->color_primaries);
 725     bytestream_put_byte  (&buf, avctx->color_trc);
 726     bytestream_put_byte  (&buf, avctx->colorspace);
 727     bytestream_put_byte  (&buf, 0x40);          // source format and alpha information
 728     bytestream_put_byte  (&buf, 0);             // reserved
 729     if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
 730         bytestream_put_byte  (&buf, 0x03);      // matrix flags - both matrices are present
 731         // luma quantisation matrix
 732         for (i = 0; i < 64; i++)
 733             bytestream_put_byte(&buf, ctx->quant_mat[i]);
 734         // chroma quantisation matrix
 735         for (i = 0; i < 64; i++)
 736             bytestream_put_byte(&buf, ctx->quant_mat[i]);
 737     } else {
 738         bytestream_put_byte  (&buf, 0x00);      // matrix flags - default matrices are used
 739     }
 740     bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
 741
 742     // picture header
 743     picture_size_pos = buf + 1;
 744     bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
 745     buf += 4;                                   // picture data size will be stored here
 746     bytestream_put_be16  (&buf, ctx->num_slices); // total number of slices
 747     bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
 748
 749     // seek table - will be filled during slice encoding
 750     slice_sizes = buf;
 751     buf += ctx->num_slices * 2;
 752
 753     // slices
 754     for (y = 0; y < ctx->mb_height; y++) {
 755         mbs_per_slice = ctx->mbs_per_slice;
 756         if (!ctx->force_quant) {
 757             for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
 758                 while (ctx->mb_width - x < mbs_per_slice)
 759                     mbs_per_slice >>= 1;
 760                 q = find_slice_quant(avctx, pic, (mb + 1) * TRELLIS_WIDTH, x, y,
 761                                      mbs_per_slice);
 762             }
 763
 764             for (x = ctx->slices_width - 1; x >= 0; x--) {
 765                 ctx->slice_q[x] = ctx->nodes[q].quant;
 766                 q = ctx->nodes[q].prev_node;
 767             }
 768         }
 769
 770         mbs_per_slice = ctx->mbs_per_slice;
 771         for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
 772             q = ctx->force_quant ? ctx->force_quant : ctx->slice_q[mb];
 773
 774             while (ctx->mb_width - x < mbs_per_slice)
 775                 mbs_per_slice >>= 1;
 776
 777             bytestream_put_byte(&buf, slice_hdr_size << 3);
 778             slice_hdr = buf;
 779             buf += slice_hdr_size - 1;
 780             init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
 781             encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
 782
 783             bytestream_put_byte(&slice_hdr, q);
 784             slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
 785             for (i = 0; i < ctx->num_planes - 1; i++) {
 786                 bytestream_put_be16(&slice_hdr, sizes[i]);
 787                 slice_size += sizes[i];
 788             }
 789             bytestream_put_be16(&slice_sizes, slice_size);
 790             buf += slice_size - slice_hdr_size;
 791         }
 792     }
 793
 794     orig_buf -= 8;
 795     frame_size = buf - orig_buf;
 796     picture_size = buf - picture_size_pos - 6;
 797     bytestream_put_be32(&orig_buf, frame_size);
 798     bytestream_put_be32(&picture_size_pos, picture_size);
 799
 800     pkt->size   = frame_size;
 801     pkt->flags |= AV_PKT_FLAG_KEY;
 802     *got_packet = 1;
 803
 804     return 0;
 805 }
 806
 807 static av_cold int encode_close(AVCodecContext *avctx)
 808 {
 809     ProresContext *ctx = avctx->priv_data;
 810
 811     if (avctx->coded_frame->data[0])
 812         avctx->release_buffer(avctx, avctx->coded_frame);
 813
 814     av_freep(&avctx->coded_frame);
 815
 816     av_freep(&ctx->nodes);
 817     av_freep(&ctx->slice_q);
 818
 819     return 0;
 820 }
 821
 822 static av_cold int encode_init(AVCodecContext *avctx)
 823 {
 824     ProresContext *ctx = avctx->priv_data;
 825     int mps;
 826     int i, j;
 827     int min_quant, max_quant;
 828
 829     avctx->bits_per_raw_sample = 10;
 830     avctx->coded_frame = avcodec_alloc_frame();
 831     if (!avctx->coded_frame)
 832         return AVERROR(ENOMEM);
 833
 834     ff_proresdsp_init(&ctx->dsp, avctx);
 835     ff_init_scantable(ctx->dsp.dct_permutation, &ctx->scantable,
 836                       ff_prores_progressive_scan);
 837
 838     mps = ctx->mbs_per_slice;
 839     if (mps & (mps - 1)) {
 840         av_log(avctx, AV_LOG_ERROR,
 841                "there should be an integer power of two MBs per slice\n");
 842         return AVERROR(EINVAL);
 843     }
 844
 845     ctx->chroma_factor = avctx->pix_fmt == PIX_FMT_YUV422P10
 846                          ? CFACTOR_Y422
 847                          : CFACTOR_Y444;
 848     ctx->profile_info  = prores_profile_info + ctx->profile;
 849     ctx->num_planes    = 3;
 850
 851     ctx->mb_width      = FFALIGN(avctx->width,  16) >> 4;
 852     ctx->mb_height     = FFALIGN(avctx->height, 16) >> 4;
 853     ctx->slices_width  = ctx->mb_width / mps;
 854     ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
 855     ctx->num_slices    = ctx->mb_height * ctx->slices_width;
 856
 857     if (ctx->quant_sel == -1)
 858         ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
 859     else
 860         ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
 861
 862     if (strlen(ctx->vendor) != 4) {
 863         av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
 864         return AVERROR_INVALIDDATA;
 865     }
 866
 867     ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
 868     if (!ctx->force_quant) {
 869         if (!ctx->bits_per_mb) {
 870             for (i = 0; i < NUM_MB_LIMITS - 1; i++)
 871                 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height)
 872                     break;
 873             ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
 874         } else if (ctx->bits_per_mb < 128) {
 875             av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
 876             return AVERROR_INVALIDDATA;
 877         }
 878
 879         min_quant = ctx->profile_info->min_quant;
 880         max_quant = ctx->profile_info->max_quant;
 881         for (i = min_quant; i < MAX_STORED_Q; i++) {
 882             for (j = 0; j < 64; j++)
 883                 ctx->quants[i][j] = ctx->quant_mat[j] * i;
 884         }
 885
 886         ctx->nodes = av_malloc((ctx->slices_width + 1) * TRELLIS_WIDTH
 887                                * sizeof(*ctx->nodes));
 888         if (!ctx->nodes) {
 889             encode_close(avctx);
 890             return AVERROR(ENOMEM);
 891         }
 892         for (i = min_quant; i < max_quant + 2; i++) {
 893             ctx->nodes[i].prev_node = -1;
 894             ctx->nodes[i].bits      = 0;
 895             ctx->nodes[i].score     = 0;
 896         }
 897
 898         ctx->slice_q = av_malloc(ctx->slices_width * sizeof(*ctx->slice_q));
 899         if (!ctx->slice_q) {
 900             encode_close(avctx);
 901             return AVERROR(ENOMEM);
 902         }
 903     } else {
 904         int ls = 0;
 905
 906         if (ctx->force_quant > 64) {
 907             av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
 908             return AVERROR_INVALIDDATA;
 909         }
 910
 911         for (j = 0; j < 64; j++) {
 912             ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
 913             ls += av_log2((1 << 11)  / ctx->quants[0][j]) * 2 + 1;
 914         }
 915
 916         ctx->bits_per_mb = ls * 8;
 917         if (ctx->chroma_factor == CFACTOR_Y444)
 918             ctx->bits_per_mb += ls * 4;
 919         if (ctx->num_planes == 4)
 920             ctx->bits_per_mb += ls * 4;
 921     }
 922
 923     ctx->frame_size = ctx->num_slices * (2 + 2 * ctx->num_planes
 924                                          + (2 * mps * ctx->bits_per_mb) / 8)
 925                       + 200;
 926
 927     avctx->codec_tag   = ctx->profile_info->tag;
 928
 929     av_log(avctx, AV_LOG_DEBUG, "profile %d, %d slices, %d bits per MB\n",
 930            ctx->profile, ctx->num_slices, ctx->bits_per_mb);
 931     av_log(avctx, AV_LOG_DEBUG, "estimated frame size %d\n",
 932            ctx->frame_size);
 933
 934     return 0;
 935 }
 936
 937 #define OFFSET(x) offsetof(ProresContext, x)
 938 #define VE     AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 939
 940 static const AVOption options[] = {
 941     { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
 942         AV_OPT_TYPE_INT, { 8 }, 1, MAX_MBS_PER_SLICE, VE },
 943     { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
 944         { PRORES_PROFILE_STANDARD },
 945         PRORES_PROFILE_PROXY, PRORES_PROFILE_HQ, VE, "profile" },
 946     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_PROXY },
 947         0, 0, VE, "profile" },
 948     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_LT },
 949         0, 0, VE, "profile" },
 950     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_STANDARD },
 951         0, 0, VE, "profile" },
 952     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_HQ },
 953         0, 0, VE, "profile" },
 954     { "vendor", "vendor ID", OFFSET(vendor),
 955         AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
 956     { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
 957         AV_OPT_TYPE_INT, { 0 }, 0, 8192, VE },
 958     { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
 959         { -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
 960     { "auto",          NULL, 0, AV_OPT_TYPE_CONST, { -1 },
 961         0, 0, VE, "quant_mat" },
 962     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_PROXY },
 963         0, 0, VE, "quant_mat" },
 964     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_LT },
 965         0, 0, VE, "quant_mat" },
 966     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_STANDARD },
 967         0, 0, VE, "quant_mat" },
 968     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_HQ },
 969         0, 0, VE, "quant_mat" },
 970     { "default",       NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_DEFAULT },
 971         0, 0, VE, "quant_mat" },
 972     { NULL }
 973 };
 974
 975 static const AVClass proresenc_class = {
 976     .class_name = "ProRes encoder",
 977     .item_name  = av_default_item_name,
 978     .option     = options,
 979     .version    = LIBAVUTIL_VERSION_INT,
 980 };
 981
 982 AVCodec ff_prores_kostya_encoder = {
 983     .name           = "prores_kostya",
 984     .type           = AVMEDIA_TYPE_VIDEO,
 985     .id             = CODEC_ID_PRORES,
 986     .priv_data_size = sizeof(ProresContext),
 987     .init           = encode_init,
 988     .close          = encode_close,
 989     .encode2        = encode_frame,
 990     .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
 991     .pix_fmts       = (const enum PixelFormat[]) {
 992                           PIX_FMT_YUV422P10, PIX_FMT_YUV444P10, PIX_FMT_NONE
 993                       },
 994     .priv_class     = &proresenc_class,
 995 };