git.sesse.net Git - ffmpeg/blob - libavcodec/proresenc.c

   1 /*
   2  * Apple ProRes encoder
   3  *
   4  * Copyright (c) 2012 Konstantin Shishkov
   5  *
   6  * This file is part of Libav.
   7  *
   8  * Libav is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * Libav is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with Libav; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 #include "libavutil/opt.h"
  24 #include "avcodec.h"
  25 #include "put_bits.h"
  26 #include "bytestream.h"
  27 #include "internal.h"
  28 #include "proresdsp.h"
  29 #include "proresdata.h"
  30
     /* Chroma format codes. encode_frame() stores the selected value in the
      * top two bits of the frame-flags byte (chroma_factor << 6). */
  31 #define CFACTOR_Y422 2
  32 #define CFACTOR_Y444 3
  33
     /* Upper bound on macroblocks per slice; sizes the coefficient buffers. */
  34 #define MAX_MBS_PER_SLICE 8
  35
     /* Number of planes the per-plane coefficient buffers are sized for. */
  36 #define MAX_PLANES 3 // should be increased to 4 when there's PIX_FMT_YUV444AP10
  37
  38 enum {
  39     PRORES_PROFILE_PROXY = 0,
  40     PRORES_PROFILE_LT,
  41     PRORES_PROFILE_STANDARD,
  42     PRORES_PROFILE_HQ,
  43 };
  44
  45 enum {
  46     QUANT_MAT_PROXY = 0,
  47     QUANT_MAT_LT,
  48     QUANT_MAT_STANDARD,
  49     QUANT_MAT_HQ,
  50     QUANT_MAT_DEFAULT,
  51 };
  52
     /* Base 8x8 quantisation matrices, one row per QUANT_MAT_* selector.
      * The selected row is written verbatim to the frame header (as both the
      * luma and the chroma matrix) unless QUANT_MAT_DEFAULT is chosen, and is
      * multiplied by the slice quantiser to obtain the effective matrix. */
  53 static const uint8_t prores_quant_matrices[][64] = {
  54     { // proxy
  55          4,  7,  9, 11, 13, 14, 15, 63,
  56          7,  7, 11, 12, 14, 15, 63, 63,
  57          9, 11, 13, 14, 15, 63, 63, 63,
  58         11, 11, 13, 14, 63, 63, 63, 63,
  59         11, 13, 14, 63, 63, 63, 63, 63,
  60         13, 14, 63, 63, 63, 63, 63, 63,
  61         13, 63, 63, 63, 63, 63, 63, 63,
  62         63, 63, 63, 63, 63, 63, 63, 63,
  63     },
  64     { // LT
  65          4,  5,  6,  7,  9, 11, 13, 15,
  66          5,  5,  7,  8, 11, 13, 15, 17,
  67          6,  7,  9, 11, 13, 15, 15, 17,
  68          7,  7,  9, 11, 13, 15, 17, 19,
  69          7,  9, 11, 13, 14, 16, 19, 23,
  70          9, 11, 13, 14, 16, 19, 23, 29,
  71          9, 11, 13, 15, 17, 21, 28, 35,
  72         11, 13, 16, 17, 21, 28, 35, 41,
  73     },
  74     { // standard
  75          4,  4,  5,  5,  6,  7,  7,  9,
  76          4,  4,  5,  6,  7,  7,  9,  9,
  77          5,  5,  6,  7,  7,  9,  9, 10,
  78          5,  5,  6,  7,  7,  9,  9, 10,
  79          5,  6,  7,  7,  8,  9, 10, 12,
  80          6,  7,  7,  8,  9, 10, 12, 15,
  81          6,  7,  7,  9, 10, 11, 14, 17,
  82          7,  7,  9, 10, 11, 14, 17, 21,
  83     },
  84     { // high quality
  85          4,  4,  4,  4,  4,  4,  4,  4,
  86          4,  4,  4,  4,  4,  4,  4,  4,
  87          4,  4,  4,  4,  4,  4,  4,  4,
  88          4,  4,  4,  4,  4,  4,  4,  5,
  89          4,  4,  4,  4,  4,  4,  5,  5,
  90          4,  4,  4,  4,  4,  5,  5,  6,
  91          4,  4,  4,  4,  5,  5,  6,  7,
  92          4,  4,  4,  4,  5,  6,  7,  7,
  93     },
  94     { // codec default
  95          4,  4,  4,  4,  4,  4,  4,  4,
  96          4,  4,  4,  4,  4,  4,  4,  4,
  97          4,  4,  4,  4,  4,  4,  4,  4,
  98          4,  4,  4,  4,  4,  4,  4,  4,
  99          4,  4,  4,  4,  4,  4,  4,  4,
 100          4,  4,  4,  4,  4,  4,  4,  4,
 101          4,  4,  4,  4,  4,  4,  4,  4,
 102          4,  4,  4,  4,  4,  4,  4,  4,
 103     },
 104 };
 105
     /* Frame-size classes expressed as a count of 16x16 macroblocks
      * (e.g. 720x576 -> 45 * 36 = 1620). Each prores_profile.br_tab[] has
      * NUM_MB_LIMITS entries, presumably one per class; the lookup itself is
      * outside this part of the file. */
 106 #define NUM_MB_LIMITS 4
 107 static const int prores_mb_limits[NUM_MB_LIMITS] = {
 108     1620, // up to 720x576
 109     2700, // up to 960x720
 110     6075, // up to 1440x1080
 111     9216, // up to 2048x1152
 112 };
 113
     /* Per-profile encoder parameters; encode_init() selects an entry with
      * prores_profile_info + ctx->profile.
      *
      * min_quant/max_quant bound the quantisers tried for every slice by
      * find_slice_quant(); quant is the QUANT_MAT_* matrix used when no
      * matrix was selected explicitly (quant_sel == -1).
      * NOTE(review): tag looks like the container FourCC and br_tab like a
      * per-frame-size bit budget; neither is consumed in this part of the
      * file, so confirm against the rest of encode_init(). */
 114 static const struct prores_profile {
 115     const char *full_name;
 116     uint32_t    tag;
 117     int         min_quant;
 118     int         max_quant;
 119     int         br_tab[NUM_MB_LIMITS];
 120     int         quant;
 121 } prores_profile_info[4] = {
 122     {
 123         .full_name = "proxy",
 124         .tag       = MKTAG('a', 'p', 'c', 'o'),
 125         .min_quant = 4,
 126         .max_quant = 8,
 127         .br_tab    = { 300, 242, 220, 194 },
 128         .quant     = QUANT_MAT_PROXY,
 129     },
 130     {
 131         .full_name = "LT",
 132         .tag       = MKTAG('a', 'p', 'c', 's'),
 133         .min_quant = 1,
 134         .max_quant = 9,
 135         .br_tab    = { 720, 560, 490, 440 },
 136         .quant     = QUANT_MAT_LT,
 137     },
 138     {
 139         .full_name = "standard",
 140         .tag       = MKTAG('a', 'p', 'c', 'n'),
 141         .min_quant = 1,
 142         .max_quant = 6,
 143         .br_tab    = { 1050, 808, 710, 632 },
 144         .quant     = QUANT_MAT_STANDARD,
 145     },
 146     {
 147         .full_name = "high quality",
 148         .tag       = MKTAG('a', 'p', 'c', 'h'),
 149         .min_quant = 1,
 150         .max_quant = 6,
 151         .br_tab    = { 1566, 1216, 1070, 950 },
 152         .quant     = QUANT_MAT_HQ,
 153     }
 154 // for 4444 profile bitrate numbers are { 2350, 1828, 1600, 1425 }
 155 };
 156
 157 #define TRELLIS_WIDTH 16
 158 #define SCORE_LIMIT   INT_MAX / 2
 159
     /* One node of the per-row quantiser trellis built by find_slice_quant(). */
 160 struct TrellisNode {
 161     int prev_node; /* index of the best predecessor node, -1 while unset */
 162     int quant;     /* quantiser this node stands for */
 163     int bits;      /* bits accumulated along the best path ending here */
 164     int score;     /* error accumulated along that path, or SCORE_LIMIT */
 165 };
 166
     /* Number of per-quantiser matrices kept in ProresContext.quants[];
      * quantisers at or above this value get a matrix built on the fly. */
 167 #define MAX_STORED_Q 16
 168
     /* Scratch state owned by one worker of find_quant_thread(). */
 169 typedef struct ProresThreadData {
         /* transformed coefficients of the current slice, one buffer per plane */
 170     DECLARE_ALIGNED(16, DCTELEM, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
         /* 16x16 edge-emulation buffer for macroblocks crossing the picture edge */
 171     DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
         /* matrix built for quantisers >= MAX_STORED_Q */
 172     int16_t custom_q[64];
         /* trellis nodes, TRELLIS_WIDTH per slice column; allocated outside this view */
 173     struct TrellisNode *nodes;
 174 } ProresThreadData;
 175
     /* Private encoder state (avctx->priv_data). */
 176 typedef struct ProresContext {
 177     AVClass *class;
         /* coefficient and edge-emulation scratch used by encode_slice() */
 178     DECLARE_ALIGNED(16, DCTELEM, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
 179     DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
         /* per-quantiser matrices, indexed by quantiser; filled outside this view */
 180     int16_t quants[MAX_STORED_Q][64];
         /* matrix built by encode_slice() for quantisers >= MAX_STORED_Q */
 181     int16_t custom_q[64];
         /* base matrix, a row of prores_quant_matrices[] */
 182     const uint8_t *quant_mat;
 183
 184     ProresDSPContext dsp;
 185     ScanTable  scantable;
 186
         /* frame size in 16x16 macroblocks (dimensions rounded up) */
 187     int mb_width, mb_height;
         /* nominal slice width in macroblocks; must be a power of two */
 188     int mbs_per_slice;
 189     int num_chroma_blocks, chroma_factor;
         /* slices per macroblock row */
 190     int slices_width;
         /* mb_height * slices_width */
 191     int num_slices;
 192     int num_planes;
         /* bit budget per macroblock used by find_slice_quant() */
 193     int bits_per_mb;
         /* fixed quantiser derived from global_quality; 0 enables the
          * per-slice quantiser search */
 194     int force_quant;
 195
         /* vendor ID copied into the frame header; must be 4 characters long */
 196     char *vendor;
         /* QUANT_MAT_* selector, or -1 to use the profile's matrix */
 197     int quant_sel;
 198
         /* added to FF_MIN_BUFFER_SIZE to size the output packet */
 199     int frame_size;
 200
 201     int profile;
 202     const struct prores_profile *profile_info;
 203
         /* quantiser chosen for each slice, indexed mb + y * slices_width */
 204     int *slice_q;
 205
         /* per-thread scratch data, indexed by thread number */
 206     ProresThreadData *tdata;
 207 } ProresContext;
 208
 209 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
 210                            int linesize, int x, int y, int w, int h,
 211                            DCTELEM *blocks, uint16_t *emu_buf,
 212                            int mbs_per_slice, int blocks_per_mb, int is_chroma)
 213 {
 214     const uint16_t *esrc;
 215     const int mb_width = 4 * blocks_per_mb;
 216     int elinesize;
 217     int i, j, k;
 218
 219     for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
 220         if (x >= w) {
 221             memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
 222                               * sizeof(*blocks));
 223             return;
 224         }
 225         if (x + mb_width <= w && y + 16 <= h) {
 226             esrc      = src;
 227             elinesize = linesize;
 228         } else {
 229             int bw, bh, pix;
 230
 231             esrc      = emu_buf;
 232             elinesize = 16 * sizeof(*emu_buf);
 233
 234             bw = FFMIN(w - x, mb_width);
 235             bh = FFMIN(h - y, 16);
 236
 237             for (j = 0; j < bh; j++) {
 238                 memcpy(emu_buf + j * 16,
 239                        (const uint8_t*)src + j * linesize,
 240                        bw * sizeof(*src));
 241                 pix = emu_buf[j * 16 + bw - 1];
 242                 for (k = bw; k < mb_width; k++)
 243                     emu_buf[j * 16 + k] = pix;
 244             }
 245             for (; j < 16; j++)
 246                 memcpy(emu_buf + j * 16,
 247                        emu_buf + (bh - 1) * 16,
 248                        mb_width * sizeof(*emu_buf));
 249         }
 250         if (!is_chroma) {
 251             ctx->dsp.fdct(esrc, elinesize, blocks);
 252             blocks += 64;
 253             if (blocks_per_mb > 2) {
 254                 ctx->dsp.fdct(src + 8, linesize, blocks);
 255                 blocks += 64;
 256             }
 257             ctx->dsp.fdct(src + linesize * 4, linesize, blocks);
 258             blocks += 64;
 259             if (blocks_per_mb > 2) {
 260                 ctx->dsp.fdct(src + linesize * 4 + 8, linesize, blocks);
 261                 blocks += 64;
 262             }
 263         } else {
 264             ctx->dsp.fdct(esrc, elinesize, blocks);
 265             blocks += 64;
 266             ctx->dsp.fdct(src + linesize * 4, linesize, blocks);
 267             blocks += 64;
 268             if (blocks_per_mb > 2) {
 269                 ctx->dsp.fdct(src + 8, linesize, blocks);
 270                 blocks += 64;
 271                 ctx->dsp.fdct(src + linesize * 4 + 8, linesize, blocks);
 272                 blocks += 64;
 273             }
 274         }
 275
 276         x += mb_width;
 277     }
 278 }
 279
 280 /**
 281  * Write an unsigned rice/exp golomb codeword.
 282  */
 283 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
 284 {
 285     unsigned int rice_order, exp_order, switch_bits, switch_val;
 286     int exponent;
 287
 288     /* number of prefix bits to switch between Rice and expGolomb */
 289     switch_bits = (codebook & 3) + 1;
 290     rice_order  =  codebook >> 5;       /* rice code order */
 291     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 292
 293     switch_val  = switch_bits << rice_order;
 294
 295     if (val >= switch_val) {
 296         val -= switch_val - (1 << exp_order);
 297         exponent = av_log2(val);
 298
 299         put_bits(pb, exponent - exp_order + switch_bits, 0);
 300         put_bits(pb, 1, 1);
 301         put_bits(pb, exponent, val);
 302     } else {
 303         exponent = val >> rice_order;
 304
 305         if (exponent)
 306             put_bits(pb, exponent, 0);
 307         put_bits(pb, 1, 1);
 308         if (rice_order)
 309             put_sbits(pb, rice_order, val);
 310     }
 311 }
 312
     /* GET_SIGN shifts the value right by 31; for a negative 32-bit int this
      * gives -1 where >> is an arithmetic shift, and 0 for non-negative input.
      * MAKE_CODE folds a signed value into an unsigned code:
      * 0, -1, 1, -2, 2, ... -> 0, 1, 2, 3, 4, ... */
 313 #define GET_SIGN(x)  ((x) >> 31)
 314 #define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
 315
 316 static void encode_dcs(PutBitContext *pb, DCTELEM *blocks,
 317                        int blocks_per_slice, int scale)
 318 {
 319     int i;
 320     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 321
 322     prev_dc = (blocks[0] - 0x4000) / scale;
 323     encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
 324     sign     = 0;
 325     codebook = 3;
 326     blocks  += 64;
 327
 328     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 329         dc       = (blocks[0] - 0x4000) / scale;
 330         delta    = dc - prev_dc;
 331         new_sign = GET_SIGN(delta);
 332         delta    = (delta ^ sign) - sign;
 333         code     = MAKE_CODE(delta);
 334         encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
 335         codebook = (code + (code & 1)) >> 1;
 336         codebook = FFMIN(codebook, 3);
 337         sign     = new_sign;
 338         prev_dc  = dc;
 339     }
 340 }
 341
 342 static void encode_acs(PutBitContext *pb, DCTELEM *blocks,
 343                        int blocks_per_slice,
 344                        int plane_size_factor,
 345                        const uint8_t *scan, const int16_t *qmat)
 346 {
 347     int idx, i;
 348     int run, level, run_cb, lev_cb;
 349     int max_coeffs, abs_level;
 350
 351     max_coeffs = blocks_per_slice << 6;
 352     run_cb     = ff_prores_run_to_cb_index[4];
 353     lev_cb     = ff_prores_lev_to_cb_index[2];
 354     run        = 0;
 355
 356     for (i = 1; i < 64; i++) {
 357         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 358             level = blocks[idx] / qmat[scan[i]];
 359             if (level) {
 360                 abs_level = FFABS(level);
 361                 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
 362                 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
 363                                     abs_level - 1);
 364                 put_sbits(pb, 1, GET_SIGN(level));
 365
 366                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 367                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 368                 run    = 0;
 369             } else {
 370                 run++;
 371             }
 372         }
 373     }
 374 }
 375
 376 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
 377                               const uint16_t *src, int linesize,
 378                               int mbs_per_slice, DCTELEM *blocks,
 379                               int blocks_per_mb, int plane_size_factor,
 380                               const int16_t *qmat)
 381 {
 382     int blocks_per_slice, saved_pos;
 383
 384     saved_pos = put_bits_count(pb);
 385     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 386
 387     encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
 388     encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
 389                ctx->scantable.permutated, qmat);
 390     flush_put_bits(pb);
 391
 392     return (put_bits_count(pb) - saved_pos) >> 3;
 393 }
 394
 395 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
 396                         PutBitContext *pb,
 397                         int sizes[4], int x, int y, int quant,
 398                         int mbs_per_slice)
 399 {
 400     ProresContext *ctx = avctx->priv_data;
 401     int i, xp, yp;
 402     int total_size = 0;
 403     const uint16_t *src;
 404     int slice_width_factor = av_log2(mbs_per_slice);
 405     int num_cblocks, pwidth;
 406     int plane_factor, is_chroma;
 407     uint16_t *qmat;
 408
 409     if (ctx->force_quant) {
 410         qmat = ctx->quants[0];
 411     } else if (quant < MAX_STORED_Q) {
 412         qmat = ctx->quants[quant];
 413     } else {
 414         qmat = ctx->custom_q;
 415         for (i = 0; i < 64; i++)
 416             qmat[i] = ctx->quant_mat[i] * quant;
 417     }
 418
 419     for (i = 0; i < ctx->num_planes; i++) {
 420         is_chroma    = (i == 1 || i == 2);
 421         plane_factor = slice_width_factor + 2;
 422         if (is_chroma)
 423             plane_factor += ctx->chroma_factor - 3;
 424         if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
 425             xp          = x << 4;
 426             yp          = y << 4;
 427             num_cblocks = 4;
 428             pwidth      = avctx->width;
 429         } else {
 430             xp          = x << 3;
 431             yp          = y << 4;
 432             num_cblocks = 2;
 433             pwidth      = avctx->width >> 1;
 434         }
 435         src = (const uint16_t*)(pic->data[i] + yp * pic->linesize[i]) + xp;
 436
 437         get_slice_data(ctx, src, pic->linesize[i], xp, yp,
 438                        pwidth, avctx->height, ctx->blocks[0], ctx->emu_buf,
 439                        mbs_per_slice, num_cblocks, is_chroma);
 440         sizes[i] = encode_slice_plane(ctx, pb, src, pic->linesize[i],
 441                                       mbs_per_slice, ctx->blocks[0],
 442                                       num_cblocks, plane_factor,
 443                                       qmat);
 444         total_size += sizes[i];
 445     }
 446     return total_size;
 447 }
 448
 449 static inline int estimate_vlc(unsigned codebook, int val)
 450 {
 451     unsigned int rice_order, exp_order, switch_bits, switch_val;
 452     int exponent;
 453
 454     /* number of prefix bits to switch between Rice and expGolomb */
 455     switch_bits = (codebook & 3) + 1;
 456     rice_order  =  codebook >> 5;       /* rice code order */
 457     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 458
 459     switch_val  = switch_bits << rice_order;
 460
 461     if (val >= switch_val) {
 462         val -= switch_val - (1 << exp_order);
 463         exponent = av_log2(val);
 464
 465         return exponent * 2 - exp_order + switch_bits + 1;
 466     } else {
 467         return (val >> rice_order) + rice_order + 1;
 468     }
 469 }
 470
 471 static int estimate_dcs(int *error, DCTELEM *blocks, int blocks_per_slice,
 472                         int scale)
 473 {
 474     int i;
 475     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 476     int bits;
 477
 478     prev_dc  = (blocks[0] - 0x4000) / scale;
 479     bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
 480     sign     = 0;
 481     codebook = 3;
 482     blocks  += 64;
 483     *error  += FFABS(blocks[0] - 0x4000) % scale;
 484
 485     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 486         dc       = (blocks[0] - 0x4000) / scale;
 487         *error  += FFABS(blocks[0] - 0x4000) % scale;
 488         delta    = dc - prev_dc;
 489         new_sign = GET_SIGN(delta);
 490         delta    = (delta ^ sign) - sign;
 491         code     = MAKE_CODE(delta);
 492         bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
 493         codebook = (code + (code & 1)) >> 1;
 494         codebook = FFMIN(codebook, 3);
 495         sign     = new_sign;
 496         prev_dc  = dc;
 497     }
 498
 499     return bits;
 500 }
 501
 502 static int estimate_acs(int *error, DCTELEM *blocks, int blocks_per_slice,
 503                         int plane_size_factor,
 504                         const uint8_t *scan, const int16_t *qmat)
 505 {
 506     int idx, i;
 507     int run, level, run_cb, lev_cb;
 508     int max_coeffs, abs_level;
 509     int bits = 0;
 510
 511     max_coeffs = blocks_per_slice << 6;
 512     run_cb     = ff_prores_run_to_cb_index[4];
 513     lev_cb     = ff_prores_lev_to_cb_index[2];
 514     run        = 0;
 515
 516     for (i = 1; i < 64; i++) {
 517         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 518             level   = blocks[idx] / qmat[scan[i]];
 519             *error += FFABS(blocks[idx]) % qmat[scan[i]];
 520             if (level) {
 521                 abs_level = FFABS(level);
 522                 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
 523                 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
 524                                      abs_level - 1) + 1;
 525
 526                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 527                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 528                 run    = 0;
 529             } else {
 530                 run++;
 531             }
 532         }
 533     }
 534
 535     return bits;
 536 }
 537
 538 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
 539                                 const uint16_t *src, int linesize,
 540                                 int mbs_per_slice,
 541                                 int blocks_per_mb, int plane_size_factor,
 542                                 const int16_t *qmat, ProresThreadData *td)
 543 {
 544     int blocks_per_slice;
 545     int bits;
 546
 547     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 548
 549     bits  = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
 550     bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
 551                          plane_size_factor, ctx->scantable.permutated, qmat);
 552
 553     return FFALIGN(bits, 8);
 554 }
 555
 556 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
 557                             int trellis_node, int x, int y, int mbs_per_slice,
 558                             ProresThreadData *td)
 559 {
 560     ProresContext *ctx = avctx->priv_data;
 561     int i, q, pq, xp, yp;
 562     const uint16_t *src;
 563     int slice_width_factor = av_log2(mbs_per_slice);
 564     int num_cblocks[MAX_PLANES], pwidth;
 565     int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
 566     const int min_quant = ctx->profile_info->min_quant;
 567     const int max_quant = ctx->profile_info->max_quant;
 568     int error, bits, bits_limit;
 569     int mbs, prev, cur, new_score;
 570     int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
 571     int overquant;
 572     uint16_t *qmat;
 573
 574     mbs = x + mbs_per_slice;
 575
 576     for (i = 0; i < ctx->num_planes; i++) {
 577         is_chroma[i]    = (i == 1 || i == 2);
 578         plane_factor[i] = slice_width_factor + 2;
 579         if (is_chroma[i])
 580             plane_factor[i] += ctx->chroma_factor - 3;
 581         if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
 582             xp             = x << 4;
 583             yp             = y << 4;
 584             num_cblocks[i] = 4;
 585             pwidth         = avctx->width;
 586         } else {
 587             xp             = x << 3;
 588             yp             = y << 4;
 589             num_cblocks[i] = 2;
 590             pwidth         = avctx->width >> 1;
 591         }
 592         src = (const uint16_t*)(pic->data[i] + yp * pic->linesize[i]) + xp;
 593
 594         get_slice_data(ctx, src, pic->linesize[i], xp, yp,
 595                        pwidth, avctx->height, td->blocks[i], td->emu_buf,
 596                        mbs_per_slice, num_cblocks[i], is_chroma[i]);
 597     }
 598
 599     for (q = min_quant; q < max_quant + 2; q++) {
 600         td->nodes[trellis_node + q].prev_node = -1;
 601         td->nodes[trellis_node + q].quant     = q;
 602     }
 603
 604     // todo: maybe perform coarser quantising to fit into frame size when needed
 605     for (q = min_quant; q <= max_quant; q++) {
 606         bits  = 0;
 607         error = 0;
 608         for (i = 0; i < ctx->num_planes; i++) {
 609             bits += estimate_slice_plane(ctx, &error, i,
 610                                          src, pic->linesize[i],
 611                                          mbs_per_slice,
 612                                          num_cblocks[i], plane_factor[i],
 613                                          ctx->quants[q], td);
 614         }
 615         if (bits > 65000 * 8) {
 616             error = SCORE_LIMIT;
 617             break;
 618         }
 619         slice_bits[q]  = bits;
 620         slice_score[q] = error;
 621     }
 622     if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
 623         slice_bits[max_quant + 1]  = slice_bits[max_quant];
 624         slice_score[max_quant + 1] = slice_score[max_quant] + 1;
 625         overquant = max_quant;
 626     } else {
 627         for (q = max_quant + 1; q < 128; q++) {
 628             bits  = 0;
 629             error = 0;
 630             if (q < MAX_STORED_Q) {
 631                 qmat = ctx->quants[q];
 632             } else {
 633                 qmat = td->custom_q;
 634                 for (i = 0; i < 64; i++)
 635                     qmat[i] = ctx->quant_mat[i] * q;
 636             }
 637             for (i = 0; i < ctx->num_planes; i++) {
 638                 bits += estimate_slice_plane(ctx, &error, i,
 639                                              src, pic->linesize[i],
 640                                              mbs_per_slice,
 641                                              num_cblocks[i], plane_factor[i],
 642                                              qmat, td);
 643             }
 644             if (bits <= ctx->bits_per_mb * mbs_per_slice)
 645                 break;
 646         }
 647
 648         slice_bits[max_quant + 1]  = bits;
 649         slice_score[max_quant + 1] = error;
 650         overquant = q;
 651     }
 652     td->nodes[trellis_node + max_quant + 1].quant = overquant;
 653
 654     bits_limit = mbs * ctx->bits_per_mb;
 655     for (pq = min_quant; pq < max_quant + 2; pq++) {
 656         prev = trellis_node - TRELLIS_WIDTH + pq;
 657
 658         for (q = min_quant; q < max_quant + 2; q++) {
 659             cur = trellis_node + q;
 660
 661             bits  = td->nodes[prev].bits + slice_bits[q];
 662             error = slice_score[q];
 663             if (bits > bits_limit)
 664                 error = SCORE_LIMIT;
 665
 666             if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
 667                 new_score = td->nodes[prev].score + error;
 668             else
 669                 new_score = SCORE_LIMIT;
 670             if (td->nodes[cur].prev_node == -1 ||
 671                 td->nodes[cur].score >= new_score) {
 672
 673                 td->nodes[cur].bits      = bits;
 674                 td->nodes[cur].score     = new_score;
 675                 td->nodes[cur].prev_node = prev;
 676             }
 677         }
 678     }
 679
 680     error = td->nodes[trellis_node + min_quant].score;
 681     pq    = trellis_node + min_quant;
 682     for (q = min_quant + 1; q < max_quant + 2; q++) {
 683         if (td->nodes[trellis_node + q].score <= error) {
 684             error = td->nodes[trellis_node + q].score;
 685             pq    = trellis_node + q;
 686         }
 687     }
 688
 689     return pq;
 690 }
 691
 692 static int find_quant_thread(AVCodecContext *avctx, void *arg,
 693                              int jobnr, int threadnr)
 694 {
 695     ProresContext *ctx = avctx->priv_data;
 696     ProresThreadData *td = ctx->tdata + threadnr;
 697     int mbs_per_slice = ctx->mbs_per_slice;
 698     int x, y = jobnr, mb, q = 0;
 699
 700     for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
 701         while (ctx->mb_width - x < mbs_per_slice)
 702             mbs_per_slice >>= 1;
 703         q = find_slice_quant(avctx, avctx->coded_frame,
 704                              (mb + 1) * TRELLIS_WIDTH, x, y,
 705                              mbs_per_slice, td);
 706     }
 707
 708     for (x = ctx->slices_width - 1; x >= 0; x--) {
 709         ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
 710         q = td->nodes[q].prev_node;
 711     }
 712
 713     return 0;
 714 }
 715
 716 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 717                         const AVFrame *pic, int *got_packet)
 718 {
 719     ProresContext *ctx = avctx->priv_data;
 720     uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
 721     uint8_t *picture_size_pos;
 722     PutBitContext pb;
 723     int x, y, i, mb, q = 0;
 724     int sizes[4] = { 0 };
 725     int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
 726     int frame_size, picture_size, slice_size;
 727     int mbs_per_slice = ctx->mbs_per_slice;
 728     int pkt_size, ret;
 729
 730     *avctx->coded_frame           = *pic;
 731     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
 732     avctx->coded_frame->key_frame = 1;
 733
 734     pkt_size = ctx->frame_size + FF_MIN_BUFFER_SIZE;
 735
 736     if ((ret = ff_alloc_packet(pkt, pkt_size)) < 0) {
 737         av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
 738         return ret;
 739     }
 740
 741     orig_buf = pkt->data;
 742
 743     // frame atom
 744     orig_buf += 4;                              // frame size
 745     bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
 746     buf = orig_buf;
 747
 748     // frame header
 749     tmp = buf;
 750     buf += 2;                                   // frame header size will be stored here
 751     bytestream_put_be16  (&buf, 0);             // version 1
 752     bytestream_put_buffer(&buf, ctx->vendor, 4);
 753     bytestream_put_be16  (&buf, avctx->width);
 754     bytestream_put_be16  (&buf, avctx->height);
 755     bytestream_put_byte  (&buf, ctx->chroma_factor << 6); // frame flags
 756     bytestream_put_byte  (&buf, 0);             // reserved
 757     bytestream_put_byte  (&buf, avctx->color_primaries);
 758     bytestream_put_byte  (&buf, avctx->color_trc);
 759     bytestream_put_byte  (&buf, avctx->colorspace);
 760     bytestream_put_byte  (&buf, 0x40);          // source format and alpha information
 761     bytestream_put_byte  (&buf, 0);             // reserved
 762     if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
 763         bytestream_put_byte  (&buf, 0x03);      // matrix flags - both matrices are present
 764         // luma quantisation matrix
 765         for (i = 0; i < 64; i++)
 766             bytestream_put_byte(&buf, ctx->quant_mat[i]);
 767         // chroma quantisation matrix
 768         for (i = 0; i < 64; i++)
 769             bytestream_put_byte(&buf, ctx->quant_mat[i]);
 770     } else {
 771         bytestream_put_byte  (&buf, 0x00);      // matrix flags - default matrices are used
 772     }
 773     bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
 774
 775     // picture header
 776     picture_size_pos = buf + 1;
 777     bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
 778     buf += 4;                                   // picture data size will be stored here
 779     bytestream_put_be16  (&buf, ctx->num_slices); // total number of slices
 780     bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
 781
 782     // seek table - will be filled during slice encoding
 783     slice_sizes = buf;
 784     buf += ctx->num_slices * 2;
 785
 786     // slices
 787     if (!ctx->force_quant) {
 788         ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
 789                               ctx->mb_height);
 790         if (ret)
 791             return ret;
 792     }
 793
 794     for (y = 0; y < ctx->mb_height; y++) {
 795         mbs_per_slice = ctx->mbs_per_slice;
 796         for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
 797             q = ctx->force_quant ? ctx->force_quant
 798                                  : ctx->slice_q[mb + y * ctx->slices_width];
 799
 800             while (ctx->mb_width - x < mbs_per_slice)
 801                 mbs_per_slice >>= 1;
 802
 803             bytestream_put_byte(&buf, slice_hdr_size << 3);
 804             slice_hdr = buf;
 805             buf += slice_hdr_size - 1;
 806             init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
 807             encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
 808
 809             bytestream_put_byte(&slice_hdr, q);
 810             slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
 811             for (i = 0; i < ctx->num_planes - 1; i++) {
 812                 bytestream_put_be16(&slice_hdr, sizes[i]);
 813                 slice_size += sizes[i];
 814             }
 815             bytestream_put_be16(&slice_sizes, slice_size);
 816             buf += slice_size - slice_hdr_size;
 817         }
 818     }
 819
 820     orig_buf -= 8;
 821     frame_size = buf - orig_buf;
 822     picture_size = buf - picture_size_pos - 6;
 823     bytestream_put_be32(&orig_buf, frame_size);
 824     bytestream_put_be32(&picture_size_pos, picture_size);
 825
 826     pkt->size   = frame_size;
 827     pkt->flags |= AV_PKT_FLAG_KEY;
 828     *got_packet = 1;
 829
 830     return 0;
 831 }
 832
 833 static av_cold int encode_close(AVCodecContext *avctx)
 834 {
 835     ProresContext *ctx = avctx->priv_data;
 836     int i;
 837
 838     if (avctx->coded_frame->data[0])
 839         avctx->release_buffer(avctx, avctx->coded_frame);
 840
 841     av_freep(&avctx->coded_frame);
 842
 843     if (ctx->tdata) {
 844         for (i = 0; i < avctx->thread_count; i++)
 845             av_free(ctx->tdata[i].nodes);
 846     }
 847     av_freep(&ctx->tdata);
 848     av_freep(&ctx->slice_q);
 849
 850     return 0;
 851 }
 852
 853 static av_cold int encode_init(AVCodecContext *avctx)
 854 {
 855     ProresContext *ctx = avctx->priv_data;
 856     int mps;
 857     int i, j;
 858     int min_quant, max_quant;
 859
 860     avctx->bits_per_raw_sample = 10;
 861     avctx->coded_frame = avcodec_alloc_frame();
 862     if (!avctx->coded_frame)
 863         return AVERROR(ENOMEM);
 864
 865     ff_proresdsp_init(&ctx->dsp);
 866     ff_init_scantable(ctx->dsp.dct_permutation, &ctx->scantable,
 867                       ff_prores_progressive_scan);
 868
 869     mps = ctx->mbs_per_slice;
 870     if (mps & (mps - 1)) {
 871         av_log(avctx, AV_LOG_ERROR,
 872                "there should be an integer power of two MBs per slice\n");
 873         return AVERROR(EINVAL);
 874     }
 875
 876     ctx->chroma_factor = avctx->pix_fmt == PIX_FMT_YUV422P10
 877                          ? CFACTOR_Y422
 878                          : CFACTOR_Y444;
 879     ctx->profile_info  = prores_profile_info + ctx->profile;
 880     ctx->num_planes    = 3;
 881
 882     ctx->mb_width      = FFALIGN(avctx->width,  16) >> 4;
 883     ctx->mb_height     = FFALIGN(avctx->height, 16) >> 4;
 884     ctx->slices_width  = ctx->mb_width / mps;
 885     ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
 886     ctx->num_slices    = ctx->mb_height * ctx->slices_width;
 887
 888     if (ctx->quant_sel == -1)
 889         ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
 890     else
 891         ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
 892
 893     if (strlen(ctx->vendor) != 4) {
 894         av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
 895         return AVERROR_INVALIDDATA;
 896     }
 897
 898     ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
 899     if (!ctx->force_quant) {
 900         if (!ctx->bits_per_mb) {
 901             for (i = 0; i < NUM_MB_LIMITS - 1; i++)
 902                 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height)
 903                     break;
 904             ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
 905         } else if (ctx->bits_per_mb < 128) {
 906             av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
 907             return AVERROR_INVALIDDATA;
 908         }
 909
 910         min_quant = ctx->profile_info->min_quant;
 911         max_quant = ctx->profile_info->max_quant;
 912         for (i = min_quant; i < MAX_STORED_Q; i++) {
 913             for (j = 0; j < 64; j++)
 914                 ctx->quants[i][j] = ctx->quant_mat[j] * i;
 915         }
 916
 917         ctx->slice_q = av_malloc(ctx->num_slices * sizeof(*ctx->slice_q));
 918         if (!ctx->slice_q) {
 919             encode_close(avctx);
 920             return AVERROR(ENOMEM);
 921         }
 922
 923         ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
 924         if (!ctx->tdata) {
 925             encode_close(avctx);
 926             return AVERROR(ENOMEM);
 927         }
 928
 929         for (j = 0; j < avctx->thread_count; j++) {
 930             ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
 931                                             * TRELLIS_WIDTH
 932                                             * sizeof(*ctx->tdata->nodes));
 933             if (!ctx->tdata[j].nodes) {
 934                 encode_close(avctx);
 935                 return AVERROR(ENOMEM);
 936             }
 937             for (i = min_quant; i < max_quant + 2; i++) {
 938                 ctx->tdata[j].nodes[i].prev_node = -1;
 939                 ctx->tdata[j].nodes[i].bits      = 0;
 940                 ctx->tdata[j].nodes[i].score     = 0;
 941             }
 942         }
 943     } else {
 944         int ls = 0;
 945
 946         if (ctx->force_quant > 64) {
 947             av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
 948             return AVERROR_INVALIDDATA;
 949         }
 950
 951         for (j = 0; j < 64; j++) {
 952             ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
 953             ls += av_log2((1 << 11)  / ctx->quants[0][j]) * 2 + 1;
 954         }
 955
 956         ctx->bits_per_mb = ls * 8;
 957         if (ctx->chroma_factor == CFACTOR_Y444)
 958             ctx->bits_per_mb += ls * 4;
 959         if (ctx->num_planes == 4)
 960             ctx->bits_per_mb += ls * 4;
 961     }
 962
 963     ctx->frame_size = ctx->num_slices * (2 + 2 * ctx->num_planes
 964                                          + (2 * mps * ctx->bits_per_mb) / 8)
 965                       + 200;
 966
 967     avctx->codec_tag   = ctx->profile_info->tag;
 968
 969     av_log(avctx, AV_LOG_DEBUG, "profile %d, %d slices, %d bits per MB\n",
 970            ctx->profile, ctx->num_slices, ctx->bits_per_mb);
 971     av_log(avctx, AV_LOG_DEBUG, "estimated frame size %d\n",
 972            ctx->frame_size);
 973
 974     return 0;
 975 }
 976
 977 #define OFFSET(x) offsetof(ProresContext, x)
 978 #define VE     AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 979
 980 static const AVOption options[] = {
 981     { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
 982         AV_OPT_TYPE_INT, { 8 }, 1, MAX_MBS_PER_SLICE, VE },
 983     { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
 984         { PRORES_PROFILE_STANDARD },
 985         PRORES_PROFILE_PROXY, PRORES_PROFILE_HQ, VE, "profile" },
 986     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_PROXY },
 987         0, 0, VE, "profile" },
 988     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_LT },
 989         0, 0, VE, "profile" },
 990     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_STANDARD },
 991         0, 0, VE, "profile" },
 992     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_HQ },
 993         0, 0, VE, "profile" },
 994     { "vendor", "vendor ID", OFFSET(vendor),
 995         AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
 996     { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
 997         AV_OPT_TYPE_INT, { 0 }, 0, 8192, VE },
 998     { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
 999         { -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1000     { "auto",          NULL, 0, AV_OPT_TYPE_CONST, { -1 },
1001         0, 0, VE, "quant_mat" },
1002     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_PROXY },
1003         0, 0, VE, "quant_mat" },
1004     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_LT },
1005         0, 0, VE, "quant_mat" },
1006     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_STANDARD },
1007         0, 0, VE, "quant_mat" },
1008     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_HQ },
1009         0, 0, VE, "quant_mat" },
1010     { "default",       NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_DEFAULT },
1011         0, 0, VE, "quant_mat" },
1012     { NULL }
1013 };
1014
1015 static const AVClass proresenc_class = {
1016     .class_name = "ProRes encoder",
1017     .item_name  = av_default_item_name,
1018     .option     = options,
1019     .version    = LIBAVUTIL_VERSION_INT,
1020 };
1021
1022 AVCodec ff_prores_encoder = {
1023     .name           = "prores",
1024     .type           = AVMEDIA_TYPE_VIDEO,
1025     .id             = CODEC_ID_PRORES,
1026     .priv_data_size = sizeof(ProresContext),
1027     .init           = encode_init,
1028     .close          = encode_close,
1029     .encode2        = encode_frame,
1030     .capabilities   = CODEC_CAP_SLICE_THREADS,
1031     .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1032     .pix_fmts       = (const enum PixelFormat[]) {
1033                           PIX_FMT_YUV422P10, PIX_FMT_YUV444P10, PIX_FMT_NONE
1034                       },
1035     .priv_class     = &proresenc_class,
1036 };