git.sesse.net Git - ffmpeg/blob - libavcodec/proresenc.c

   1 /*
   2  * Apple ProRes encoder
   3  *
   4  * Copyright (c) 2012 Konstantin Shishkov
   5  *
   6  * This file is part of Libav.
   7  *
   8  * Libav is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * Libav is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with Libav; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 #include "libavutil/opt.h"
  24 #include "avcodec.h"
  25 #include "put_bits.h"
  26 #include "bytestream.h"
  27 #include "internal.h"
  28 #include "proresdsp.h"
  29 #include "proresdata.h"
  30
/* Chroma format codes. The encoder context's chroma_factor is compared
 * against these, and its value is shifted into the top bits of the frame
 * flags byte when the frame header is written. */
#define CFACTOR_Y422 2
#define CFACTOR_Y444 3

/* Upper bound on macroblocks per slice; sizes the coefficient buffers. */
#define MAX_MBS_PER_SLICE 8

#define MAX_PLANES 3 // should be increased to 4 when there's PIX_FMT_YUV444AP10

/* Encoding profiles, listed in the same order as the entries of
 * prores_profile_info[] (presumably used as its index - confirm in
 * encode_init). */
enum {
    PRORES_PROFILE_PROXY = 0,
    PRORES_PROFILE_LT,
    PRORES_PROFILE_STANDARD,
    PRORES_PROFILE_HQ,
};

/* Quantisation matrix selectors, listed in the same order as the rows of
 * prores_quant_matrices[]. QUANT_MAT_DEFAULT additionally makes the frame
 * header omit explicit matrices. */
enum {
    QUANT_MAT_PROXY = 0,
    QUANT_MAT_LT,
    QUANT_MAT_STANDARD,
    QUANT_MAT_HQ,
    QUANT_MAT_DEFAULT,
};
  52
/* Base 8x8 quantisation matrices (64 entries each), one row per QUANT_MAT_*
 * selector in declaration order. The encoder multiplies the selected matrix
 * by the slice quantiser to obtain the actual divisors. */
static const uint8_t prores_quant_matrices[][64] = {
    { // proxy
         4,  7,  9, 11, 13, 14, 15, 63,
         7,  7, 11, 12, 14, 15, 63, 63,
         9, 11, 13, 14, 15, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63,
    },
    { // LT
         4,  5,  6,  7,  9, 11, 13, 15,
         5,  5,  7,  8, 11, 13, 15, 17,
         6,  7,  9, 11, 13, 15, 15, 17,
         7,  7,  9, 11, 13, 15, 17, 19,
         7,  9, 11, 13, 14, 16, 19, 23,
         9, 11, 13, 14, 16, 19, 23, 29,
         9, 11, 13, 15, 17, 21, 28, 35,
        11, 13, 16, 17, 21, 28, 35, 41,
    },
    { // standard
         4,  4,  5,  5,  6,  7,  7,  9,
         4,  4,  5,  6,  7,  7,  9,  9,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  6,  7,  7,  8,  9, 10, 12,
         6,  7,  7,  8,  9, 10, 12, 15,
         6,  7,  7,  9, 10, 11, 14, 17,
         7,  7,  9, 10, 11, 14, 17, 21,
    },
    { // high quality
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  5,
         4,  4,  4,  4,  4,  4,  5,  5,
         4,  4,  4,  4,  4,  5,  5,  6,
         4,  4,  4,  4,  5,  5,  6,  7,
         4,  4,  4,  4,  5,  6,  7,  7,
    },
    { // codec default
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
    },
};
 105
/* Macroblock-count thresholds delimiting the frame-size classes named in the
 * trailing comments. Each profile's br_tab[] carries one entry per class. */
#define NUM_MB_LIMITS 4
static const int prores_mb_limits[NUM_MB_LIMITS] = {
    1620, // up to 720x576
    2700, // up to 960x720
    6075, // up to 1440x1080
    9216, // up to 2048x1152
};
 113
/* Static description of each encoding profile, in PRORES_PROFILE_* order. */
static const struct prores_profile {
    const char *full_name;             // human-readable profile name
    uint32_t    tag;                   // four-character code built with MKTAG
    int         min_quant;             // lowest slice quantiser tried by the rate control
    int         max_quant;             // highest regular slice quantiser tried
    int         br_tab[NUM_MB_LIMITS]; // one rate figure per prores_mb_limits[] class
                                       // (presumably bits per macroblock - confirm in encode_init)
    int         quant;                 // QUANT_MAT_* selector for this profile
} prores_profile_info[4] = {
    {
        .full_name = "proxy",
        .tag       = MKTAG('a', 'p', 'c', 'o'),
        .min_quant = 4,
        .max_quant = 8,
        .br_tab    = { 300, 242, 220, 194 },
        .quant     = QUANT_MAT_PROXY,
    },
    {
        .full_name = "LT",
        .tag       = MKTAG('a', 'p', 'c', 's'),
        .min_quant = 1,
        .max_quant = 9,
        .br_tab    = { 720, 560, 490, 440 },
        .quant     = QUANT_MAT_LT,
    },
    {
        .full_name = "standard",
        .tag       = MKTAG('a', 'p', 'c', 'n'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 1050, 808, 710, 632 },
        .quant     = QUANT_MAT_STANDARD,
    },
    {
        .full_name = "high quality",
        .tag       = MKTAG('a', 'p', 'c', 'h'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 1566, 1216, 1070, 950 },
        .quant     = QUANT_MAT_HQ,
    }
// for 4444 profile bitrate numbers are { 2350, 1828, 1600, 1425 }
};
 156
 157 #define TRELLIS_WIDTH 16
 158 #define SCORE_LIMIT   INT_MAX / 2
 159
 160 struct TrellisNode {
 161     int prev_node;
 162     int quant;
 163     int bits;
 164     int score;
 165 };
 166
/* Number of quantisers with a precomputed matrix in ProresContext.quants[];
 * larger quantisers get their matrix built on the fly into custom_q. */
#define MAX_STORED_Q 16

/* Scratch state owned by one worker thread during the quantiser search. */
typedef struct ProresThreadData {
    DECLARE_ALIGNED(16, DCTELEM, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE]; // transformed coefficients per plane
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];  // edge-padded copy of one macroblock
    int16_t custom_q[64];        // matrix for quantisers >= MAX_STORED_Q
    struct TrellisNode *nodes;   // trellis nodes for one macroblock row
} ProresThreadData;
 175
/* Private encoder state (avctx->priv_data). */
typedef struct ProresContext {
    AVClass *class;
    DECLARE_ALIGNED(16, DCTELEM, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE]; // coefficients used while writing a slice
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];   // edge-padded macroblock used while writing a slice
    int16_t quants[MAX_STORED_Q][64];   // scaled matrices, indexed by slice quantiser
    int16_t custom_q[64];               // scratch matrix for quantisers >= MAX_STORED_Q
    const uint8_t *quant_mat;           // base matrix; also written to the frame header

    ProresDSPContext dsp;               // provides the forward DCT
    ScanTable  scantable;               // provides the permutated coefficient scan

    int mb_width, mb_height;            // picture dimensions in macroblocks
    int mbs_per_slice;                  // nominal slice width in macroblocks (halved at the right edge as needed)
    int num_chroma_blocks, chroma_factor; // chroma_factor holds a CFACTOR_* value
    int slices_width;                   // number of slices per macroblock row
    int slices_per_picture;             // written to the picture header, sizes the seek table
    int pictures_per_frame; // 1 for progressive, 2 for interlaced
    int cur_picture_idx;                // index of the picture (field) currently being coded
    int num_planes;                     // number of planes coded per slice
    int bits_per_mb;                    // bit budget per macroblock for the rate control
    int force_quant;                    // non-zero: use this quantiser everywhere and skip the search

    char *vendor;                       // four bytes of it are written to the frame header
    int quant_sel;                      // QUANT_MAT_* selector

    int frame_size_upper_bound;         // used to size the output packet

    int profile;
    const struct prores_profile *profile_info;

    int *slice_q;                       // chosen quantiser per slice: [slice + row * slices_width]

    ProresThreadData *tdata;            // per-thread scratch, indexed by thread number
} ProresContext;
 210
 211 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
 212                            int linesize, int x, int y, int w, int h,
 213                            DCTELEM *blocks, uint16_t *emu_buf,
 214                            int mbs_per_slice, int blocks_per_mb, int is_chroma)
 215 {
 216     const uint16_t *esrc;
 217     const int mb_width = 4 * blocks_per_mb;
 218     int elinesize;
 219     int i, j, k;
 220
 221     for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
 222         if (x >= w) {
 223             memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
 224                               * sizeof(*blocks));
 225             return;
 226         }
 227         if (x + mb_width <= w && y + 16 <= h) {
 228             esrc      = src;
 229             elinesize = linesize;
 230         } else {
 231             int bw, bh, pix;
 232
 233             esrc      = emu_buf;
 234             elinesize = 16 * sizeof(*emu_buf);
 235
 236             bw = FFMIN(w - x, mb_width);
 237             bh = FFMIN(h - y, 16);
 238
 239             for (j = 0; j < bh; j++) {
 240                 memcpy(emu_buf + j * 16,
 241                        (const uint8_t*)src + j * linesize,
 242                        bw * sizeof(*src));
 243                 pix = emu_buf[j * 16 + bw - 1];
 244                 for (k = bw; k < mb_width; k++)
 245                     emu_buf[j * 16 + k] = pix;
 246             }
 247             for (; j < 16; j++)
 248                 memcpy(emu_buf + j * 16,
 249                        emu_buf + (bh - 1) * 16,
 250                        mb_width * sizeof(*emu_buf));
 251         }
 252         if (!is_chroma) {
 253             ctx->dsp.fdct(esrc, elinesize, blocks);
 254             blocks += 64;
 255             if (blocks_per_mb > 2) {
 256                 ctx->dsp.fdct(esrc + 8, elinesize, blocks);
 257                 blocks += 64;
 258             }
 259             ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
 260             blocks += 64;
 261             if (blocks_per_mb > 2) {
 262                 ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
 263                 blocks += 64;
 264             }
 265         } else {
 266             ctx->dsp.fdct(esrc, elinesize, blocks);
 267             blocks += 64;
 268             ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
 269             blocks += 64;
 270             if (blocks_per_mb > 2) {
 271                 ctx->dsp.fdct(esrc + 8, elinesize, blocks);
 272                 blocks += 64;
 273                 ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
 274                 blocks += 64;
 275             }
 276         }
 277
 278         x += mb_width;
 279     }
 280 }
 281
 282 /**
 283  * Write an unsigned rice/exp golomb codeword.
 284  */
 285 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
 286 {
 287     unsigned int rice_order, exp_order, switch_bits, switch_val;
 288     int exponent;
 289
 290     /* number of prefix bits to switch between Rice and expGolomb */
 291     switch_bits = (codebook & 3) + 1;
 292     rice_order  =  codebook >> 5;       /* rice code order */
 293     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 294
 295     switch_val  = switch_bits << rice_order;
 296
 297     if (val >= switch_val) {
 298         val -= switch_val - (1 << exp_order);
 299         exponent = av_log2(val);
 300
 301         put_bits(pb, exponent - exp_order + switch_bits, 0);
 302         put_bits(pb, exponent + 1, val);
 303     } else {
 304         exponent = val >> rice_order;
 305
 306         if (exponent)
 307             put_bits(pb, exponent, 0);
 308         put_bits(pb, 1, 1);
 309         if (rice_order)
 310             put_sbits(pb, rice_order, val);
 311     }
 312 }
 313
 314 #define GET_SIGN(x)  ((x) >> 31)
 315 #define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
 316
 317 static void encode_dcs(PutBitContext *pb, DCTELEM *blocks,
 318                        int blocks_per_slice, int scale)
 319 {
 320     int i;
 321     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 322
 323     prev_dc = (blocks[0] - 0x4000) / scale;
 324     encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
 325     sign     = 0;
 326     codebook = 3;
 327     blocks  += 64;
 328
 329     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 330         dc       = (blocks[0] - 0x4000) / scale;
 331         delta    = dc - prev_dc;
 332         new_sign = GET_SIGN(delta);
 333         delta    = (delta ^ sign) - sign;
 334         code     = MAKE_CODE(delta);
 335         encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
 336         codebook = (code + (code & 1)) >> 1;
 337         codebook = FFMIN(codebook, 3);
 338         sign     = new_sign;
 339         prev_dc  = dc;
 340     }
 341 }
 342
 343 static void encode_acs(PutBitContext *pb, DCTELEM *blocks,
 344                        int blocks_per_slice,
 345                        int plane_size_factor,
 346                        const uint8_t *scan, const int16_t *qmat)
 347 {
 348     int idx, i;
 349     int run, level, run_cb, lev_cb;
 350     int max_coeffs, abs_level;
 351
 352     max_coeffs = blocks_per_slice << 6;
 353     run_cb     = ff_prores_run_to_cb_index[4];
 354     lev_cb     = ff_prores_lev_to_cb_index[2];
 355     run        = 0;
 356
 357     for (i = 1; i < 64; i++) {
 358         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 359             level = blocks[idx] / qmat[scan[i]];
 360             if (level) {
 361                 abs_level = FFABS(level);
 362                 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
 363                 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
 364                                     abs_level - 1);
 365                 put_sbits(pb, 1, GET_SIGN(level));
 366
 367                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 368                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 369                 run    = 0;
 370             } else {
 371                 run++;
 372             }
 373         }
 374     }
 375 }
 376
 377 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
 378                               const uint16_t *src, int linesize,
 379                               int mbs_per_slice, DCTELEM *blocks,
 380                               int blocks_per_mb, int plane_size_factor,
 381                               const int16_t *qmat)
 382 {
 383     int blocks_per_slice, saved_pos;
 384
 385     saved_pos = put_bits_count(pb);
 386     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 387
 388     encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
 389     encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
 390                ctx->scantable.permutated, qmat);
 391     flush_put_bits(pb);
 392
 393     return (put_bits_count(pb) - saved_pos) >> 3;
 394 }
 395
 396 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
 397                         PutBitContext *pb,
 398                         int sizes[4], int x, int y, int quant,
 399                         int mbs_per_slice)
 400 {
 401     ProresContext *ctx = avctx->priv_data;
 402     int i, xp, yp;
 403     int total_size = 0;
 404     const uint16_t *src;
 405     int slice_width_factor = av_log2(mbs_per_slice);
 406     int num_cblocks, pwidth, linesize, line_add;
 407     int plane_factor, is_chroma;
 408     uint16_t *qmat;
 409
 410     if (ctx->pictures_per_frame == 1)
 411         line_add = 0;
 412     else
 413         line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
 414
 415     if (ctx->force_quant) {
 416         qmat = ctx->quants[0];
 417     } else if (quant < MAX_STORED_Q) {
 418         qmat = ctx->quants[quant];
 419     } else {
 420         qmat = ctx->custom_q;
 421         for (i = 0; i < 64; i++)
 422             qmat[i] = ctx->quant_mat[i] * quant;
 423     }
 424
 425     for (i = 0; i < ctx->num_planes; i++) {
 426         is_chroma    = (i == 1 || i == 2);
 427         plane_factor = slice_width_factor + 2;
 428         if (is_chroma)
 429             plane_factor += ctx->chroma_factor - 3;
 430         if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
 431             xp          = x << 4;
 432             yp          = y << 4;
 433             num_cblocks = 4;
 434             pwidth      = avctx->width;
 435         } else {
 436             xp          = x << 3;
 437             yp          = y << 4;
 438             num_cblocks = 2;
 439             pwidth      = avctx->width >> 1;
 440         }
 441
 442         linesize = pic->linesize[i] * ctx->pictures_per_frame;
 443         src = (const uint16_t*)(pic->data[i] + yp * linesize +
 444                                 line_add * pic->linesize[i]) + xp;
 445
 446         get_slice_data(ctx, src, linesize, xp, yp,
 447                        pwidth, avctx->height / ctx->pictures_per_frame,
 448                        ctx->blocks[0], ctx->emu_buf,
 449                        mbs_per_slice, num_cblocks, is_chroma);
 450         sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
 451                                       mbs_per_slice, ctx->blocks[0],
 452                                       num_cblocks, plane_factor,
 453                                       qmat);
 454         total_size += sizes[i];
 455     }
 456     return total_size;
 457 }
 458
 459 static inline int estimate_vlc(unsigned codebook, int val)
 460 {
 461     unsigned int rice_order, exp_order, switch_bits, switch_val;
 462     int exponent;
 463
 464     /* number of prefix bits to switch between Rice and expGolomb */
 465     switch_bits = (codebook & 3) + 1;
 466     rice_order  =  codebook >> 5;       /* rice code order */
 467     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 468
 469     switch_val  = switch_bits << rice_order;
 470
 471     if (val >= switch_val) {
 472         val -= switch_val - (1 << exp_order);
 473         exponent = av_log2(val);
 474
 475         return exponent * 2 - exp_order + switch_bits + 1;
 476     } else {
 477         return (val >> rice_order) + rice_order + 1;
 478     }
 479 }
 480
 481 static int estimate_dcs(int *error, DCTELEM *blocks, int blocks_per_slice,
 482                         int scale)
 483 {
 484     int i;
 485     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 486     int bits;
 487
 488     prev_dc  = (blocks[0] - 0x4000) / scale;
 489     bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
 490     sign     = 0;
 491     codebook = 3;
 492     blocks  += 64;
 493     *error  += FFABS(blocks[0] - 0x4000) % scale;
 494
 495     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 496         dc       = (blocks[0] - 0x4000) / scale;
 497         *error  += FFABS(blocks[0] - 0x4000) % scale;
 498         delta    = dc - prev_dc;
 499         new_sign = GET_SIGN(delta);
 500         delta    = (delta ^ sign) - sign;
 501         code     = MAKE_CODE(delta);
 502         bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
 503         codebook = (code + (code & 1)) >> 1;
 504         codebook = FFMIN(codebook, 3);
 505         sign     = new_sign;
 506         prev_dc  = dc;
 507     }
 508
 509     return bits;
 510 }
 511
 512 static int estimate_acs(int *error, DCTELEM *blocks, int blocks_per_slice,
 513                         int plane_size_factor,
 514                         const uint8_t *scan, const int16_t *qmat)
 515 {
 516     int idx, i;
 517     int run, level, run_cb, lev_cb;
 518     int max_coeffs, abs_level;
 519     int bits = 0;
 520
 521     max_coeffs = blocks_per_slice << 6;
 522     run_cb     = ff_prores_run_to_cb_index[4];
 523     lev_cb     = ff_prores_lev_to_cb_index[2];
 524     run        = 0;
 525
 526     for (i = 1; i < 64; i++) {
 527         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 528             level   = blocks[idx] / qmat[scan[i]];
 529             *error += FFABS(blocks[idx]) % qmat[scan[i]];
 530             if (level) {
 531                 abs_level = FFABS(level);
 532                 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
 533                 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
 534                                      abs_level - 1) + 1;
 535
 536                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 537                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 538                 run    = 0;
 539             } else {
 540                 run++;
 541             }
 542         }
 543     }
 544
 545     return bits;
 546 }
 547
 548 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
 549                                 const uint16_t *src, int linesize,
 550                                 int mbs_per_slice,
 551                                 int blocks_per_mb, int plane_size_factor,
 552                                 const int16_t *qmat, ProresThreadData *td)
 553 {
 554     int blocks_per_slice;
 555     int bits;
 556
 557     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 558
 559     bits  = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
 560     bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
 561                          plane_size_factor, ctx->scantable.permutated, qmat);
 562
 563     return FFALIGN(bits, 8);
 564 }
 565
 566 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
 567                             int trellis_node, int x, int y, int mbs_per_slice,
 568                             ProresThreadData *td)
 569 {
 570     ProresContext *ctx = avctx->priv_data;
 571     int i, q, pq, xp, yp;
 572     const uint16_t *src;
 573     int slice_width_factor = av_log2(mbs_per_slice);
 574     int num_cblocks[MAX_PLANES], pwidth;
 575     int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
 576     const int min_quant = ctx->profile_info->min_quant;
 577     const int max_quant = ctx->profile_info->max_quant;
 578     int error, bits, bits_limit;
 579     int mbs, prev, cur, new_score;
 580     int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
 581     int overquant;
 582     uint16_t *qmat;
 583     int linesize[4], line_add;
 584
 585     if (ctx->pictures_per_frame == 1)
 586         line_add = 0;
 587     else
 588         line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
 589     mbs = x + mbs_per_slice;
 590
 591     for (i = 0; i < ctx->num_planes; i++) {
 592         is_chroma[i]    = (i == 1 || i == 2);
 593         plane_factor[i] = slice_width_factor + 2;
 594         if (is_chroma[i])
 595             plane_factor[i] += ctx->chroma_factor - 3;
 596         if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
 597             xp             = x << 4;
 598             yp             = y << 4;
 599             num_cblocks[i] = 4;
 600             pwidth         = avctx->width;
 601         } else {
 602             xp             = x << 3;
 603             yp             = y << 4;
 604             num_cblocks[i] = 2;
 605             pwidth         = avctx->width >> 1;
 606         }
 607
 608         linesize[i] = pic->linesize[i] * ctx->pictures_per_frame;
 609         src = (const uint16_t*)(pic->data[i] + yp * linesize[i] +
 610                                 line_add * pic->linesize[i]) + xp;
 611
 612         get_slice_data(ctx, src, linesize[i], xp, yp,
 613                        pwidth, avctx->height / ctx->pictures_per_frame,
 614                        td->blocks[i], td->emu_buf,
 615                        mbs_per_slice, num_cblocks[i], is_chroma[i]);
 616     }
 617
 618     for (q = min_quant; q < max_quant + 2; q++) {
 619         td->nodes[trellis_node + q].prev_node = -1;
 620         td->nodes[trellis_node + q].quant     = q;
 621     }
 622
 623     // todo: maybe perform coarser quantising to fit into frame size when needed
 624     for (q = min_quant; q <= max_quant; q++) {
 625         bits  = 0;
 626         error = 0;
 627         for (i = 0; i < ctx->num_planes; i++) {
 628             bits += estimate_slice_plane(ctx, &error, i,
 629                                          src, linesize[i],
 630                                          mbs_per_slice,
 631                                          num_cblocks[i], plane_factor[i],
 632                                          ctx->quants[q], td);
 633         }
 634         if (bits > 65000 * 8) {
 635             error = SCORE_LIMIT;
 636             break;
 637         }
 638         slice_bits[q]  = bits;
 639         slice_score[q] = error;
 640     }
 641     if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
 642         slice_bits[max_quant + 1]  = slice_bits[max_quant];
 643         slice_score[max_quant + 1] = slice_score[max_quant] + 1;
 644         overquant = max_quant;
 645     } else {
 646         for (q = max_quant + 1; q < 128; q++) {
 647             bits  = 0;
 648             error = 0;
 649             if (q < MAX_STORED_Q) {
 650                 qmat = ctx->quants[q];
 651             } else {
 652                 qmat = td->custom_q;
 653                 for (i = 0; i < 64; i++)
 654                     qmat[i] = ctx->quant_mat[i] * q;
 655             }
 656             for (i = 0; i < ctx->num_planes; i++) {
 657                 bits += estimate_slice_plane(ctx, &error, i,
 658                                              src, linesize[i],
 659                                              mbs_per_slice,
 660                                              num_cblocks[i], plane_factor[i],
 661                                              qmat, td);
 662             }
 663             if (bits <= ctx->bits_per_mb * mbs_per_slice)
 664                 break;
 665         }
 666
 667         slice_bits[max_quant + 1]  = bits;
 668         slice_score[max_quant + 1] = error;
 669         overquant = q;
 670     }
 671     td->nodes[trellis_node + max_quant + 1].quant = overquant;
 672
 673     bits_limit = mbs * ctx->bits_per_mb;
 674     for (pq = min_quant; pq < max_quant + 2; pq++) {
 675         prev = trellis_node - TRELLIS_WIDTH + pq;
 676
 677         for (q = min_quant; q < max_quant + 2; q++) {
 678             cur = trellis_node + q;
 679
 680             bits  = td->nodes[prev].bits + slice_bits[q];
 681             error = slice_score[q];
 682             if (bits > bits_limit)
 683                 error = SCORE_LIMIT;
 684
 685             if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
 686                 new_score = td->nodes[prev].score + error;
 687             else
 688                 new_score = SCORE_LIMIT;
 689             if (td->nodes[cur].prev_node == -1 ||
 690                 td->nodes[cur].score >= new_score) {
 691
 692                 td->nodes[cur].bits      = bits;
 693                 td->nodes[cur].score     = new_score;
 694                 td->nodes[cur].prev_node = prev;
 695             }
 696         }
 697     }
 698
 699     error = td->nodes[trellis_node + min_quant].score;
 700     pq    = trellis_node + min_quant;
 701     for (q = min_quant + 1; q < max_quant + 2; q++) {
 702         if (td->nodes[trellis_node + q].score <= error) {
 703             error = td->nodes[trellis_node + q].score;
 704             pq    = trellis_node + q;
 705         }
 706     }
 707
 708     return pq;
 709 }
 710
 711 static int find_quant_thread(AVCodecContext *avctx, void *arg,
 712                              int jobnr, int threadnr)
 713 {
 714     ProresContext *ctx = avctx->priv_data;
 715     ProresThreadData *td = ctx->tdata + threadnr;
 716     int mbs_per_slice = ctx->mbs_per_slice;
 717     int x, y = jobnr, mb, q = 0;
 718
 719     for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
 720         while (ctx->mb_width - x < mbs_per_slice)
 721             mbs_per_slice >>= 1;
 722         q = find_slice_quant(avctx, avctx->coded_frame,
 723                              (mb + 1) * TRELLIS_WIDTH, x, y,
 724                              mbs_per_slice, td);
 725     }
 726
 727     for (x = ctx->slices_width - 1; x >= 0; x--) {
 728         ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
 729         q = td->nodes[q].prev_node;
 730     }
 731
 732     return 0;
 733 }
 734
 735 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 736                         const AVFrame *pic, int *got_packet)
 737 {
 738     ProresContext *ctx = avctx->priv_data;
 739     uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
 740     uint8_t *picture_size_pos;
 741     PutBitContext pb;
 742     int x, y, i, mb, q = 0;
 743     int sizes[4] = { 0 };
 744     int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
 745     int frame_size, picture_size, slice_size;
 746     int pkt_size, ret;
 747     uint8_t frame_flags;
 748
 749     *avctx->coded_frame           = *pic;
 750     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
 751     avctx->coded_frame->key_frame = 1;
 752
 753     pkt_size = ctx->frame_size_upper_bound + FF_MIN_BUFFER_SIZE;
 754
 755     if ((ret = ff_alloc_packet(pkt, pkt_size)) < 0) {
 756         av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
 757         return ret;
 758     }
 759
 760     orig_buf = pkt->data;
 761
 762     // frame atom
 763     orig_buf += 4;                              // frame size
 764     bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
 765     buf = orig_buf;
 766
 767     // frame header
 768     tmp = buf;
 769     buf += 2;                                   // frame header size will be stored here
 770     bytestream_put_be16  (&buf, 0);             // version 1
 771     bytestream_put_buffer(&buf, ctx->vendor, 4);
 772     bytestream_put_be16  (&buf, avctx->width);
 773     bytestream_put_be16  (&buf, avctx->height);
 774
 775     frame_flags = ctx->chroma_factor << 6;
 776     if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
 777         frame_flags |= pic->top_field_first ? 0x04 : 0x08;
 778     bytestream_put_byte  (&buf, frame_flags);
 779
 780     bytestream_put_byte  (&buf, 0);             // reserved
 781     bytestream_put_byte  (&buf, avctx->color_primaries);
 782     bytestream_put_byte  (&buf, avctx->color_trc);
 783     bytestream_put_byte  (&buf, avctx->colorspace);
 784     bytestream_put_byte  (&buf, 0x40);          // source format and alpha information
 785     bytestream_put_byte  (&buf, 0);             // reserved
 786     if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
 787         bytestream_put_byte  (&buf, 0x03);      // matrix flags - both matrices are present
 788         // luma quantisation matrix
 789         for (i = 0; i < 64; i++)
 790             bytestream_put_byte(&buf, ctx->quant_mat[i]);
 791         // chroma quantisation matrix
 792         for (i = 0; i < 64; i++)
 793             bytestream_put_byte(&buf, ctx->quant_mat[i]);
 794     } else {
 795         bytestream_put_byte  (&buf, 0x00);      // matrix flags - default matrices are used
 796     }
 797     bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
 798
 799     for (ctx->cur_picture_idx = 0;
 800          ctx->cur_picture_idx < ctx->pictures_per_frame;
 801          ctx->cur_picture_idx++) {
 802         // picture header
 803         picture_size_pos = buf + 1;
 804         bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
 805         buf += 4;                                   // picture data size will be stored here
 806         bytestream_put_be16  (&buf, ctx->slices_per_picture);
 807         bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
 808
 809         // seek table - will be filled during slice encoding
 810         slice_sizes = buf;
 811         buf += ctx->slices_per_picture * 2;
 812
 813         // slices
 814         if (!ctx->force_quant) {
 815             ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
 816                                   ctx->mb_height);
 817             if (ret)
 818                 return ret;
 819         }
 820
 821         for (y = 0; y < ctx->mb_height; y++) {
 822             int mbs_per_slice = ctx->mbs_per_slice;
 823             for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
 824                 q = ctx->force_quant ? ctx->force_quant
 825                                      : ctx->slice_q[mb + y * ctx->slices_width];
 826
 827                 while (ctx->mb_width - x < mbs_per_slice)
 828                     mbs_per_slice >>= 1;
 829
 830                 bytestream_put_byte(&buf, slice_hdr_size << 3);
 831                 slice_hdr = buf;
 832                 buf += slice_hdr_size - 1;
 833                 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
 834                 encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
 835
 836                 bytestream_put_byte(&slice_hdr, q);
 837                 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
 838                 for (i = 0; i < ctx->num_planes - 1; i++) {
 839                     bytestream_put_be16(&slice_hdr, sizes[i]);
 840                     slice_size += sizes[i];
 841                 }
 842                 bytestream_put_be16(&slice_sizes, slice_size);
 843                 buf += slice_size - slice_hdr_size;
 844             }
 845         }
 846
 847         if (ctx->pictures_per_frame == 1)
 848             picture_size = buf - picture_size_pos - 6;
 849         else
 850             picture_size = buf - picture_size_pos + 1;
 851         bytestream_put_be32(&picture_size_pos, picture_size);
 852     }
 853
 854     orig_buf -= 8;
 855     frame_size = buf - orig_buf;
 856     bytestream_put_be32(&orig_buf, frame_size);
 857
 858     pkt->size   = frame_size;
 859     pkt->flags |= AV_PKT_FLAG_KEY;
 860     *got_packet = 1;
 861
 862     return 0;
 863 }
 864
 865 static av_cold int encode_close(AVCodecContext *avctx)
 866 {
 867     ProresContext *ctx = avctx->priv_data;
 868     int i;
 869
 870     if (avctx->coded_frame->data[0])
 871         avctx->release_buffer(avctx, avctx->coded_frame);
 872
 873     av_freep(&avctx->coded_frame);
 874
 875     if (ctx->tdata) {
 876         for (i = 0; i < avctx->thread_count; i++)
 877             av_free(ctx->tdata[i].nodes);
 878     }
 879     av_freep(&ctx->tdata);
 880     av_freep(&ctx->slice_q);
 881
 882     return 0;
 883 }
 884
 885 static av_cold int encode_init(AVCodecContext *avctx)
 886 {
 887     ProresContext *ctx = avctx->priv_data;
 888     int mps;
 889     int i, j;
 890     int min_quant, max_quant;
 891     int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
 892
 893     avctx->bits_per_raw_sample = 10;
 894     avctx->coded_frame = avcodec_alloc_frame();
 895     if (!avctx->coded_frame)
 896         return AVERROR(ENOMEM);
 897
 898     ff_proresdsp_init(&ctx->dsp);
 899     ff_init_scantable(ctx->dsp.dct_permutation, &ctx->scantable,
 900                       interlaced ? ff_prores_interlaced_scan
 901                                  : ff_prores_progressive_scan);
 902
 903     mps = ctx->mbs_per_slice;
 904     if (mps & (mps - 1)) {
 905         av_log(avctx, AV_LOG_ERROR,
 906                "there should be an integer power of two MBs per slice\n");
 907         return AVERROR(EINVAL);
 908     }
 909
 910     ctx->chroma_factor = avctx->pix_fmt == PIX_FMT_YUV422P10
 911                          ? CFACTOR_Y422
 912                          : CFACTOR_Y444;
 913     ctx->profile_info  = prores_profile_info + ctx->profile;
 914     ctx->num_planes    = 3;
 915
 916     ctx->mb_width      = FFALIGN(avctx->width,  16) >> 4;
 917
 918     if (interlaced)
 919         ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
 920     else
 921         ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
 922
 923     ctx->slices_width  = ctx->mb_width / mps;
 924     ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
 925     ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
 926     ctx->pictures_per_frame = 1 + interlaced;
 927
 928     if (ctx->quant_sel == -1)
 929         ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
 930     else
 931         ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
 932
 933     if (strlen(ctx->vendor) != 4) {
 934         av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
 935         return AVERROR_INVALIDDATA;
 936     }
 937
 938     ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
 939     if (!ctx->force_quant) {
 940         if (!ctx->bits_per_mb) {
 941             for (i = 0; i < NUM_MB_LIMITS - 1; i++)
 942                 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
 943                                            ctx->pictures_per_frame)
 944                     break;
 945             ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
 946         } else if (ctx->bits_per_mb < 128) {
 947             av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
 948             return AVERROR_INVALIDDATA;
 949         }
 950
 951         min_quant = ctx->profile_info->min_quant;
 952         max_quant = ctx->profile_info->max_quant;
 953         for (i = min_quant; i < MAX_STORED_Q; i++) {
 954             for (j = 0; j < 64; j++)
 955                 ctx->quants[i][j] = ctx->quant_mat[j] * i;
 956         }
 957
 958         ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
 959         if (!ctx->slice_q) {
 960             encode_close(avctx);
 961             return AVERROR(ENOMEM);
 962         }
 963
 964         ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
 965         if (!ctx->tdata) {
 966             encode_close(avctx);
 967             return AVERROR(ENOMEM);
 968         }
 969
 970         for (j = 0; j < avctx->thread_count; j++) {
 971             ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
 972                                             * TRELLIS_WIDTH
 973                                             * sizeof(*ctx->tdata->nodes));
 974             if (!ctx->tdata[j].nodes) {
 975                 encode_close(avctx);
 976                 return AVERROR(ENOMEM);
 977             }
 978             for (i = min_quant; i < max_quant + 2; i++) {
 979                 ctx->tdata[j].nodes[i].prev_node = -1;
 980                 ctx->tdata[j].nodes[i].bits      = 0;
 981                 ctx->tdata[j].nodes[i].score     = 0;
 982             }
 983         }
 984     } else {
 985         int ls = 0;
 986
 987         if (ctx->force_quant > 64) {
 988             av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
 989             return AVERROR_INVALIDDATA;
 990         }
 991
 992         for (j = 0; j < 64; j++) {
 993             ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
 994             ls += av_log2((1 << 11)  / ctx->quants[0][j]) * 2 + 1;
 995         }
 996
 997         ctx->bits_per_mb = ls * 8;
 998         if (ctx->chroma_factor == CFACTOR_Y444)
 999             ctx->bits_per_mb += ls * 4;
1000         if (ctx->num_planes == 4)
1001             ctx->bits_per_mb += ls * 4;
1002     }
1003
1004     ctx->frame_size_upper_bound = ctx->pictures_per_frame *
1005                                   ctx->slices_per_picture *
1006                                   (2 + 2 * ctx->num_planes +
1007                                    (mps * ctx->bits_per_mb) / 8)
1008                                   + 200;
1009
1010     avctx->codec_tag   = ctx->profile_info->tag;
1011
1012     av_log(avctx, AV_LOG_DEBUG,
1013            "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1014            ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1015            interlaced ? "yes" : "no", ctx->bits_per_mb);
1016     av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1017            ctx->frame_size_upper_bound);
1018
1019     return 0;
1020 }
1021
/* Byte offset of member x inside ProresContext, used by the option table. */
#define OFFSET(x) offsetof(ProresContext, x)
/* Flag set shared by every option below. Parenthesised so the expansion
 * stays a single operand wherever the macro is used. */
#define VE     (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
1024
1025 static const AVOption options[] = {
1026     { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1027         AV_OPT_TYPE_INT, { 8 }, 1, MAX_MBS_PER_SLICE, VE },
1028     { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1029         { PRORES_PROFILE_STANDARD },
1030         PRORES_PROFILE_PROXY, PRORES_PROFILE_HQ, VE, "profile" },
1031     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_PROXY },
1032         0, 0, VE, "profile" },
1033     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_LT },
1034         0, 0, VE, "profile" },
1035     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_STANDARD },
1036         0, 0, VE, "profile" },
1037     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_HQ },
1038         0, 0, VE, "profile" },
1039     { "vendor", "vendor ID", OFFSET(vendor),
1040         AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
1041     { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1042         AV_OPT_TYPE_INT, { 0 }, 0, 8192, VE },
1043     { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1044         { -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1045     { "auto",          NULL, 0, AV_OPT_TYPE_CONST, { -1 },
1046         0, 0, VE, "quant_mat" },
1047     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_PROXY },
1048         0, 0, VE, "quant_mat" },
1049     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_LT },
1050         0, 0, VE, "quant_mat" },
1051     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_STANDARD },
1052         0, 0, VE, "quant_mat" },
1053     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_HQ },
1054         0, 0, VE, "quant_mat" },
1055     { "default",       NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_DEFAULT },
1056         0, 0, VE, "quant_mat" },
1057     { NULL }
1058 };
1059
1060 static const AVClass proresenc_class = {
1061     .class_name = "ProRes encoder",
1062     .item_name  = av_default_item_name,
1063     .option     = options,
1064     .version    = LIBAVUTIL_VERSION_INT,
1065 };
1066
1067 AVCodec ff_prores_encoder = {
1068     .name           = "prores",
1069     .type           = AVMEDIA_TYPE_VIDEO,
1070     .id             = AV_CODEC_ID_PRORES,
1071     .priv_data_size = sizeof(ProresContext),
1072     .init           = encode_init,
1073     .close          = encode_close,
1074     .encode2        = encode_frame,
1075     .capabilities   = CODEC_CAP_SLICE_THREADS,
1076     .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1077     .pix_fmts       = (const enum PixelFormat[]) {
1078                           PIX_FMT_YUV422P10, PIX_FMT_YUV444P10, PIX_FMT_NONE
1079                       },
1080     .priv_class     = &proresenc_class,
1081 };