git.sesse.net Git - ffmpeg/blob - libavcodec/proresenc.c

   1 /*
   2  * Apple ProRes encoder
   3  *
   4  * Copyright (c) 2012 Konstantin Shishkov
   5  *
   6  * This file is part of Libav.
   7  *
   8  * Libav is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * Libav is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with Libav; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 #include "libavutil/opt.h"
  24 #include "avcodec.h"
  25 #include "put_bits.h"
  26 #include "bytestream.h"
  27 #include "internal.h"
  28 #include "proresdsp.h"
  29 #include "proresdata.h"
  30
/* Chroma format codes. encode_frame() stores the active one in the two
 * most significant bits of the frame flags byte, and the slice code treats
 * CFACTOR_Y444 as "chroma planes are as wide as luma". */
#define CFACTOR_Y422 2
#define CFACTOR_Y444 3

/* Largest slice width in macroblocks; sizes the per-plane coefficient
 * buffers in ProresContext and ProresThreadData. */
#define MAX_MBS_PER_SLICE 8

#define MAX_PLANES 3 // should be increased to 4 when there's PIX_FMT_YUV444AP10
  37
/* Encoder profiles, listed in the same order as the entries of
 * prores_profile_info[].
 * NOTE(review): presumably used as the index into that table - confirm in
 * encode_init(). */
enum {
    PRORES_PROFILE_PROXY = 0,
    PRORES_PROFILE_LT,
    PRORES_PROFILE_STANDARD,
    PRORES_PROFILE_HQ,
};

/* Quantisation matrix selectors, listed in the same order as the rows of
 * prores_quant_matrices[]. encode_frame() writes explicit matrices into the
 * frame header for every selection except QUANT_MAT_DEFAULT. */
enum {
    QUANT_MAT_PROXY = 0,
    QUANT_MAT_LT,
    QUANT_MAT_STANDARD,
    QUANT_MAT_HQ,
    QUANT_MAT_DEFAULT,
};
  52
/* Base 8x8 quantisation matrices, one row of 64 entries per QUANT_MAT_*
 * selector (same order as that enum). The slice code multiplies the
 * selected matrix by the slice quantiser to obtain the actual divisors. */
static const uint8_t prores_quant_matrices[][64] = {
    { // proxy
         4,  7,  9, 11, 13, 14, 15, 63,
         7,  7, 11, 12, 14, 15, 63, 63,
         9, 11, 13, 14, 15, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63,
    },
    { // LT
         4,  5,  6,  7,  9, 11, 13, 15,
         5,  5,  7,  8, 11, 13, 15, 17,
         6,  7,  9, 11, 13, 15, 15, 17,
         7,  7,  9, 11, 13, 15, 17, 19,
         7,  9, 11, 13, 14, 16, 19, 23,
         9, 11, 13, 14, 16, 19, 23, 29,
         9, 11, 13, 15, 17, 21, 28, 35,
        11, 13, 16, 17, 21, 28, 35, 41,
    },
    { // standard
         4,  4,  5,  5,  6,  7,  7,  9,
         4,  4,  5,  6,  7,  7,  9,  9,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  6,  7,  7,  8,  9, 10, 12,
         6,  7,  7,  8,  9, 10, 12, 15,
         6,  7,  7,  9, 10, 11, 14, 17,
         7,  7,  9, 10, 11, 14, 17, 21,
    },
    { // high quality
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  5,
         4,  4,  4,  4,  4,  4,  5,  5,
         4,  4,  4,  4,  4,  5,  5,  6,
         4,  4,  4,  4,  5,  5,  6,  7,
         4,  4,  4,  4,  5,  6,  7,  7,
    },
    { // codec default
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
    },
};
 105
/* Macroblock-count thresholds for the frame size classes named in the
 * per-entry comments. Each profile's br_tab[] has one entry per threshold.
 * NOTE(review): presumably the first limit not exceeded by the frame's
 * macroblock count selects the br_tab[] entry - confirm in encode_init(). */
#define NUM_MB_LIMITS 4
static const int prores_mb_limits[NUM_MB_LIMITS] = {
    1620, // up to 720x576
    2700, // up to 960x720
    6075, // up to 1440x1080
    9216, // up to 2048x1152
};
 113
/* Static description of one encoder profile; prores_profile_info[] holds
 * one entry per PRORES_PROFILE_* value, in enum order. */
static const struct prores_profile {
    const char *full_name;              // human-readable profile name
    uint32_t    tag;                    // four-character code built with MKTAG
    int         min_quant;              // lowest quantiser tried by find_slice_quant()
    int         max_quant;              // highest regular quantiser tried by find_slice_quant()
    int         br_tab[NUM_MB_LIMITS];  // one rate figure per prores_mb_limits[] entry
    int         quant;                  // QUANT_MAT_* selector associated with the profile
} prores_profile_info[4] = {
    {
        .full_name = "proxy",
        .tag       = MKTAG('a', 'p', 'c', 'o'),
        .min_quant = 4,
        .max_quant = 8,
        .br_tab    = { 300, 242, 220, 194 },
        .quant     = QUANT_MAT_PROXY,
    },
    {
        .full_name = "LT",
        .tag       = MKTAG('a', 'p', 'c', 's'),
        .min_quant = 1,
        .max_quant = 9,
        .br_tab    = { 720, 560, 490, 440 },
        .quant     = QUANT_MAT_LT,
    },
    {
        .full_name = "standard",
        .tag       = MKTAG('a', 'p', 'c', 'n'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 1050, 808, 710, 632 },
        .quant     = QUANT_MAT_STANDARD,
    },
    {
        .full_name = "high quality",
        .tag       = MKTAG('a', 'p', 'c', 'h'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 1566, 1216, 1070, 950 },
        .quant     = QUANT_MAT_HQ,
    }
// for 4444 profile bitrate numbers are { 2350, 1828, 1600, 1425 }
};
 156
 157 #define TRELLIS_WIDTH 16
 158 #define SCORE_LIMIT   INT_MAX / 2
 159
/* One state of the per-row quantiser trellis built by find_slice_quant(). */
struct TrellisNode {
    int prev_node;  // index of the best predecessor node, -1 while unset
    int quant;      // quantiser this node stands for
    int bits;       // bits accumulated along the best path ending here
    int score;      // error accumulated along that path, or SCORE_LIMIT
};
 166
/* Number of scaled quantisation matrices kept in ProresContext.quants;
 * quantisers at or above this value get their matrix built on demand. */
#define MAX_STORED_Q 16
 168
/* Scratch state owned by one worker of find_quant_thread(). */
typedef struct ProresThreadData {
    DECLARE_ALIGNED(16, DCTELEM, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE]; // DCT coefficients of the current slice, per plane
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];  // edge-padded copy of a macroblock that crosses the picture border
    int16_t custom_q[64];        // matrix built on demand for quantisers >= MAX_STORED_Q
    struct TrellisNode *nodes;   // trellis nodes for one macroblock row
} ProresThreadData;
 175
/* Private encoder state, reached through AVCodecContext.priv_data. */
typedef struct ProresContext {
    AVClass *class;
    DECLARE_ALIGNED(16, DCTELEM, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE]; // coefficient buffer used by encode_slice()
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];  // edge-padding buffer used by encode_slice()
    int16_t quants[MAX_STORED_Q][64];   // scaled matrices, indexed by quantiser
    int16_t custom_q[64];               // matrix built on demand for quantisers >= MAX_STORED_Q
    const uint8_t *quant_mat;           // base matrix; also written to the frame header

    ProresDSPContext dsp;               // provides the forward DCT
    ScanTable  scantable;               // coefficient scan order

    int mb_width, mb_height;            // picture size in macroblocks
    int mbs_per_slice;                  // nominal slice width in macroblocks
    int num_chroma_blocks, chroma_factor; // chroma_factor is CFACTOR_Y422 or CFACTOR_Y444
    int slices_width;                   // slices per macroblock row
    int slices_per_picture;             // written to the picture header
    int pictures_per_frame; // 1 for progressive, 2 for interlaced
    int cur_picture_idx;                // picture currently being encoded
    int num_planes;
    int bits_per_mb;                    // bit budget per macroblock for rate control
    int force_quant;                    // non-zero: use this quantiser for every slice

    char *vendor;                       // first 4 bytes are written to the frame header
    int quant_sel;                      // QUANT_MAT_* selector

    int frame_size_upper_bound;         // used to size the output packet

    int profile;
    const struct prores_profile *profile_info;

    int *slice_q;                       // quantiser chosen for each slice of the picture

    ProresThreadData *tdata;            // one entry per worker thread
} ProresContext;
 210
 211 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
 212                            int linesize, int x, int y, int w, int h,
 213                            DCTELEM *blocks, uint16_t *emu_buf,
 214                            int mbs_per_slice, int blocks_per_mb, int is_chroma)
 215 {
 216     const uint16_t *esrc;
 217     const int mb_width = 4 * blocks_per_mb;
 218     int elinesize;
 219     int i, j, k;
 220
 221     for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
 222         if (x >= w) {
 223             memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
 224                               * sizeof(*blocks));
 225             return;
 226         }
 227         if (x + mb_width <= w && y + 16 <= h) {
 228             esrc      = src;
 229             elinesize = linesize;
 230         } else {
 231             int bw, bh, pix;
 232
 233             esrc      = emu_buf;
 234             elinesize = 16 * sizeof(*emu_buf);
 235
 236             bw = FFMIN(w - x, mb_width);
 237             bh = FFMIN(h - y, 16);
 238
 239             for (j = 0; j < bh; j++) {
 240                 memcpy(emu_buf + j * 16,
 241                        (const uint8_t*)src + j * linesize,
 242                        bw * sizeof(*src));
 243                 pix = emu_buf[j * 16 + bw - 1];
 244                 for (k = bw; k < mb_width; k++)
 245                     emu_buf[j * 16 + k] = pix;
 246             }
 247             for (; j < 16; j++)
 248                 memcpy(emu_buf + j * 16,
 249                        emu_buf + (bh - 1) * 16,
 250                        mb_width * sizeof(*emu_buf));
 251         }
 252         if (!is_chroma) {
 253             ctx->dsp.fdct(esrc, elinesize, blocks);
 254             blocks += 64;
 255             if (blocks_per_mb > 2) {
 256                 ctx->dsp.fdct(esrc + 8, elinesize, blocks);
 257                 blocks += 64;
 258             }
 259             ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
 260             blocks += 64;
 261             if (blocks_per_mb > 2) {
 262                 ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
 263                 blocks += 64;
 264             }
 265         } else {
 266             ctx->dsp.fdct(esrc, elinesize, blocks);
 267             blocks += 64;
 268             ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
 269             blocks += 64;
 270             if (blocks_per_mb > 2) {
 271                 ctx->dsp.fdct(esrc + 8, elinesize, blocks);
 272                 blocks += 64;
 273                 ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
 274                 blocks += 64;
 275             }
 276         }
 277
 278         x += mb_width;
 279     }
 280 }
 281
 282 /**
 283  * Write an unsigned rice/exp golomb codeword.
 284  */
 285 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
 286 {
 287     unsigned int rice_order, exp_order, switch_bits, switch_val;
 288     int exponent;
 289
 290     /* number of prefix bits to switch between Rice and expGolomb */
 291     switch_bits = (codebook & 3) + 1;
 292     rice_order  =  codebook >> 5;       /* rice code order */
 293     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 294
 295     switch_val  = switch_bits << rice_order;
 296
 297     if (val >= switch_val) {
 298         val -= switch_val - (1 << exp_order);
 299         exponent = av_log2(val);
 300
 301         put_bits(pb, exponent - exp_order + switch_bits, 0);
 302         put_bits(pb, 1, 1);
 303         put_bits(pb, exponent, val);
 304     } else {
 305         exponent = val >> rice_order;
 306
 307         if (exponent)
 308             put_bits(pb, exponent, 0);
 309         put_bits(pb, 1, 1);
 310         if (rice_order)
 311             put_sbits(pb, rice_order, val);
 312     }
 313 }
 314
 315 #define GET_SIGN(x)  ((x) >> 31)
 316 #define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
 317
 318 static void encode_dcs(PutBitContext *pb, DCTELEM *blocks,
 319                        int blocks_per_slice, int scale)
 320 {
 321     int i;
 322     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 323
 324     prev_dc = (blocks[0] - 0x4000) / scale;
 325     encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
 326     sign     = 0;
 327     codebook = 3;
 328     blocks  += 64;
 329
 330     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 331         dc       = (blocks[0] - 0x4000) / scale;
 332         delta    = dc - prev_dc;
 333         new_sign = GET_SIGN(delta);
 334         delta    = (delta ^ sign) - sign;
 335         code     = MAKE_CODE(delta);
 336         encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
 337         codebook = (code + (code & 1)) >> 1;
 338         codebook = FFMIN(codebook, 3);
 339         sign     = new_sign;
 340         prev_dc  = dc;
 341     }
 342 }
 343
 344 static void encode_acs(PutBitContext *pb, DCTELEM *blocks,
 345                        int blocks_per_slice,
 346                        int plane_size_factor,
 347                        const uint8_t *scan, const int16_t *qmat)
 348 {
 349     int idx, i;
 350     int run, level, run_cb, lev_cb;
 351     int max_coeffs, abs_level;
 352
 353     max_coeffs = blocks_per_slice << 6;
 354     run_cb     = ff_prores_run_to_cb_index[4];
 355     lev_cb     = ff_prores_lev_to_cb_index[2];
 356     run        = 0;
 357
 358     for (i = 1; i < 64; i++) {
 359         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 360             level = blocks[idx] / qmat[scan[i]];
 361             if (level) {
 362                 abs_level = FFABS(level);
 363                 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
 364                 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
 365                                     abs_level - 1);
 366                 put_sbits(pb, 1, GET_SIGN(level));
 367
 368                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 369                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 370                 run    = 0;
 371             } else {
 372                 run++;
 373             }
 374         }
 375     }
 376 }
 377
 378 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
 379                               const uint16_t *src, int linesize,
 380                               int mbs_per_slice, DCTELEM *blocks,
 381                               int blocks_per_mb, int plane_size_factor,
 382                               const int16_t *qmat)
 383 {
 384     int blocks_per_slice, saved_pos;
 385
 386     saved_pos = put_bits_count(pb);
 387     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 388
 389     encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
 390     encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
 391                ctx->scantable.permutated, qmat);
 392     flush_put_bits(pb);
 393
 394     return (put_bits_count(pb) - saved_pos) >> 3;
 395 }
 396
 397 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
 398                         PutBitContext *pb,
 399                         int sizes[4], int x, int y, int quant,
 400                         int mbs_per_slice)
 401 {
 402     ProresContext *ctx = avctx->priv_data;
 403     int i, xp, yp;
 404     int total_size = 0;
 405     const uint16_t *src;
 406     int slice_width_factor = av_log2(mbs_per_slice);
 407     int num_cblocks, pwidth, linesize, line_add;
 408     int plane_factor, is_chroma;
 409     uint16_t *qmat;
 410
 411     if (ctx->pictures_per_frame == 1)
 412         line_add = 0;
 413     else
 414         line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
 415
 416     if (ctx->force_quant) {
 417         qmat = ctx->quants[0];
 418     } else if (quant < MAX_STORED_Q) {
 419         qmat = ctx->quants[quant];
 420     } else {
 421         qmat = ctx->custom_q;
 422         for (i = 0; i < 64; i++)
 423             qmat[i] = ctx->quant_mat[i] * quant;
 424     }
 425
 426     for (i = 0; i < ctx->num_planes; i++) {
 427         is_chroma    = (i == 1 || i == 2);
 428         plane_factor = slice_width_factor + 2;
 429         if (is_chroma)
 430             plane_factor += ctx->chroma_factor - 3;
 431         if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
 432             xp          = x << 4;
 433             yp          = y << 4;
 434             num_cblocks = 4;
 435             pwidth      = avctx->width;
 436         } else {
 437             xp          = x << 3;
 438             yp          = y << 4;
 439             num_cblocks = 2;
 440             pwidth      = avctx->width >> 1;
 441         }
 442
 443         linesize = pic->linesize[i] * ctx->pictures_per_frame;
 444         src = (const uint16_t*)(pic->data[i] + yp * linesize +
 445                                 line_add * pic->linesize[i]) + xp;
 446
 447         get_slice_data(ctx, src, linesize, xp, yp,
 448                        pwidth, avctx->height / ctx->pictures_per_frame,
 449                        ctx->blocks[0], ctx->emu_buf,
 450                        mbs_per_slice, num_cblocks, is_chroma);
 451         sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
 452                                       mbs_per_slice, ctx->blocks[0],
 453                                       num_cblocks, plane_factor,
 454                                       qmat);
 455         total_size += sizes[i];
 456     }
 457     return total_size;
 458 }
 459
 460 static inline int estimate_vlc(unsigned codebook, int val)
 461 {
 462     unsigned int rice_order, exp_order, switch_bits, switch_val;
 463     int exponent;
 464
 465     /* number of prefix bits to switch between Rice and expGolomb */
 466     switch_bits = (codebook & 3) + 1;
 467     rice_order  =  codebook >> 5;       /* rice code order */
 468     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 469
 470     switch_val  = switch_bits << rice_order;
 471
 472     if (val >= switch_val) {
 473         val -= switch_val - (1 << exp_order);
 474         exponent = av_log2(val);
 475
 476         return exponent * 2 - exp_order + switch_bits + 1;
 477     } else {
 478         return (val >> rice_order) + rice_order + 1;
 479     }
 480 }
 481
 482 static int estimate_dcs(int *error, DCTELEM *blocks, int blocks_per_slice,
 483                         int scale)
 484 {
 485     int i;
 486     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 487     int bits;
 488
 489     prev_dc  = (blocks[0] - 0x4000) / scale;
 490     bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
 491     sign     = 0;
 492     codebook = 3;
 493     blocks  += 64;
 494     *error  += FFABS(blocks[0] - 0x4000) % scale;
 495
 496     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 497         dc       = (blocks[0] - 0x4000) / scale;
 498         *error  += FFABS(blocks[0] - 0x4000) % scale;
 499         delta    = dc - prev_dc;
 500         new_sign = GET_SIGN(delta);
 501         delta    = (delta ^ sign) - sign;
 502         code     = MAKE_CODE(delta);
 503         bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
 504         codebook = (code + (code & 1)) >> 1;
 505         codebook = FFMIN(codebook, 3);
 506         sign     = new_sign;
 507         prev_dc  = dc;
 508     }
 509
 510     return bits;
 511 }
 512
 513 static int estimate_acs(int *error, DCTELEM *blocks, int blocks_per_slice,
 514                         int plane_size_factor,
 515                         const uint8_t *scan, const int16_t *qmat)
 516 {
 517     int idx, i;
 518     int run, level, run_cb, lev_cb;
 519     int max_coeffs, abs_level;
 520     int bits = 0;
 521
 522     max_coeffs = blocks_per_slice << 6;
 523     run_cb     = ff_prores_run_to_cb_index[4];
 524     lev_cb     = ff_prores_lev_to_cb_index[2];
 525     run        = 0;
 526
 527     for (i = 1; i < 64; i++) {
 528         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 529             level   = blocks[idx] / qmat[scan[i]];
 530             *error += FFABS(blocks[idx]) % qmat[scan[i]];
 531             if (level) {
 532                 abs_level = FFABS(level);
 533                 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
 534                 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
 535                                      abs_level - 1) + 1;
 536
 537                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 538                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 539                 run    = 0;
 540             } else {
 541                 run++;
 542             }
 543         }
 544     }
 545
 546     return bits;
 547 }
 548
 549 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
 550                                 const uint16_t *src, int linesize,
 551                                 int mbs_per_slice,
 552                                 int blocks_per_mb, int plane_size_factor,
 553                                 const int16_t *qmat, ProresThreadData *td)
 554 {
 555     int blocks_per_slice;
 556     int bits;
 557
 558     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 559
 560     bits  = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
 561     bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
 562                          plane_size_factor, ctx->scantable.permutated, qmat);
 563
 564     return FFALIGN(bits, 8);
 565 }
 566
 567 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
 568                             int trellis_node, int x, int y, int mbs_per_slice,
 569                             ProresThreadData *td)
 570 {
 571     ProresContext *ctx = avctx->priv_data;
 572     int i, q, pq, xp, yp;
 573     const uint16_t *src;
 574     int slice_width_factor = av_log2(mbs_per_slice);
 575     int num_cblocks[MAX_PLANES], pwidth;
 576     int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
 577     const int min_quant = ctx->profile_info->min_quant;
 578     const int max_quant = ctx->profile_info->max_quant;
 579     int error, bits, bits_limit;
 580     int mbs, prev, cur, new_score;
 581     int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
 582     int overquant;
 583     uint16_t *qmat;
 584     int linesize[4], line_add;
 585
 586     if (ctx->pictures_per_frame == 1)
 587         line_add = 0;
 588     else
 589         line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
 590     mbs = x + mbs_per_slice;
 591
 592     for (i = 0; i < ctx->num_planes; i++) {
 593         is_chroma[i]    = (i == 1 || i == 2);
 594         plane_factor[i] = slice_width_factor + 2;
 595         if (is_chroma[i])
 596             plane_factor[i] += ctx->chroma_factor - 3;
 597         if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
 598             xp             = x << 4;
 599             yp             = y << 4;
 600             num_cblocks[i] = 4;
 601             pwidth         = avctx->width;
 602         } else {
 603             xp             = x << 3;
 604             yp             = y << 4;
 605             num_cblocks[i] = 2;
 606             pwidth         = avctx->width >> 1;
 607         }
 608
 609         linesize[i] = pic->linesize[i] * ctx->pictures_per_frame;
 610         src = (const uint16_t*)(pic->data[i] + yp * linesize[i] +
 611                                 line_add * pic->linesize[i]) + xp;
 612
 613         get_slice_data(ctx, src, linesize[i], xp, yp,
 614                        pwidth, avctx->height / ctx->pictures_per_frame,
 615                        td->blocks[i], td->emu_buf,
 616                        mbs_per_slice, num_cblocks[i], is_chroma[i]);
 617     }
 618
 619     for (q = min_quant; q < max_quant + 2; q++) {
 620         td->nodes[trellis_node + q].prev_node = -1;
 621         td->nodes[trellis_node + q].quant     = q;
 622     }
 623
 624     // todo: maybe perform coarser quantising to fit into frame size when needed
 625     for (q = min_quant; q <= max_quant; q++) {
 626         bits  = 0;
 627         error = 0;
 628         for (i = 0; i < ctx->num_planes; i++) {
 629             bits += estimate_slice_plane(ctx, &error, i,
 630                                          src, linesize[i],
 631                                          mbs_per_slice,
 632                                          num_cblocks[i], plane_factor[i],
 633                                          ctx->quants[q], td);
 634         }
 635         if (bits > 65000 * 8) {
 636             error = SCORE_LIMIT;
 637             break;
 638         }
 639         slice_bits[q]  = bits;
 640         slice_score[q] = error;
 641     }
 642     if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
 643         slice_bits[max_quant + 1]  = slice_bits[max_quant];
 644         slice_score[max_quant + 1] = slice_score[max_quant] + 1;
 645         overquant = max_quant;
 646     } else {
 647         for (q = max_quant + 1; q < 128; q++) {
 648             bits  = 0;
 649             error = 0;
 650             if (q < MAX_STORED_Q) {
 651                 qmat = ctx->quants[q];
 652             } else {
 653                 qmat = td->custom_q;
 654                 for (i = 0; i < 64; i++)
 655                     qmat[i] = ctx->quant_mat[i] * q;
 656             }
 657             for (i = 0; i < ctx->num_planes; i++) {
 658                 bits += estimate_slice_plane(ctx, &error, i,
 659                                              src, linesize[i],
 660                                              mbs_per_slice,
 661                                              num_cblocks[i], plane_factor[i],
 662                                              qmat, td);
 663             }
 664             if (bits <= ctx->bits_per_mb * mbs_per_slice)
 665                 break;
 666         }
 667
 668         slice_bits[max_quant + 1]  = bits;
 669         slice_score[max_quant + 1] = error;
 670         overquant = q;
 671     }
 672     td->nodes[trellis_node + max_quant + 1].quant = overquant;
 673
 674     bits_limit = mbs * ctx->bits_per_mb;
 675     for (pq = min_quant; pq < max_quant + 2; pq++) {
 676         prev = trellis_node - TRELLIS_WIDTH + pq;
 677
 678         for (q = min_quant; q < max_quant + 2; q++) {
 679             cur = trellis_node + q;
 680
 681             bits  = td->nodes[prev].bits + slice_bits[q];
 682             error = slice_score[q];
 683             if (bits > bits_limit)
 684                 error = SCORE_LIMIT;
 685
 686             if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
 687                 new_score = td->nodes[prev].score + error;
 688             else
 689                 new_score = SCORE_LIMIT;
 690             if (td->nodes[cur].prev_node == -1 ||
 691                 td->nodes[cur].score >= new_score) {
 692
 693                 td->nodes[cur].bits      = bits;
 694                 td->nodes[cur].score     = new_score;
 695                 td->nodes[cur].prev_node = prev;
 696             }
 697         }
 698     }
 699
 700     error = td->nodes[trellis_node + min_quant].score;
 701     pq    = trellis_node + min_quant;
 702     for (q = min_quant + 1; q < max_quant + 2; q++) {
 703         if (td->nodes[trellis_node + q].score <= error) {
 704             error = td->nodes[trellis_node + q].score;
 705             pq    = trellis_node + q;
 706         }
 707     }
 708
 709     return pq;
 710 }
 711
 712 static int find_quant_thread(AVCodecContext *avctx, void *arg,
 713                              int jobnr, int threadnr)
 714 {
 715     ProresContext *ctx = avctx->priv_data;
 716     ProresThreadData *td = ctx->tdata + threadnr;
 717     int mbs_per_slice = ctx->mbs_per_slice;
 718     int x, y = jobnr, mb, q = 0;
 719
 720     for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
 721         while (ctx->mb_width - x < mbs_per_slice)
 722             mbs_per_slice >>= 1;
 723         q = find_slice_quant(avctx, avctx->coded_frame,
 724                              (mb + 1) * TRELLIS_WIDTH, x, y,
 725                              mbs_per_slice, td);
 726     }
 727
 728     for (x = ctx->slices_width - 1; x >= 0; x--) {
 729         ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
 730         q = td->nodes[q].prev_node;
 731     }
 732
 733     return 0;
 734 }
 735
 736 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 737                         const AVFrame *pic, int *got_packet)
 738 {
 739     ProresContext *ctx = avctx->priv_data;
 740     uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
 741     uint8_t *picture_size_pos;
 742     PutBitContext pb;
 743     int x, y, i, mb, q = 0;
 744     int sizes[4] = { 0 };
 745     int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
 746     int frame_size, picture_size, slice_size;
 747     int pkt_size, ret;
 748     uint8_t frame_flags;
 749
 750     *avctx->coded_frame           = *pic;
 751     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
 752     avctx->coded_frame->key_frame = 1;
 753
 754     pkt_size = ctx->frame_size_upper_bound + FF_MIN_BUFFER_SIZE;
 755
 756     if ((ret = ff_alloc_packet(pkt, pkt_size)) < 0) {
 757         av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
 758         return ret;
 759     }
 760
 761     orig_buf = pkt->data;
 762
 763     // frame atom
 764     orig_buf += 4;                              // frame size
 765     bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
 766     buf = orig_buf;
 767
 768     // frame header
 769     tmp = buf;
 770     buf += 2;                                   // frame header size will be stored here
 771     bytestream_put_be16  (&buf, 0);             // version 1
 772     bytestream_put_buffer(&buf, ctx->vendor, 4);
 773     bytestream_put_be16  (&buf, avctx->width);
 774     bytestream_put_be16  (&buf, avctx->height);
 775
 776     frame_flags = ctx->chroma_factor << 6;
 777     if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
 778         frame_flags |= pic->top_field_first ? 0x04 : 0x08;
 779     bytestream_put_byte  (&buf, frame_flags);
 780
 781     bytestream_put_byte  (&buf, 0);             // reserved
 782     bytestream_put_byte  (&buf, avctx->color_primaries);
 783     bytestream_put_byte  (&buf, avctx->color_trc);
 784     bytestream_put_byte  (&buf, avctx->colorspace);
 785     bytestream_put_byte  (&buf, 0x40);          // source format and alpha information
 786     bytestream_put_byte  (&buf, 0);             // reserved
 787     if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
 788         bytestream_put_byte  (&buf, 0x03);      // matrix flags - both matrices are present
 789         // luma quantisation matrix
 790         for (i = 0; i < 64; i++)
 791             bytestream_put_byte(&buf, ctx->quant_mat[i]);
 792         // chroma quantisation matrix
 793         for (i = 0; i < 64; i++)
 794             bytestream_put_byte(&buf, ctx->quant_mat[i]);
 795     } else {
 796         bytestream_put_byte  (&buf, 0x00);      // matrix flags - default matrices are used
 797     }
 798     bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
 799
 800     for (ctx->cur_picture_idx = 0;
 801          ctx->cur_picture_idx < ctx->pictures_per_frame;
 802          ctx->cur_picture_idx++) {
 803         // picture header
 804         picture_size_pos = buf + 1;
 805         bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
 806         buf += 4;                                   // picture data size will be stored here
 807         bytestream_put_be16  (&buf, ctx->slices_per_picture);
 808         bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
 809
 810         // seek table - will be filled during slice encoding
 811         slice_sizes = buf;
 812         buf += ctx->slices_per_picture * 2;
 813
 814         // slices
 815         if (!ctx->force_quant) {
 816             ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
 817                                   ctx->mb_height);
 818             if (ret)
 819                 return ret;
 820         }
 821
 822         for (y = 0; y < ctx->mb_height; y++) {
 823             int mbs_per_slice = ctx->mbs_per_slice;
 824             for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
 825                 q = ctx->force_quant ? ctx->force_quant
 826                                      : ctx->slice_q[mb + y * ctx->slices_width];
 827
 828                 while (ctx->mb_width - x < mbs_per_slice)
 829                     mbs_per_slice >>= 1;
 830
 831                 bytestream_put_byte(&buf, slice_hdr_size << 3);
 832                 slice_hdr = buf;
 833                 buf += slice_hdr_size - 1;
 834                 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
 835                 encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
 836
 837                 bytestream_put_byte(&slice_hdr, q);
 838                 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
 839                 for (i = 0; i < ctx->num_planes - 1; i++) {
 840                     bytestream_put_be16(&slice_hdr, sizes[i]);
 841                     slice_size += sizes[i];
 842                 }
 843                 bytestream_put_be16(&slice_sizes, slice_size);
 844                 buf += slice_size - slice_hdr_size;
 845             }
 846         }
 847
 848         if (ctx->pictures_per_frame == 1)
 849             picture_size = buf - picture_size_pos - 6;
 850         else
 851             picture_size = buf - picture_size_pos + 1;
 852         bytestream_put_be32(&picture_size_pos, picture_size);
 853     }
 854
 855     orig_buf -= 8;
 856     frame_size = buf - orig_buf;
 857     bytestream_put_be32(&orig_buf, frame_size);
 858
 859     pkt->size   = frame_size;
 860     pkt->flags |= AV_PKT_FLAG_KEY;
 861     *got_packet = 1;
 862
 863     return 0;
 864 }
 865
 866 static av_cold int encode_close(AVCodecContext *avctx)
 867 {
 868     ProresContext *ctx = avctx->priv_data;
 869     int i;
 870
 871     if (avctx->coded_frame->data[0])
 872         avctx->release_buffer(avctx, avctx->coded_frame);
 873
 874     av_freep(&avctx->coded_frame);
 875
 876     if (ctx->tdata) {
 877         for (i = 0; i < avctx->thread_count; i++)
 878             av_free(ctx->tdata[i].nodes);
 879     }
 880     av_freep(&ctx->tdata);
 881     av_freep(&ctx->slice_q);
 882
 883     return 0;
 884 }
 885
 886 static av_cold int encode_init(AVCodecContext *avctx)
 887 {
 888     ProresContext *ctx = avctx->priv_data;
 889     int mps;
 890     int i, j;
 891     int min_quant, max_quant;
 892     int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
 893
 894     avctx->bits_per_raw_sample = 10;
 895     avctx->coded_frame = avcodec_alloc_frame();
 896     if (!avctx->coded_frame)
 897         return AVERROR(ENOMEM);
 898
 899     ff_proresdsp_init(&ctx->dsp);
 900     ff_init_scantable(ctx->dsp.dct_permutation, &ctx->scantable,
 901                       interlaced ? ff_prores_interlaced_scan
 902                                  : ff_prores_progressive_scan);
 903
 904     mps = ctx->mbs_per_slice;
 905     if (mps & (mps - 1)) {
 906         av_log(avctx, AV_LOG_ERROR,
 907                "there should be an integer power of two MBs per slice\n");
 908         return AVERROR(EINVAL);
 909     }
 910
 911     ctx->chroma_factor = avctx->pix_fmt == PIX_FMT_YUV422P10
 912                          ? CFACTOR_Y422
 913                          : CFACTOR_Y444;
 914     ctx->profile_info  = prores_profile_info + ctx->profile;
 915     ctx->num_planes    = 3;
 916
 917     ctx->mb_width      = FFALIGN(avctx->width,  16) >> 4;
 918
 919     if (interlaced)
 920         ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
 921     else
 922         ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
 923
 924     ctx->slices_width  = ctx->mb_width / mps;
 925     ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
 926     ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
 927     ctx->pictures_per_frame = 1 + interlaced;
 928
 929     if (ctx->quant_sel == -1)
 930         ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
 931     else
 932         ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
 933
 934     if (strlen(ctx->vendor) != 4) {
 935         av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
 936         return AVERROR_INVALIDDATA;
 937     }
 938
 939     ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
 940     if (!ctx->force_quant) {
 941         if (!ctx->bits_per_mb) {
 942             for (i = 0; i < NUM_MB_LIMITS - 1; i++)
 943                 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
 944                                            ctx->pictures_per_frame)
 945                     break;
 946             ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
 947         } else if (ctx->bits_per_mb < 128) {
 948             av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
 949             return AVERROR_INVALIDDATA;
 950         }
 951
 952         min_quant = ctx->profile_info->min_quant;
 953         max_quant = ctx->profile_info->max_quant;
 954         for (i = min_quant; i < MAX_STORED_Q; i++) {
 955             for (j = 0; j < 64; j++)
 956                 ctx->quants[i][j] = ctx->quant_mat[j] * i;
 957         }
 958
 959         ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
 960         if (!ctx->slice_q) {
 961             encode_close(avctx);
 962             return AVERROR(ENOMEM);
 963         }
 964
 965         ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
 966         if (!ctx->tdata) {
 967             encode_close(avctx);
 968             return AVERROR(ENOMEM);
 969         }
 970
 971         for (j = 0; j < avctx->thread_count; j++) {
 972             ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
 973                                             * TRELLIS_WIDTH
 974                                             * sizeof(*ctx->tdata->nodes));
 975             if (!ctx->tdata[j].nodes) {
 976                 encode_close(avctx);
 977                 return AVERROR(ENOMEM);
 978             }
 979             for (i = min_quant; i < max_quant + 2; i++) {
 980                 ctx->tdata[j].nodes[i].prev_node = -1;
 981                 ctx->tdata[j].nodes[i].bits      = 0;
 982                 ctx->tdata[j].nodes[i].score     = 0;
 983             }
 984         }
 985     } else {
 986         int ls = 0;
 987
 988         if (ctx->force_quant > 64) {
 989             av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
 990             return AVERROR_INVALIDDATA;
 991         }
 992
 993         for (j = 0; j < 64; j++) {
 994             ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
 995             ls += av_log2((1 << 11)  / ctx->quants[0][j]) * 2 + 1;
 996         }
 997
 998         ctx->bits_per_mb = ls * 8;
 999         if (ctx->chroma_factor == CFACTOR_Y444)
1000             ctx->bits_per_mb += ls * 4;
1001         if (ctx->num_planes == 4)
1002             ctx->bits_per_mb += ls * 4;
1003     }
1004
1005     ctx->frame_size_upper_bound = ctx->pictures_per_frame *
1006                                   ctx->slices_per_picture *
1007                                   (2 + 2 * ctx->num_planes +
1008                                    (mps * ctx->bits_per_mb) / 8)
1009                                   + 200;
1010
1011     avctx->codec_tag   = ctx->profile_info->tag;
1012
1013     av_log(avctx, AV_LOG_DEBUG,
1014            "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1015            ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1016            interlaced ? "yes" : "no", ctx->bits_per_mb);
1017     av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1018            ctx->frame_size_upper_bound);
1019
1020     return 0;
1021 }
1022
/* Byte offset of a ProresContext member, used to bind options to fields. */
#define OFFSET(x) offsetof(ProresContext, x)
/* Flag set shared by all options below; parenthesised so the expansion
 * stays a single operand wherever it is used. */
#define VE     (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
1025
1026 static const AVOption options[] = {
1027     { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1028         AV_OPT_TYPE_INT, { 8 }, 1, MAX_MBS_PER_SLICE, VE },
1029     { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1030         { PRORES_PROFILE_STANDARD },
1031         PRORES_PROFILE_PROXY, PRORES_PROFILE_HQ, VE, "profile" },
1032     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_PROXY },
1033         0, 0, VE, "profile" },
1034     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_LT },
1035         0, 0, VE, "profile" },
1036     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_STANDARD },
1037         0, 0, VE, "profile" },
1038     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_HQ },
1039         0, 0, VE, "profile" },
1040     { "vendor", "vendor ID", OFFSET(vendor),
1041         AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
1042     { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1043         AV_OPT_TYPE_INT, { 0 }, 0, 8192, VE },
1044     { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1045         { -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1046     { "auto",          NULL, 0, AV_OPT_TYPE_CONST, { -1 },
1047         0, 0, VE, "quant_mat" },
1048     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_PROXY },
1049         0, 0, VE, "quant_mat" },
1050     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_LT },
1051         0, 0, VE, "quant_mat" },
1052     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_STANDARD },
1053         0, 0, VE, "quant_mat" },
1054     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_HQ },
1055         0, 0, VE, "quant_mat" },
1056     { "default",       NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_DEFAULT },
1057         0, 0, VE, "quant_mat" },
1058     { NULL }
1059 };
1060
1061 static const AVClass proresenc_class = {
1062     .class_name = "ProRes encoder",
1063     .item_name  = av_default_item_name,
1064     .option     = options,
1065     .version    = LIBAVUTIL_VERSION_INT,
1066 };
1067
1068 AVCodec ff_prores_encoder = {
1069     .name           = "prores",
1070     .type           = AVMEDIA_TYPE_VIDEO,
1071     .id             = AV_CODEC_ID_PRORES,
1072     .priv_data_size = sizeof(ProresContext),
1073     .init           = encode_init,
1074     .close          = encode_close,
1075     .encode2        = encode_frame,
1076     .capabilities   = CODEC_CAP_SLICE_THREADS,
1077     .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1078     .pix_fmts       = (const enum PixelFormat[]) {
1079                           PIX_FMT_YUV422P10, PIX_FMT_YUV444P10, PIX_FMT_NONE
1080                       },
1081     .priv_class     = &proresenc_class,
1082 };