git.sesse.net Git - ffmpeg/blob - libavcodec/proresenc_kostya.c

   1 /*
   2  * Apple ProRes encoder
   3  *
   4  * Copyright (c) 2012 Konstantin Shishkov
   5  *
   6  * This file is part of Libav.
   7  *
   8  * Libav is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * Libav is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with Libav; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 #include "libavutil/opt.h"
  24 #include "avcodec.h"
  25 #include "put_bits.h"
  26 #include "bytestream.h"
  27 #include "internal.h"
  28 #include "proresdsp.h"
  29 #include "proresdata.h"
  30
/* Chroma format codes. encode_frame() stores the selected value in the
 * frame flags byte as (chroma_factor << 6). */
#define CFACTOR_Y422 2
#define CFACTOR_Y444 3

/* Upper bound on macroblocks per slice; it sizes the per-plane coefficient
 * buffer in ProresContext (64 * 4 * MAX_MBS_PER_SLICE coefficients). */
#define MAX_MBS_PER_SLICE 8

#define MAX_PLANES 3 // should be increased to 4 when there's PIX_FMT_YUV444AP10
  37
  38 enum {
  39     PRORES_PROFILE_PROXY = 0,
  40     PRORES_PROFILE_LT,
  41     PRORES_PROFILE_STANDARD,
  42     PRORES_PROFILE_HQ,
  43 };
  44
  45 enum {
  46     QUANT_MAT_PROXY = 0,
  47     QUANT_MAT_LT,
  48     QUANT_MAT_STANDARD,
  49     QUANT_MAT_HQ,
  50     QUANT_MAT_DEFAULT,
  51 };
  52
  53 static const uint8_t prores_quant_matrices[][64] = {
  54     { // proxy
  55          4,  7,  9, 11, 13, 14, 15, 63,
  56          7,  7, 11, 12, 14, 15, 63, 63,
  57          9, 11, 13, 14, 15, 63, 63, 63,
  58         11, 11, 13, 14, 63, 63, 63, 63,
  59         11, 13, 14, 63, 63, 63, 63, 63,
  60         13, 14, 63, 63, 63, 63, 63, 63,
  61         13, 63, 63, 63, 63, 63, 63, 63,
  62         63, 63, 63, 63, 63, 63, 63, 63,
  63     },
  64     { // LT
  65          4,  5,  6,  7,  9, 11, 13, 15,
  66          5,  5,  7,  8, 11, 13, 15, 17,
  67          6,  7,  9, 11, 13, 15, 15, 17,
  68          7,  7,  9, 11, 13, 15, 17, 19,
  69          7,  9, 11, 13, 14, 16, 19, 23,
  70          9, 11, 13, 14, 16, 19, 23, 29,
  71          9, 11, 13, 15, 17, 21, 28, 35,
  72         11, 13, 16, 17, 21, 28, 35, 41,
  73     },
  74     { // standard
  75          4,  4,  5,  5,  6,  7,  7,  9,
  76          4,  4,  5,  6,  7,  7,  9,  9,
  77          5,  5,  6,  7,  7,  9,  9, 10,
  78          5,  5,  6,  7,  7,  9,  9, 10,
  79          5,  6,  7,  7,  8,  9, 10, 12,
  80          6,  7,  7,  8,  9, 10, 12, 15,
  81          6,  7,  7,  9, 10, 11, 14, 17,
  82          7,  7,  9, 10, 11, 14, 17, 21,
  83     },
  84     { // high quality
  85          4,  4,  4,  4,  4,  4,  4,  4,
  86          4,  4,  4,  4,  4,  4,  4,  4,
  87          4,  4,  4,  4,  4,  4,  4,  4,
  88          4,  4,  4,  4,  4,  4,  4,  5,
  89          4,  4,  4,  4,  4,  4,  5,  5,
  90          4,  4,  4,  4,  4,  5,  5,  6,
  91          4,  4,  4,  4,  5,  5,  6,  7,
  92          4,  4,  4,  4,  5,  6,  7,  7,
  93     },
  94     { // codec default
  95          4,  4,  4,  4,  4,  4,  4,  4,
  96          4,  4,  4,  4,  4,  4,  4,  4,
  97          4,  4,  4,  4,  4,  4,  4,  4,
  98          4,  4,  4,  4,  4,  4,  4,  4,
  99          4,  4,  4,  4,  4,  4,  4,  4,
 100          4,  4,  4,  4,  4,  4,  4,  4,
 101          4,  4,  4,  4,  4,  4,  4,  4,
 102          4,  4,  4,  4,  4,  4,  4,  4,
 103     },
 104 };
 105
 106 #define NUM_MB_LIMITS 4
 107 static const int prores_mb_limits[NUM_MB_LIMITS] = {
 108     1620, // up to 720x576
 109     2700, // up to 960x720
 110     6075, // up to 1440x1080
 111     9216, // up to 2048x1152
 112 };
 113
 114 static const struct prores_profile {
 115     const char *full_name;
 116     uint32_t    tag;
 117     int         min_quant;
 118     int         max_quant;
 119     int         br_tab[NUM_MB_LIMITS];
 120     int         quant;
 121 } prores_profile_info[4] = {
 122     {
 123         .full_name = "proxy",
 124         .tag       = MKTAG('a', 'p', 'c', 'o'),
 125         .min_quant = 4,
 126         .max_quant = 8,
 127         .br_tab    = { 300, 242, 220, 194 },
 128         .quant     = QUANT_MAT_PROXY,
 129     },
 130     {
 131         .full_name = "LT",
 132         .tag       = MKTAG('a', 'p', 'c', 's'),
 133         .min_quant = 1,
 134         .max_quant = 9,
 135         .br_tab    = { 720, 560, 490, 440 },
 136         .quant     = QUANT_MAT_LT,
 137     },
 138     {
 139         .full_name = "standard",
 140         .tag       = MKTAG('a', 'p', 'c', 'n'),
 141         .min_quant = 1,
 142         .max_quant = 6,
 143         .br_tab    = { 1050, 808, 710, 632 },
 144         .quant     = QUANT_MAT_STANDARD,
 145     },
 146     {
 147         .full_name = "high quality",
 148         .tag       = MKTAG('a', 'p', 'c', 'h'),
 149         .min_quant = 1,
 150         .max_quant = 6,
 151         .br_tab    = { 1566, 1216, 1070, 950 },
 152         .quant     = QUANT_MAT_HQ,
 153     }
 154 // for 4444 profile bitrate numbers are { 2350, 1828, 1600, 1425 }
 155 };
 156
 157 #define TRELLIS_WIDTH 16
 158 #define SCORE_LIMIT   INT_MAX / 2
 159
 160 struct TrellisNode {
 161     int prev_node;
 162     int quant;
 163     int bits;
 164     int score;
 165 };
 166
 167 #define MAX_STORED_Q 16
 168
 169 typedef struct ProresContext {
 170     AVClass *class;
 171     DECLARE_ALIGNED(16, DCTELEM, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
 172     DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
 173     int16_t quants[MAX_STORED_Q][64];
 174     int16_t custom_q[64];
 175     const uint8_t *quant_mat;
 176
 177     ProresDSPContext dsp;
 178     ScanTable  scantable;
 179
 180     int mb_width, mb_height;
 181     int mbs_per_slice;
 182     int num_chroma_blocks, chroma_factor;
 183     int slices_width;
 184     int num_slices;
 185     int num_planes;
 186     int bits_per_mb;
 187
 188     char *vendor;
 189     int quant_sel;
 190
 191     int frame_size;
 192
 193     int profile;
 194     const struct prores_profile *profile_info;
 195
 196     struct TrellisNode *nodes;
 197     int *slice_q;
 198 } ProresContext;
 199
 200 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
 201                            int linesize, int x, int y, int w, int h,
 202                            DCTELEM *blocks,
 203                            int mbs_per_slice, int blocks_per_mb, int is_chroma)
 204 {
 205     const uint16_t *esrc;
 206     const int mb_width = 4 * blocks_per_mb;
 207     int elinesize;
 208     int i, j, k;
 209
 210     for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
 211         if (x >= w) {
 212             memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
 213                               * sizeof(*blocks));
 214             return;
 215         }
 216         if (x + mb_width <= w && y + 16 <= h) {
 217             esrc      = src;
 218             elinesize = linesize;
 219         } else {
 220             int bw, bh, pix;
 221
 222             esrc      = ctx->emu_buf;
 223             elinesize = 16 * sizeof(*ctx->emu_buf);
 224
 225             bw = FFMIN(w - x, mb_width);
 226             bh = FFMIN(h - y, 16);
 227
 228             for (j = 0; j < bh; j++) {
 229                 memcpy(ctx->emu_buf + j * 16,
 230                        (const uint8_t*)src + j * linesize,
 231                        bw * sizeof(*src));
 232                 pix = ctx->emu_buf[j * 16 + bw - 1];
 233                 for (k = bw; k < mb_width; k++)
 234                     ctx->emu_buf[j * 16 + k] = pix;
 235             }
 236             for (; j < 16; j++)
 237                 memcpy(ctx->emu_buf + j * 16,
 238                        ctx->emu_buf + (bh - 1) * 16,
 239                        mb_width * sizeof(*ctx->emu_buf));
 240         }
 241         if (!is_chroma) {
 242             ctx->dsp.fdct(esrc, elinesize, blocks);
 243             blocks += 64;
 244             if (blocks_per_mb > 2) {
 245                 ctx->dsp.fdct(src + 8, linesize, blocks);
 246                 blocks += 64;
 247             }
 248             ctx->dsp.fdct(src + linesize * 4, linesize, blocks);
 249             blocks += 64;
 250             if (blocks_per_mb > 2) {
 251                 ctx->dsp.fdct(src + linesize * 4 + 8, linesize, blocks);
 252                 blocks += 64;
 253             }
 254         } else {
 255             ctx->dsp.fdct(esrc, elinesize, blocks);
 256             blocks += 64;
 257             ctx->dsp.fdct(src + linesize * 4, linesize, blocks);
 258             blocks += 64;
 259             if (blocks_per_mb > 2) {
 260                 ctx->dsp.fdct(src + 8, linesize, blocks);
 261                 blocks += 64;
 262                 ctx->dsp.fdct(src + linesize * 4 + 8, linesize, blocks);
 263                 blocks += 64;
 264             }
 265         }
 266
 267         x += mb_width;
 268     }
 269 }
 270
 271 /**
 272  * Write an unsigned rice/exp golomb codeword.
 273  */
 274 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
 275 {
 276     unsigned int rice_order, exp_order, switch_bits, switch_val;
 277     int exponent;
 278
 279     /* number of prefix bits to switch between Rice and expGolomb */
 280     switch_bits = (codebook & 3) + 1;
 281     rice_order  =  codebook >> 5;       /* rice code order */
 282     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 283
 284     switch_val  = switch_bits << rice_order;
 285
 286     if (val >= switch_val) {
 287         val -= switch_val - (1 << exp_order);
 288         exponent = av_log2(val);
 289
 290         put_bits(pb, exponent - exp_order + switch_bits, 0);
 291         put_bits(pb, 1, 1);
 292         put_bits(pb, exponent, val);
 293     } else {
 294         exponent = val >> rice_order;
 295
 296         if (exponent)
 297             put_bits(pb, exponent, 0);
 298         put_bits(pb, 1, 1);
 299         if (rice_order)
 300             put_sbits(pb, rice_order, val);
 301     }
 302 }
 303
 304 #define GET_SIGN(x)  ((x) >> 31)
 305 #define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
 306
 307 static void encode_dcs(PutBitContext *pb, DCTELEM *blocks,
 308                        int blocks_per_slice, int scale)
 309 {
 310     int i;
 311     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 312
 313     prev_dc = (blocks[0] - 0x4000) / scale;
 314     encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
 315     sign     = 0;
 316     codebook = 3;
 317     blocks  += 64;
 318
 319     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 320         dc       = (blocks[0] - 0x4000) / scale;
 321         delta    = dc - prev_dc;
 322         new_sign = GET_SIGN(delta);
 323         delta    = (delta ^ sign) - sign;
 324         code     = MAKE_CODE(delta);
 325         encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
 326         codebook = (code + (code & 1)) >> 1;
 327         codebook = FFMIN(codebook, 3);
 328         sign     = new_sign;
 329         prev_dc  = dc;
 330     }
 331 }
 332
 333 static void encode_acs(PutBitContext *pb, DCTELEM *blocks,
 334                        int blocks_per_slice,
 335                        int plane_size_factor,
 336                        const uint8_t *scan, const int16_t *qmat)
 337 {
 338     int idx, i;
 339     int run, level, run_cb, lev_cb;
 340     int max_coeffs, abs_level;
 341
 342     max_coeffs = blocks_per_slice << 6;
 343     run_cb     = ff_prores_run_to_cb_index[4];
 344     lev_cb     = ff_prores_lev_to_cb_index[2];
 345     run        = 0;
 346
 347     for (i = 1; i < 64; i++) {
 348         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 349             level = blocks[idx] / qmat[scan[i]];
 350             if (level) {
 351                 abs_level = FFABS(level);
 352                 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
 353                 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
 354                                     abs_level - 1);
 355                 put_sbits(pb, 1, GET_SIGN(level));
 356
 357                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 358                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 359                 run    = 0;
 360             } else {
 361                 run++;
 362             }
 363         }
 364     }
 365 }
 366
 367 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
 368                               const uint16_t *src, int linesize,
 369                               int mbs_per_slice, DCTELEM *blocks,
 370                               int blocks_per_mb, int plane_size_factor,
 371                               const int16_t *qmat)
 372 {
 373     int blocks_per_slice, saved_pos;
 374
 375     saved_pos = put_bits_count(pb);
 376     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 377
 378     encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
 379     encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
 380                ctx->scantable.permutated, qmat);
 381     flush_put_bits(pb);
 382
 383     return (put_bits_count(pb) - saved_pos) >> 3;
 384 }
 385
 386 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
 387                         PutBitContext *pb,
 388                         int sizes[4], int x, int y, int quant,
 389                         int mbs_per_slice)
 390 {
 391     ProresContext *ctx = avctx->priv_data;
 392     int i, xp, yp;
 393     int total_size = 0;
 394     const uint16_t *src;
 395     int slice_width_factor = av_log2(mbs_per_slice);
 396     int num_cblocks, pwidth;
 397     int plane_factor, is_chroma;
 398     uint16_t *qmat;
 399
 400     if (quant < MAX_STORED_Q) {
 401         qmat = ctx->quants[quant];
 402     } else {
 403         qmat = ctx->custom_q;
 404         for (i = 0; i < 64; i++)
 405             qmat[i] = ctx->quant_mat[i] * quant;
 406     }
 407
 408     for (i = 0; i < ctx->num_planes; i++) {
 409         is_chroma    = (i == 1 || i == 2);
 410         plane_factor = slice_width_factor + 2;
 411         if (is_chroma)
 412             plane_factor += ctx->chroma_factor - 3;
 413         if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
 414             xp          = x << 4;
 415             yp          = y << 4;
 416             num_cblocks = 4;
 417             pwidth      = avctx->width;
 418         } else {
 419             xp          = x << 3;
 420             yp          = y << 4;
 421             num_cblocks = 2;
 422             pwidth      = avctx->width >> 1;
 423         }
 424         src = (const uint16_t*)(pic->data[i] + yp * pic->linesize[i]) + xp;
 425
 426         get_slice_data(ctx, src, pic->linesize[i], xp, yp,
 427                        pwidth, avctx->height, ctx->blocks[0],
 428                        mbs_per_slice, num_cblocks, is_chroma);
 429         sizes[i] = encode_slice_plane(ctx, pb, src, pic->linesize[i],
 430                                       mbs_per_slice, ctx->blocks[0],
 431                                       num_cblocks, plane_factor,
 432                                       qmat);
 433         total_size += sizes[i];
 434     }
 435     return total_size;
 436 }
 437
 438 static inline int estimate_vlc(unsigned codebook, int val)
 439 {
 440     unsigned int rice_order, exp_order, switch_bits, switch_val;
 441     int exponent;
 442
 443     /* number of prefix bits to switch between Rice and expGolomb */
 444     switch_bits = (codebook & 3) + 1;
 445     rice_order  =  codebook >> 5;       /* rice code order */
 446     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 447
 448     switch_val  = switch_bits << rice_order;
 449
 450     if (val >= switch_val) {
 451         val -= switch_val - (1 << exp_order);
 452         exponent = av_log2(val);
 453
 454         return exponent * 2 - exp_order + switch_bits + 1;
 455     } else {
 456         return (val >> rice_order) + rice_order + 1;
 457     }
 458 }
 459
 460 static int estimate_dcs(int *error, DCTELEM *blocks, int blocks_per_slice,
 461                         int scale)
 462 {
 463     int i;
 464     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 465     int bits;
 466
 467     prev_dc  = (blocks[0] - 0x4000) / scale;
 468     bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
 469     sign     = 0;
 470     codebook = 3;
 471     blocks  += 64;
 472     *error  += FFABS(blocks[0] - 0x4000) % scale;
 473
 474     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 475         dc       = (blocks[0] - 0x4000) / scale;
 476         *error  += FFABS(blocks[0] - 0x4000) % scale;
 477         delta    = dc - prev_dc;
 478         new_sign = GET_SIGN(delta);
 479         delta    = (delta ^ sign) - sign;
 480         code     = MAKE_CODE(delta);
 481         bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
 482         codebook = (code + (code & 1)) >> 1;
 483         codebook = FFMIN(codebook, 3);
 484         sign     = new_sign;
 485         prev_dc  = dc;
 486     }
 487
 488     return bits;
 489 }
 490
 491 static int estimate_acs(int *error, DCTELEM *blocks, int blocks_per_slice,
 492                         int plane_size_factor,
 493                         const uint8_t *scan, const int16_t *qmat)
 494 {
 495     int idx, i;
 496     int run, level, run_cb, lev_cb;
 497     int max_coeffs, abs_level;
 498     int bits = 0;
 499
 500     max_coeffs = blocks_per_slice << 6;
 501     run_cb     = ff_prores_run_to_cb_index[4];
 502     lev_cb     = ff_prores_lev_to_cb_index[2];
 503     run        = 0;
 504
 505     for (i = 1; i < 64; i++) {
 506         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 507             level   = blocks[idx] / qmat[scan[i]];
 508             *error += FFABS(blocks[idx]) % qmat[scan[i]];
 509             if (level) {
 510                 abs_level = FFABS(level);
 511                 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
 512                 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
 513                                      abs_level - 1) + 1;
 514
 515                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 516                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 517                 run    = 0;
 518             } else {
 519                 run++;
 520             }
 521         }
 522     }
 523
 524     return bits;
 525 }
 526
 527 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
 528                                 const uint16_t *src, int linesize,
 529                                 int mbs_per_slice,
 530                                 int blocks_per_mb, int plane_size_factor,
 531                                 const int16_t *qmat)
 532 {
 533     int blocks_per_slice;
 534     int bits;
 535
 536     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 537
 538     bits  = estimate_dcs(error, ctx->blocks[plane], blocks_per_slice, qmat[0]);
 539     bits += estimate_acs(error, ctx->blocks[plane], blocks_per_slice,
 540                          plane_size_factor, ctx->scantable.permutated, qmat);
 541
 542     return FFALIGN(bits, 8);
 543 }
 544
 545 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
 546                             int trellis_node, int x, int y, int mbs_per_slice)
 547 {
 548     ProresContext *ctx = avctx->priv_data;
 549     int i, q, pq, xp, yp;
 550     const uint16_t *src;
 551     int slice_width_factor = av_log2(mbs_per_slice);
 552     int num_cblocks[MAX_PLANES], pwidth;
 553     int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
 554     const int min_quant = ctx->profile_info->min_quant;
 555     const int max_quant = ctx->profile_info->max_quant;
 556     int error, bits, bits_limit;
 557     int mbs, prev, cur, new_score;
 558     int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
 559     int overquant;
 560     uint16_t *qmat;
 561
 562     mbs = x + mbs_per_slice;
 563
 564     for (i = 0; i < ctx->num_planes; i++) {
 565         is_chroma[i]    = (i == 1 || i == 2);
 566         plane_factor[i] = slice_width_factor + 2;
 567         if (is_chroma[i])
 568             plane_factor[i] += ctx->chroma_factor - 3;
 569         if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
 570             xp             = x << 4;
 571             yp             = y << 4;
 572             num_cblocks[i] = 4;
 573             pwidth         = avctx->width;
 574         } else {
 575             xp             = x << 3;
 576             yp             = y << 4;
 577             num_cblocks[i] = 2;
 578             pwidth         = avctx->width >> 1;
 579         }
 580         src = (const uint16_t*)(pic->data[i] + yp * pic->linesize[i]) + xp;
 581
 582         get_slice_data(ctx, src, pic->linesize[i], xp, yp,
 583                        pwidth, avctx->height, ctx->blocks[i],
 584                        mbs_per_slice, num_cblocks[i], is_chroma[i]);
 585     }
 586
 587     for (q = min_quant; q < max_quant + 2; q++) {
 588         ctx->nodes[trellis_node + q].prev_node = -1;
 589         ctx->nodes[trellis_node + q].quant     = q;
 590     }
 591
 592     // todo: maybe perform coarser quantising to fit into frame size when needed
 593     for (q = min_quant; q <= max_quant; q++) {
 594         bits  = 0;
 595         error = 0;
 596         for (i = 0; i < ctx->num_planes; i++) {
 597             bits += estimate_slice_plane(ctx, &error, i,
 598                                          src, pic->linesize[i],
 599                                          mbs_per_slice,
 600                                          num_cblocks[i], plane_factor[i],
 601                                          ctx->quants[q]);
 602         }
 603         if (bits > 65000 * 8) {
 604             error = SCORE_LIMIT;
 605             break;
 606         }
 607         slice_bits[q]  = bits;
 608         slice_score[q] = error;
 609     }
 610     if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
 611         slice_bits[max_quant + 1]  = slice_bits[max_quant];
 612         slice_score[max_quant + 1] = slice_score[max_quant] + 1;
 613         overquant = max_quant;
 614     } else {
 615         for (q = max_quant + 1; q < 128; q++) {
 616             bits  = 0;
 617             error = 0;
 618             if (q < MAX_STORED_Q) {
 619                 qmat = ctx->quants[q];
 620             } else {
 621                 qmat = ctx->custom_q;
 622                 for (i = 0; i < 64; i++)
 623                     qmat[i] = ctx->quant_mat[i] * q;
 624             }
 625             for (i = 0; i < ctx->num_planes; i++) {
 626                 bits += estimate_slice_plane(ctx, &error, i,
 627                                              src, pic->linesize[i],
 628                                              mbs_per_slice,
 629                                              num_cblocks[i], plane_factor[i],
 630                                              qmat);
 631             }
 632             if (bits <= ctx->bits_per_mb * mbs_per_slice)
 633                 break;
 634         }
 635
 636         slice_bits[max_quant + 1]  = bits;
 637         slice_score[max_quant + 1] = error;
 638         overquant = q;
 639     }
 640     ctx->nodes[trellis_node + max_quant + 1].quant = overquant;
 641
 642     bits_limit = mbs * ctx->bits_per_mb;
 643     for (pq = min_quant; pq < max_quant + 2; pq++) {
 644         prev = trellis_node - TRELLIS_WIDTH + pq;
 645
 646         for (q = min_quant; q < max_quant + 2; q++) {
 647             cur = trellis_node + q;
 648
 649             bits  = ctx->nodes[prev].bits + slice_bits[q];
 650             error = slice_score[q];
 651             if (bits > bits_limit)
 652                 error = SCORE_LIMIT;
 653
 654             if (ctx->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
 655                 new_score = ctx->nodes[prev].score + error;
 656             else
 657                 new_score = SCORE_LIMIT;
 658             if (ctx->nodes[cur].prev_node == -1 ||
 659                 ctx->nodes[cur].score >= new_score) {
 660
 661                 ctx->nodes[cur].bits      = bits;
 662                 ctx->nodes[cur].score     = new_score;
 663                 ctx->nodes[cur].prev_node = prev;
 664             }
 665         }
 666     }
 667
 668     error = ctx->nodes[trellis_node + min_quant].score;
 669     pq    = trellis_node + min_quant;
 670     for (q = min_quant + 1; q < max_quant + 2; q++) {
 671         if (ctx->nodes[trellis_node + q].score <= error) {
 672             error = ctx->nodes[trellis_node + q].score;
 673             pq    = trellis_node + q;
 674         }
 675     }
 676
 677     return pq;
 678 }
 679
 680 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 681                         const AVFrame *pic, int *got_packet)
 682 {
 683     ProresContext *ctx = avctx->priv_data;
 684     uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
 685     uint8_t *picture_size_pos;
 686     PutBitContext pb;
 687     int x, y, i, mb, q = 0;
 688     int sizes[4] = { 0 };
 689     int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
 690     int frame_size, picture_size, slice_size;
 691     int mbs_per_slice = ctx->mbs_per_slice;
 692     int pkt_size, ret;
 693
 694     *avctx->coded_frame           = *pic;
 695     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
 696     avctx->coded_frame->key_frame = 1;
 697
 698     pkt_size = ctx->frame_size + FF_MIN_BUFFER_SIZE;
 699
 700     if ((ret = ff_alloc_packet(pkt, pkt_size)) < 0) {
 701         av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
 702         return ret;
 703     }
 704
 705     orig_buf = pkt->data;
 706
 707     // frame atom
 708     orig_buf += 4;                              // frame size
 709     bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
 710     buf = orig_buf;
 711
 712     // frame header
 713     tmp = buf;
 714     buf += 2;                                   // frame header size will be stored here
 715     bytestream_put_be16  (&buf, 0);             // version 1
 716     bytestream_put_buffer(&buf, ctx->vendor, 4);
 717     bytestream_put_be16  (&buf, avctx->width);
 718     bytestream_put_be16  (&buf, avctx->height);
 719     bytestream_put_byte  (&buf, ctx->chroma_factor << 6); // frame flags
 720     bytestream_put_byte  (&buf, 0);             // reserved
 721     bytestream_put_byte  (&buf, avctx->color_primaries);
 722     bytestream_put_byte  (&buf, avctx->color_trc);
 723     bytestream_put_byte  (&buf, avctx->colorspace);
 724     bytestream_put_byte  (&buf, 0x40);          // source format and alpha information
 725     bytestream_put_byte  (&buf, 0);             // reserved
 726     if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
 727         bytestream_put_byte  (&buf, 0x03);      // matrix flags - both matrices are present
 728         // luma quantisation matrix
 729         for (i = 0; i < 64; i++)
 730             bytestream_put_byte(&buf, ctx->quant_mat[i]);
 731         // chroma quantisation matrix
 732         for (i = 0; i < 64; i++)
 733             bytestream_put_byte(&buf, ctx->quant_mat[i]);
 734     } else {
 735         bytestream_put_byte  (&buf, 0x00);      // matrix flags - default matrices are used
 736     }
 737     bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
 738
 739     // picture header
 740     picture_size_pos = buf + 1;
 741     bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
 742     buf += 4;                                   // picture data size will be stored here
 743     bytestream_put_be16  (&buf, ctx->num_slices); // total number of slices
 744     bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
 745
 746     // seek table - will be filled during slice encoding
 747     slice_sizes = buf;
 748     buf += ctx->num_slices * 2;
 749
 750     // slices
 751     for (y = 0; y < ctx->mb_height; y++) {
 752         mbs_per_slice = ctx->mbs_per_slice;
 753         for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
 754             while (ctx->mb_width - x < mbs_per_slice)
 755                 mbs_per_slice >>= 1;
 756             q = find_slice_quant(avctx, pic, (mb + 1) * TRELLIS_WIDTH, x, y,
 757                                  mbs_per_slice);
 758         }
 759
 760         for (x = ctx->slices_width - 1; x >= 0; x--) {
 761             ctx->slice_q[x] = ctx->nodes[q].quant;
 762             q = ctx->nodes[q].prev_node;
 763         }
 764
 765         mbs_per_slice = ctx->mbs_per_slice;
 766         for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
 767             q = ctx->slice_q[mb];
 768
 769             while (ctx->mb_width - x < mbs_per_slice)
 770                 mbs_per_slice >>= 1;
 771
 772             bytestream_put_byte(&buf, slice_hdr_size << 3);
 773             slice_hdr = buf;
 774             buf += slice_hdr_size - 1;
 775             init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
 776             encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
 777
 778             bytestream_put_byte(&slice_hdr, q);
 779             slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
 780             for (i = 0; i < ctx->num_planes - 1; i++) {
 781                 bytestream_put_be16(&slice_hdr, sizes[i]);
 782                 slice_size += sizes[i];
 783             }
 784             bytestream_put_be16(&slice_sizes, slice_size);
 785             buf += slice_size - slice_hdr_size;
 786         }
 787     }
 788
 789     orig_buf -= 8;
 790     frame_size = buf - orig_buf;
 791     picture_size = buf - picture_size_pos - 6;
 792     bytestream_put_be32(&orig_buf, frame_size);
 793     bytestream_put_be32(&picture_size_pos, picture_size);
 794
 795     pkt->size   = frame_size;
 796     pkt->flags |= AV_PKT_FLAG_KEY;
 797     *got_packet = 1;
 798
 799     return 0;
 800 }
 801
 802 static av_cold int encode_close(AVCodecContext *avctx)
 803 {
 804     ProresContext *ctx = avctx->priv_data;
 805
 806     if (avctx->coded_frame->data[0])
 807         avctx->release_buffer(avctx, avctx->coded_frame);
 808
 809     av_freep(&avctx->coded_frame);
 810
 811     av_freep(&ctx->nodes);
 812     av_freep(&ctx->slice_q);
 813
 814     return 0;
 815 }
 816
 817 static av_cold int encode_init(AVCodecContext *avctx)
 818 {
 819     ProresContext *ctx = avctx->priv_data;
 820     int mps;
 821     int i, j;
 822     int min_quant, max_quant;
 823
 824     avctx->bits_per_raw_sample = 10;
 825     avctx->coded_frame = avcodec_alloc_frame();
 826     if (!avctx->coded_frame)
 827         return AVERROR(ENOMEM);
 828
 829     ff_proresdsp_init(&ctx->dsp, avctx);
 830     ff_init_scantable(ctx->dsp.dct_permutation, &ctx->scantable,
 831                       ff_prores_progressive_scan);
 832
 833     mps = ctx->mbs_per_slice;
 834     if (mps & (mps - 1)) {
 835         av_log(avctx, AV_LOG_ERROR,
 836                "there should be an integer power of two MBs per slice\n");
 837         return AVERROR(EINVAL);
 838     }
 839
 840     ctx->chroma_factor = avctx->pix_fmt == PIX_FMT_YUV422P10
 841                          ? CFACTOR_Y422
 842                          : CFACTOR_Y444;
 843     ctx->profile_info  = prores_profile_info + ctx->profile;
 844     ctx->num_planes    = 3;
 845
 846     ctx->mb_width      = FFALIGN(avctx->width,  16) >> 4;
 847     ctx->mb_height     = FFALIGN(avctx->height, 16) >> 4;
 848     ctx->slices_width  = ctx->mb_width / mps;
 849     ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
 850     ctx->num_slices    = ctx->mb_height * ctx->slices_width;
 851
 852     if (ctx->quant_sel == -1)
 853         ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
 854     else
 855         ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
 856
 857     if (strlen(ctx->vendor) != 4) {
 858         av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
 859         return AVERROR_INVALIDDATA;
 860     }
 861
 862     if (!ctx->bits_per_mb) {
 863         for (i = 0; i < NUM_MB_LIMITS - 1; i++)
 864             if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height)
 865                 break;
 866         ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
 867     } else if (ctx->bits_per_mb < 128) {
 868         av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
 869         return AVERROR_INVALIDDATA;
 870     }
 871
 872     ctx->frame_size = ctx->num_slices * (2 + 2 * ctx->num_planes
 873                                          + (2 * mps * ctx->bits_per_mb) / 8)
 874                       + 200;
 875
 876     min_quant = ctx->profile_info->min_quant;
 877     max_quant = ctx->profile_info->max_quant;
 878     for (i = min_quant; i < MAX_STORED_Q; i++) {
 879         for (j = 0; j < 64; j++)
 880             ctx->quants[i][j] = ctx->quant_mat[j] * i;
 881     }
 882
 883     avctx->codec_tag   = ctx->profile_info->tag;
 884
 885     av_log(avctx, AV_LOG_DEBUG, "profile %d, %d slices, %d bits per MB\n",
 886            ctx->profile, ctx->num_slices, ctx->bits_per_mb);
 887     av_log(avctx, AV_LOG_DEBUG, "estimated frame size %d\n",
 888            ctx->frame_size);
 889
 890     ctx->nodes = av_malloc((ctx->slices_width + 1) * TRELLIS_WIDTH
 891                            * sizeof(*ctx->nodes));
 892     if (!ctx->nodes) {
 893         encode_close(avctx);
 894         return AVERROR(ENOMEM);
 895     }
 896     for (i = min_quant; i < max_quant + 2; i++) {
 897         ctx->nodes[i].prev_node = -1;
 898         ctx->nodes[i].bits      = 0;
 899         ctx->nodes[i].score     = 0;
 900     }
 901
 902     ctx->slice_q = av_malloc(ctx->slices_width * sizeof(*ctx->slice_q));
 903     if (!ctx->slice_q) {
 904         encode_close(avctx);
 905         return AVERROR(ENOMEM);
 906     }
 907
 908     return 0;
 909 }
 910
 911 #define OFFSET(x) offsetof(ProresContext, x)
 912 #define VE     AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 913
 914 static const AVOption options[] = {
 915     { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
 916         AV_OPT_TYPE_INT, { 8 }, 1, MAX_MBS_PER_SLICE, VE },
 917     { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
 918         { PRORES_PROFILE_STANDARD },
 919         PRORES_PROFILE_PROXY, PRORES_PROFILE_HQ, VE, "profile" },
 920     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_PROXY },
 921         0, 0, VE, "profile" },
 922     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_LT },
 923         0, 0, VE, "profile" },
 924     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_STANDARD },
 925         0, 0, VE, "profile" },
 926     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_HQ },
 927         0, 0, VE, "profile" },
 928     { "vendor", "vendor ID", OFFSET(vendor),
 929         AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
 930     { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
 931         AV_OPT_TYPE_INT, { 0 }, 0, 8192, VE },
 932     { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
 933         { -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
 934     { "auto",          NULL, 0, AV_OPT_TYPE_CONST, { -1 },
 935         0, 0, VE, "quant_mat" },
 936     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_PROXY },
 937         0, 0, VE, "quant_mat" },
 938     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_LT },
 939         0, 0, VE, "quant_mat" },
 940     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_STANDARD },
 941         0, 0, VE, "quant_mat" },
 942     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_HQ },
 943         0, 0, VE, "quant_mat" },
 944     { "default",       NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_DEFAULT },
 945         0, 0, VE, "quant_mat" },
 946     { NULL }
 947 };
 948
 949 static const AVClass proresenc_class = {
 950     .class_name = "ProRes encoder",
 951     .item_name  = av_default_item_name,
 952     .option     = options,
 953     .version    = LIBAVUTIL_VERSION_INT,
 954 };
 955
 956 AVCodec ff_prores_kostya_encoder = {
 957     .name           = "prores_kostya",
 958     .type           = AVMEDIA_TYPE_VIDEO,
 959     .id             = CODEC_ID_PRORES,
 960     .priv_data_size = sizeof(ProresContext),
 961     .init           = encode_init,
 962     .close          = encode_close,
 963     .encode2        = encode_frame,
 964     .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
 965     .pix_fmts       = (const enum PixelFormat[]) {
 966                           PIX_FMT_YUV422P10, PIX_FMT_YUV444P10, PIX_FMT_NONE
 967                       },
 968     .priv_class     = &proresenc_class,
 969 };