git.sesse.net Git - ffmpeg/blob - libavcodec/proresenc_kostya.c

   1 /*
   2  * Apple ProRes encoder
   3  *
   4  * Copyright (c) 2012 Konstantin Shishkov
   5  *
   6  * This file is part of Libav.
   7  *
   8  * Libav is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * Libav is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with Libav; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 #include "libavutil/opt.h"
  24 #include "avcodec.h"
  25 #include "put_bits.h"
  26 #include "bytestream.h"
  27 #include "internal.h"
  28 #include "proresdsp.h"
  29 #include "proresdata.h"
  30
  31 #define CFACTOR_Y422 2 // chroma_factor chosen for PIX_FMT_YUV422P10 input; written to the frame flags byte as value << 6
  32 #define CFACTOR_Y444 3 // chroma_factor chosen for any other input format; chroma is then handled at full width
  33
  34 #define MAX_MBS_PER_SLICE 8 // upper bound of the "mbs_per_slice" option; also sizes ProresContext.blocks
  35
  36 #define MAX_PLANES 3 // should be increased to 4 when there's PIX_FMT_YUV444AP10
  37
  38 enum { // profile identifiers; the numeric value is used as an index into prores_profile_info[]
  39     PRORES_PROFILE_PROXY = 0,
  40     PRORES_PROFILE_LT,
  41     PRORES_PROFILE_STANDARD,
  42     PRORES_PROFILE_HQ,
  43 };
  44
  45 #define NUM_MB_LIMITS 4 // number of frame-size classes; each class has its own entry in prores_profile.br_tab
  46 static const int prores_mb_limits[NUM_MB_LIMITS] = { // macroblock-count thresholds: encode_init() takes the first entry >= mb_width * mb_height, else the last class
  47     1620, // up to 720x576
  48     2700, // up to 960x720
  49     6075, // up to 1440x1080
  50     9216, // up to 2048x1152
  51 };
  52
  53 static const struct prores_profile { // per-profile encoding parameters, indexed by the PRORES_PROFILE_* value
  54     const char *full_name; // descriptive profile name
  55     uint32_t    tag; // copied into avctx->codec_tag by encode_init()
  56     int         min_quant; // smallest quantiser tried for a slice
  57     int         max_quant; // largest regular quantiser; larger ones are only tried when a slice still exceeds its bit budget
  58     int         br_tab[NUM_MB_LIMITS]; // bits-per-macroblock budget, one entry per prores_mb_limits[] class
  59     uint8_t     quant[64]; // base quantisation matrix; scaled by the slice quantiser and written to the frame header for both luma and chroma
  60 } prores_profile_info[4] = {
  61     {
  62         .full_name = "proxy",
  63         .tag       = MKTAG('a', 'p', 'c', 'o'),
  64         .min_quant = 4,
  65         .max_quant = 8,
  66         .br_tab    = { 300, 242, 220, 194 },
  67         .quant     = {
  68              4,  7,  9, 11, 13, 14, 15, 63,
  69              7,  7, 11, 12, 14, 15, 63, 63,
  70              9, 11, 13, 14, 15, 63, 63, 63,
  71             11, 11, 13, 14, 63, 63, 63, 63,
  72             11, 13, 14, 63, 63, 63, 63, 63,
  73             13, 14, 63, 63, 63, 63, 63, 63,
  74             13, 63, 63, 63, 63, 63, 63, 63,
  75             63, 63, 63, 63, 63, 63, 63, 63,
  76         },
  77     },
  78     {
  79         .full_name = "LT",
  80         .tag       = MKTAG('a', 'p', 'c', 's'),
  81         .min_quant = 1,
  82         .max_quant = 9,
  83         .br_tab    = { 720, 560, 490, 440 },
  84         .quant     = {
  85              4,  5,  6,  7,  9, 11, 13, 15,
  86              5,  5,  7,  8, 11, 13, 15, 17,
  87              6,  7,  9, 11, 13, 15, 15, 17,
  88              7,  7,  9, 11, 13, 15, 17, 19,
  89              7,  9, 11, 13, 14, 16, 19, 23,
  90              9, 11, 13, 14, 16, 19, 23, 29,
  91              9, 11, 13, 15, 17, 21, 28, 35,
  92             11, 13, 16, 17, 21, 28, 35, 41,
  93         },
  94     },
  95     {
  96         .full_name = "standard",
  97         .tag       = MKTAG('a', 'p', 'c', 'n'),
  98         .min_quant = 1,
  99         .max_quant = 6,
 100         .br_tab    = { 1050, 808, 710, 632 },
 101         .quant     = {
 102              4,  4,  5,  5,  6,  7,  7,  9,
 103              4,  4,  5,  6,  7,  7,  9,  9,
 104              5,  5,  6,  7,  7,  9,  9, 10,
 105              5,  5,  6,  7,  7,  9,  9, 10,
 106              5,  6,  7,  7,  8,  9, 10, 12,
 107              6,  7,  7,  8,  9, 10, 12, 15,
 108              6,  7,  7,  9, 10, 11, 14, 17,
 109              7,  7,  9, 10, 11, 14, 17, 21,
 110         },
 111     },
 112     {
 113         .full_name = "high quality",
 114         .tag       = MKTAG('a', 'p', 'c', 'h'),
 115         .min_quant = 1,
 116         .max_quant = 6,
 117         .br_tab    = { 1566, 1216, 1070, 950 },
 118         .quant     = {
 119              4,  4,  4,  4,  4,  4,  4,  4,
 120              4,  4,  4,  4,  4,  4,  4,  4,
 121              4,  4,  4,  4,  4,  4,  4,  4,
 122              4,  4,  4,  4,  4,  4,  4,  5,
 123              4,  4,  4,  4,  4,  4,  5,  5,
 124              4,  4,  4,  4,  4,  5,  5,  6,
 125              4,  4,  4,  4,  5,  5,  6,  7,
 126              4,  4,  4,  4,  5,  6,  7,  7,
 127         },
 128     }
 129 // for 4444 profile bitrate numbers are { 2350, 1828, 1600, 1425 }
 130 };
 131
 132 #define TRELLIS_WIDTH 16
 133 #define SCORE_LIMIT   INT_MAX / 2
 134
 135 struct TrellisNode { // one rate-control trellis state: a quantiser choice for one slice
 136     int prev_node; // index in ProresContext.nodes of the best predecessor; -1 while no path has reached this node
 137     int quant; // quantiser this node stands for
 138     int bits; // bits accumulated along the best path ending at this node
 139     int score; // quantisation error accumulated along that path; SCORE_LIMIT marks an unusable path
 140 };
 141
 142 #define MAX_STORED_Q 16 // quantisers below this value have a precomputed matrix in ProresContext.quants
 143
 144 typedef struct ProresContext { // private encoder state (AVCodecContext.priv_data)
 145     AVClass *class;
 146     DECLARE_ALIGNED(16, DCTELEM, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE]; // DCT coefficients of the current slice, one array per plane
 147     DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16]; // scratch macroblock used when a macroblock crosses the picture edge
 148     int16_t quants[MAX_STORED_Q][64]; // profile matrix multiplied by the quantiser, filled by encode_init() from min_quant upwards
 149     int16_t custom_q[64]; // scratch matrix for quantisers >= MAX_STORED_Q
 150
 151     ProresDSPContext dsp;
 152     ScanTable  scantable;
 153
 154     int mb_width, mb_height; // picture size in 16x16 macroblocks, rounded up
 155     int mbs_per_slice; // "mbs_per_slice" option
 156     int num_chroma_blocks, chroma_factor; // chroma_factor is CFACTOR_Y422 or CFACTOR_Y444
 157     int slices_width; // number of slices in one macroblock row
 158     int num_slices; // mb_height * slices_width
 159     int num_planes;
 160     int bits_per_mb; // bit budget per macroblock taken from the profile's br_tab
 161
 162     int frame_size; // estimated coded frame size, used to size the output packet
 163
 164     int profile; // "profile" option
 165     const struct prores_profile *profile_info; // entry of prores_profile_info[] selected by profile
 166
 167     struct TrellisNode *nodes; // (slices_width + 1) * TRELLIS_WIDTH trellis nodes
 168     int *slice_q; // quantiser chosen for each slice of the current macroblock row
 169 } ProresContext;
 170
 171 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
 172                            int linesize, int x, int y, int w, int h,
 173                            DCTELEM *blocks,
 174                            int mbs_per_slice, int blocks_per_mb, int is_chroma)
 175 {
 176     const uint16_t *esrc;
 177     const int mb_width = 4 * blocks_per_mb;
 178     int elinesize;
 179     int i, j, k;
 180
 181     for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
 182         if (x >= w) {
 183             memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
 184                               * sizeof(*blocks));
 185             return;
 186         }
 187         if (x + mb_width <= w && y + 16 <= h) {
 188             esrc      = src;
 189             elinesize = linesize;
 190         } else {
 191             int bw, bh, pix;
 192
 193             esrc      = ctx->emu_buf;
 194             elinesize = 16 * sizeof(*ctx->emu_buf);
 195
 196             bw = FFMIN(w - x, mb_width);
 197             bh = FFMIN(h - y, 16);
 198
 199             for (j = 0; j < bh; j++) {
 200                 memcpy(ctx->emu_buf + j * 16,
 201                        (const uint8_t*)src + j * linesize,
 202                        bw * sizeof(*src));
 203                 pix = ctx->emu_buf[j * 16 + bw - 1];
 204                 for (k = bw; k < mb_width; k++)
 205                     ctx->emu_buf[j * 16 + k] = pix;
 206             }
 207             for (; j < 16; j++)
 208                 memcpy(ctx->emu_buf + j * 16,
 209                        ctx->emu_buf + (bh - 1) * 16,
 210                        mb_width * sizeof(*ctx->emu_buf));
 211         }
 212         if (!is_chroma) {
 213             ctx->dsp.fdct(esrc, elinesize, blocks);
 214             blocks += 64;
 215             if (blocks_per_mb > 2) {
 216                 ctx->dsp.fdct(src + 8, linesize, blocks);
 217                 blocks += 64;
 218             }
 219             ctx->dsp.fdct(src + linesize * 4, linesize, blocks);
 220             blocks += 64;
 221             if (blocks_per_mb > 2) {
 222                 ctx->dsp.fdct(src + linesize * 4 + 8, linesize, blocks);
 223                 blocks += 64;
 224             }
 225         } else {
 226             ctx->dsp.fdct(esrc, elinesize, blocks);
 227             blocks += 64;
 228             ctx->dsp.fdct(src + linesize * 4, linesize, blocks);
 229             blocks += 64;
 230             if (blocks_per_mb > 2) {
 231                 ctx->dsp.fdct(src + 8, linesize, blocks);
 232                 blocks += 64;
 233                 ctx->dsp.fdct(src + linesize * 4 + 8, linesize, blocks);
 234                 blocks += 64;
 235             }
 236         }
 237
 238         x += mb_width;
 239     }
 240 }
 241
 242 /**
 243  * Write an unsigned rice/exp golomb codeword.
 244  */
 245 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
 246 {
 247     unsigned int rice_order, exp_order, switch_bits, switch_val;
 248     int exponent;
 249
 250     /* number of prefix bits to switch between Rice and expGolomb */
 251     switch_bits = (codebook & 3) + 1;
 252     rice_order  =  codebook >> 5;       /* rice code order */
 253     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 254
 255     switch_val  = switch_bits << rice_order;
 256
 257     if (val >= switch_val) {
 258         val -= switch_val - (1 << exp_order);
 259         exponent = av_log2(val);
 260
 261         put_bits(pb, exponent - exp_order + switch_bits, 0);
 262         put_bits(pb, 1, 1);
 263         put_bits(pb, exponent, val);
 264     } else {
 265         exponent = val >> rice_order;
 266
 267         if (exponent)
 268             put_bits(pb, exponent, 0);
 269         put_bits(pb, 1, 1);
 270         if (rice_order)
 271             put_sbits(pb, rice_order, val);
 272     }
 273 }
 274
 275 #define GET_SIGN(x)  ((x) >> 31)
 276 #define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
 277
 278 static void encode_dcs(PutBitContext *pb, DCTELEM *blocks,
 279                        int blocks_per_slice, int scale)
 280 {
 281     int i;
 282     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 283
 284     prev_dc = (blocks[0] - 0x4000) / scale;
 285     encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
 286     sign     = 0;
 287     codebook = 3;
 288     blocks  += 64;
 289
 290     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 291         dc       = (blocks[0] - 0x4000) / scale;
 292         delta    = dc - prev_dc;
 293         new_sign = GET_SIGN(delta);
 294         delta    = (delta ^ sign) - sign;
 295         code     = MAKE_CODE(delta);
 296         encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
 297         codebook = (code + (code & 1)) >> 1;
 298         codebook = FFMIN(codebook, 3);
 299         sign     = new_sign;
 300         prev_dc  = dc;
 301     }
 302 }
 303
 304 static void encode_acs(PutBitContext *pb, DCTELEM *blocks,
 305                        int blocks_per_slice,
 306                        int plane_size_factor,
 307                        const uint8_t *scan, const int16_t *qmat)
 308 {
 309     int idx, i;
 310     int run, level, run_cb, lev_cb;
 311     int max_coeffs, abs_level;
 312
 313     max_coeffs = blocks_per_slice << 6;
 314     run_cb     = ff_prores_run_to_cb_index[4];
 315     lev_cb     = ff_prores_lev_to_cb_index[2];
 316     run        = 0;
 317
 318     for (i = 1; i < 64; i++) {
 319         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 320             level = blocks[idx] / qmat[scan[i]];
 321             if (level) {
 322                 abs_level = FFABS(level);
 323                 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
 324                 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
 325                                     abs_level - 1);
 326                 put_sbits(pb, 1, GET_SIGN(level));
 327
 328                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 329                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 330                 run    = 0;
 331             } else {
 332                 run++;
 333             }
 334         }
 335     }
 336 }
 337
 338 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
 339                               const uint16_t *src, int linesize,
 340                               int mbs_per_slice, DCTELEM *blocks,
 341                               int blocks_per_mb, int plane_size_factor,
 342                               const int16_t *qmat)
 343 {
 344     int blocks_per_slice, saved_pos;
 345
 346     saved_pos = put_bits_count(pb);
 347     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 348
 349     encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
 350     encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
 351                ctx->scantable.permutated, qmat);
 352     flush_put_bits(pb);
 353
 354     return (put_bits_count(pb) - saved_pos) >> 3;
 355 }
 356
 357 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
 358                         PutBitContext *pb,
 359                         int sizes[4], int x, int y, int quant,
 360                         int mbs_per_slice)
 361 {
 362     ProresContext *ctx = avctx->priv_data;
 363     int i, xp, yp;
 364     int total_size = 0;
 365     const uint16_t *src;
 366     int slice_width_factor = av_log2(mbs_per_slice);
 367     int num_cblocks, pwidth;
 368     int plane_factor, is_chroma;
 369     uint16_t *qmat;
 370
 371     if (quant < MAX_STORED_Q) {
 372         qmat = ctx->quants[quant];
 373     } else {
 374         qmat = ctx->custom_q;
 375         for (i = 0; i < 64; i++)
 376             qmat[i] = ctx->profile_info->quant[i] * quant;
 377     }
 378
 379     for (i = 0; i < ctx->num_planes; i++) {
 380         is_chroma    = (i == 1 || i == 2);
 381         plane_factor = slice_width_factor + 2;
 382         if (is_chroma)
 383             plane_factor += ctx->chroma_factor - 3;
 384         if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
 385             xp          = x << 4;
 386             yp          = y << 4;
 387             num_cblocks = 4;
 388             pwidth      = avctx->width;
 389         } else {
 390             xp          = x << 3;
 391             yp          = y << 4;
 392             num_cblocks = 2;
 393             pwidth      = avctx->width >> 1;
 394         }
 395         src = (const uint16_t*)(pic->data[i] + yp * pic->linesize[i]) + xp;
 396
 397         get_slice_data(ctx, src, pic->linesize[i], xp, yp,
 398                        pwidth, avctx->height, ctx->blocks[0],
 399                        mbs_per_slice, num_cblocks, is_chroma);
 400         sizes[i] = encode_slice_plane(ctx, pb, src, pic->linesize[i],
 401                                       mbs_per_slice, ctx->blocks[0],
 402                                       num_cblocks, plane_factor,
 403                                       qmat);
 404         total_size += sizes[i];
 405     }
 406     return total_size;
 407 }
 408
 409 static inline int estimate_vlc(unsigned codebook, int val)
 410 {
 411     unsigned int rice_order, exp_order, switch_bits, switch_val;
 412     int exponent;
 413
 414     /* number of prefix bits to switch between Rice and expGolomb */
 415     switch_bits = (codebook & 3) + 1;
 416     rice_order  =  codebook >> 5;       /* rice code order */
 417     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 418
 419     switch_val  = switch_bits << rice_order;
 420
 421     if (val >= switch_val) {
 422         val -= switch_val - (1 << exp_order);
 423         exponent = av_log2(val);
 424
 425         return exponent * 2 - exp_order + switch_bits + 1;
 426     } else {
 427         return (val >> rice_order) + rice_order + 1;
 428     }
 429 }
 430
 431 static int estimate_dcs(int *error, DCTELEM *blocks, int blocks_per_slice,
 432                         int scale)
 433 {
 434     int i;
 435     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 436     int bits;
 437
 438     prev_dc  = (blocks[0] - 0x4000) / scale;
 439     bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
 440     sign     = 0;
 441     codebook = 3;
 442     blocks  += 64;
 443     *error  += FFABS(blocks[0] - 0x4000) % scale;
 444
 445     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 446         dc       = (blocks[0] - 0x4000) / scale;
 447         *error  += FFABS(blocks[0] - 0x4000) % scale;
 448         delta    = dc - prev_dc;
 449         new_sign = GET_SIGN(delta);
 450         delta    = (delta ^ sign) - sign;
 451         code     = MAKE_CODE(delta);
 452         bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
 453         codebook = (code + (code & 1)) >> 1;
 454         codebook = FFMIN(codebook, 3);
 455         sign     = new_sign;
 456         prev_dc  = dc;
 457     }
 458
 459     return bits;
 460 }
 461
 462 static int estimate_acs(int *error, DCTELEM *blocks, int blocks_per_slice,
 463                         int plane_size_factor,
 464                         const uint8_t *scan, const int16_t *qmat)
 465 {
 466     int idx, i;
 467     int run, level, run_cb, lev_cb;
 468     int max_coeffs, abs_level;
 469     int bits = 0;
 470
 471     max_coeffs = blocks_per_slice << 6;
 472     run_cb     = ff_prores_run_to_cb_index[4];
 473     lev_cb     = ff_prores_lev_to_cb_index[2];
 474     run        = 0;
 475
 476     for (i = 1; i < 64; i++) {
 477         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 478             level   = blocks[idx] / qmat[scan[i]];
 479             *error += FFABS(blocks[idx]) % qmat[scan[i]];
 480             if (level) {
 481                 abs_level = FFABS(level);
 482                 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
 483                 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
 484                                      abs_level - 1) + 1;
 485
 486                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 487                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 488                 run    = 0;
 489             } else {
 490                 run++;
 491             }
 492         }
 493     }
 494
 495     return bits;
 496 }
 497
 498 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
 499                                 const uint16_t *src, int linesize,
 500                                 int mbs_per_slice,
 501                                 int blocks_per_mb, int plane_size_factor,
 502                                 const int16_t *qmat)
 503 {
 504     int blocks_per_slice;
 505     int bits;
 506
 507     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 508
 509     bits  = estimate_dcs(error, ctx->blocks[plane], blocks_per_slice, qmat[0]);
 510     bits += estimate_acs(error, ctx->blocks[plane], blocks_per_slice,
 511                          plane_size_factor, ctx->scantable.permutated, qmat);
 512
 513     return FFALIGN(bits, 8);
 514 }
 515
 516 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
 517                             int trellis_node, int x, int y, int mbs_per_slice)
 518 {
 519     ProresContext *ctx = avctx->priv_data;
 520     int i, q, pq, xp, yp;
 521     const uint16_t *src;
 522     int slice_width_factor = av_log2(mbs_per_slice);
 523     int num_cblocks[MAX_PLANES], pwidth;
 524     int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
 525     const int min_quant = ctx->profile_info->min_quant;
 526     const int max_quant = ctx->profile_info->max_quant;
 527     int error, bits, bits_limit;
 528     int mbs, prev, cur, new_score;
 529     int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
 530     int overquant;
 531     uint16_t *qmat;
 532
 533     mbs = x + mbs_per_slice;
 534
 535     for (i = 0; i < ctx->num_planes; i++) {
 536         is_chroma[i]    = (i == 1 || i == 2);
 537         plane_factor[i] = slice_width_factor + 2;
 538         if (is_chroma[i])
 539             plane_factor[i] += ctx->chroma_factor - 3;
 540         if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
 541             xp             = x << 4;
 542             yp             = y << 4;
 543             num_cblocks[i] = 4;
 544             pwidth         = avctx->width;
 545         } else {
 546             xp             = x << 3;
 547             yp             = y << 4;
 548             num_cblocks[i] = 2;
 549             pwidth         = avctx->width >> 1;
 550         }
 551         src = (const uint16_t*)(pic->data[i] + yp * pic->linesize[i]) + xp;
 552
 553         get_slice_data(ctx, src, pic->linesize[i], xp, yp,
 554                        pwidth, avctx->height, ctx->blocks[i],
 555                        mbs_per_slice, num_cblocks[i], is_chroma[i]);
 556     }
 557
 558     for (q = min_quant; q < max_quant + 2; q++) {
 559         ctx->nodes[trellis_node + q].prev_node = -1;
 560         ctx->nodes[trellis_node + q].quant     = q;
 561     }
 562
 563     // todo: maybe perform coarser quantising to fit into frame size when needed
 564     for (q = min_quant; q <= max_quant; q++) {
 565         bits  = 0;
 566         error = 0;
 567         for (i = 0; i < ctx->num_planes; i++) {
 568             bits += estimate_slice_plane(ctx, &error, i,
 569                                          src, pic->linesize[i],
 570                                          mbs_per_slice,
 571                                          num_cblocks[i], plane_factor[i],
 572                                          ctx->quants[q]);
 573         }
 574         if (bits > 65000 * 8) {
 575             error = SCORE_LIMIT;
 576             break;
 577         }
 578         slice_bits[q]  = bits;
 579         slice_score[q] = error;
 580     }
 581     if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
 582         slice_bits[max_quant + 1]  = slice_bits[max_quant];
 583         slice_score[max_quant + 1] = slice_score[max_quant] + 1;
 584         overquant = max_quant;
 585     } else {
 586         for (q = max_quant + 1; q < 128; q++) {
 587             bits  = 0;
 588             error = 0;
 589             if (q < MAX_STORED_Q) {
 590                 qmat = ctx->quants[q];
 591             } else {
 592                 qmat = ctx->custom_q;
 593                 for (i = 0; i < 64; i++)
 594                     qmat[i] = ctx->profile_info->quant[i] * q;
 595             }
 596             for (i = 0; i < ctx->num_planes; i++) {
 597                 bits += estimate_slice_plane(ctx, &error, i,
 598                                              src, pic->linesize[i],
 599                                              mbs_per_slice,
 600                                              num_cblocks[i], plane_factor[i],
 601                                              qmat);
 602             }
 603             if (bits <= ctx->bits_per_mb * mbs_per_slice)
 604                 break;
 605         }
 606
 607         slice_bits[max_quant + 1]  = bits;
 608         slice_score[max_quant + 1] = error;
 609         overquant = q;
 610     }
 611     ctx->nodes[trellis_node + max_quant + 1].quant = overquant;
 612
 613     bits_limit = mbs * ctx->bits_per_mb;
 614     for (pq = min_quant; pq < max_quant + 2; pq++) {
 615         prev = trellis_node - TRELLIS_WIDTH + pq;
 616
 617         for (q = min_quant; q < max_quant + 2; q++) {
 618             cur = trellis_node + q;
 619
 620             bits  = ctx->nodes[prev].bits + slice_bits[q];
 621             error = slice_score[q];
 622             if (bits > bits_limit)
 623                 error = SCORE_LIMIT;
 624
 625             if (ctx->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
 626                 new_score = ctx->nodes[prev].score + error;
 627             else
 628                 new_score = SCORE_LIMIT;
 629             if (ctx->nodes[cur].prev_node == -1 ||
 630                 ctx->nodes[cur].score >= new_score) {
 631
 632                 ctx->nodes[cur].bits      = bits;
 633                 ctx->nodes[cur].score     = new_score;
 634                 ctx->nodes[cur].prev_node = prev;
 635             }
 636         }
 637     }
 638
 639     error = ctx->nodes[trellis_node + min_quant].score;
 640     pq    = trellis_node + min_quant;
 641     for (q = min_quant + 1; q < max_quant + 2; q++) {
 642         if (ctx->nodes[trellis_node + q].score <= error) {
 643             error = ctx->nodes[trellis_node + q].score;
 644             pq    = trellis_node + q;
 645         }
 646     }
 647
 648     return pq;
 649 }
 650
 651 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 652                         const AVFrame *pic, int *got_packet)
 653 {
 654     ProresContext *ctx = avctx->priv_data;
 655     uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
 656     uint8_t *picture_size_pos;
 657     PutBitContext pb;
 658     int x, y, i, mb, q = 0;
 659     int sizes[4] = { 0 };
 660     int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
 661     int frame_size, picture_size, slice_size;
 662     int mbs_per_slice = ctx->mbs_per_slice;
 663     int pkt_size, ret;
 664
 665     *avctx->coded_frame           = *pic;
 666     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
 667     avctx->coded_frame->key_frame = 1;
 668
 669     pkt_size = ctx->frame_size + FF_MIN_BUFFER_SIZE;
 670
 671     if ((ret = ff_alloc_packet(pkt, pkt_size)) < 0) {
 672         av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
 673         return ret;
 674     }
 675
 676     orig_buf = pkt->data;
 677
 678     // frame atom
 679     orig_buf += 4;                              // frame size
 680     bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
 681     buf = orig_buf;
 682
 683     // frame header
 684     tmp = buf;
 685     buf += 2;                                   // frame header size will be stored here
 686     bytestream_put_be16  (&buf, 0);             // version 1
 687     bytestream_put_buffer(&buf, "Lavc", 4);     // creator
 688     bytestream_put_be16  (&buf, avctx->width);
 689     bytestream_put_be16  (&buf, avctx->height);
 690     bytestream_put_byte  (&buf, ctx->chroma_factor << 6); // frame flags
 691     bytestream_put_byte  (&buf, 0);             // reserved
 692     bytestream_put_byte  (&buf, avctx->color_primaries);
 693     bytestream_put_byte  (&buf, avctx->color_trc);
 694     bytestream_put_byte  (&buf, avctx->colorspace);
 695     bytestream_put_byte  (&buf, 0x40);          // source format and alpha information
 696     bytestream_put_byte  (&buf, 0);             // reserved
 697     bytestream_put_byte  (&buf, 0x03);          // matrix flags - both matrices are present
 698     // luma quantisation matrix
 699     for (i = 0; i < 64; i++)
 700         bytestream_put_byte(&buf, ctx->profile_info->quant[i]);
 701     // chroma quantisation matrix
 702     for (i = 0; i < 64; i++)
 703         bytestream_put_byte(&buf, ctx->profile_info->quant[i]);
 704     bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
 705
 706     // picture header
 707     picture_size_pos = buf + 1;
 708     bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
 709     buf += 4;                                   // picture data size will be stored here
 710     bytestream_put_be16  (&buf, ctx->num_slices); // total number of slices
 711     bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
 712
 713     // seek table - will be filled during slice encoding
 714     slice_sizes = buf;
 715     buf += ctx->num_slices * 2;
 716
 717     // slices
 718     for (y = 0; y < ctx->mb_height; y++) {
 719         mbs_per_slice = ctx->mbs_per_slice;
 720         for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
 721             while (ctx->mb_width - x < mbs_per_slice)
 722                 mbs_per_slice >>= 1;
 723             q = find_slice_quant(avctx, pic, (mb + 1) * TRELLIS_WIDTH, x, y,
 724                                  mbs_per_slice);
 725         }
 726
 727         for (x = ctx->slices_width - 1; x >= 0; x--) {
 728             ctx->slice_q[x] = ctx->nodes[q].quant;
 729             q = ctx->nodes[q].prev_node;
 730         }
 731
 732         mbs_per_slice = ctx->mbs_per_slice;
 733         for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
 734             q = ctx->slice_q[mb];
 735
 736             while (ctx->mb_width - x < mbs_per_slice)
 737                 mbs_per_slice >>= 1;
 738
 739             bytestream_put_byte(&buf, slice_hdr_size << 3);
 740             slice_hdr = buf;
 741             buf += slice_hdr_size - 1;
 742             init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
 743             encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
 744
 745             bytestream_put_byte(&slice_hdr, q);
 746             slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
 747             for (i = 0; i < ctx->num_planes - 1; i++) {
 748                 bytestream_put_be16(&slice_hdr, sizes[i]);
 749                 slice_size += sizes[i];
 750             }
 751             bytestream_put_be16(&slice_sizes, slice_size);
 752             buf += slice_size - slice_hdr_size;
 753         }
 754     }
 755
 756     orig_buf -= 8;
 757     frame_size = buf - orig_buf;
 758     picture_size = buf - picture_size_pos - 6;
 759     bytestream_put_be32(&orig_buf, frame_size);
 760     bytestream_put_be32(&picture_size_pos, picture_size);
 761
 762     pkt->size   = frame_size;
 763     pkt->flags |= AV_PKT_FLAG_KEY;
 764     *got_packet = 1;
 765
 766     return 0;
 767 }
 768
 769 static av_cold int encode_close(AVCodecContext *avctx)
 770 {
 771     ProresContext *ctx = avctx->priv_data;
 772
 773     if (avctx->coded_frame->data[0])
 774         avctx->release_buffer(avctx, avctx->coded_frame);
 775
 776     av_freep(&avctx->coded_frame);
 777
 778     av_freep(&ctx->nodes);
 779     av_freep(&ctx->slice_q);
 780
 781     return 0;
 782 }
 783
 784 static av_cold int encode_init(AVCodecContext *avctx)
 785 {
 786     ProresContext *ctx = avctx->priv_data;
 787     int mps;
 788     int i, j;
 789     int min_quant, max_quant;
 790
 791     avctx->bits_per_raw_sample = 10;
 792     avctx->coded_frame = avcodec_alloc_frame();
 793     if (!avctx->coded_frame)
 794         return AVERROR(ENOMEM);
 795
 796     ff_proresdsp_init(&ctx->dsp, avctx);
 797     ff_init_scantable(ctx->dsp.dct_permutation, &ctx->scantable,
 798                       ff_prores_progressive_scan);
 799
 800     mps = ctx->mbs_per_slice;
 801     if (mps & (mps - 1)) {
 802         av_log(avctx, AV_LOG_ERROR,
 803                "there should be an integer power of two MBs per slice\n");
 804         return AVERROR(EINVAL);
 805     }
 806
 807     ctx->chroma_factor = avctx->pix_fmt == PIX_FMT_YUV422P10
 808                          ? CFACTOR_Y422
 809                          : CFACTOR_Y444;
 810     ctx->profile_info  = prores_profile_info + ctx->profile;
 811     ctx->num_planes    = 3;
 812
 813     ctx->mb_width      = FFALIGN(avctx->width,  16) >> 4;
 814     ctx->mb_height     = FFALIGN(avctx->height, 16) >> 4;
 815     ctx->slices_width  = ctx->mb_width / mps;
 816     ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
 817     ctx->num_slices    = ctx->mb_height * ctx->slices_width;
 818
 819     for (i = 0; i < NUM_MB_LIMITS - 1; i++)
 820         if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height)
 821             break;
 822     ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
 823
 824     ctx->frame_size = ctx->num_slices * (2 + 2 * ctx->num_planes
 825                                          + (2 * mps * ctx->bits_per_mb) / 8)
 826                       + 200;
 827
 828     min_quant = ctx->profile_info->min_quant;
 829     max_quant = ctx->profile_info->max_quant;
 830     for (i = min_quant; i < MAX_STORED_Q; i++) {
 831         for (j = 0; j < 64; j++)
 832             ctx->quants[i][j] = ctx->profile_info->quant[j] * i;
 833     }
 834
 835     avctx->codec_tag   = ctx->profile_info->tag;
 836
 837     av_log(avctx, AV_LOG_DEBUG, "profile %d, %d slices, %d bits per MB\n",
 838            ctx->profile, ctx->num_slices, ctx->bits_per_mb);
 839     av_log(avctx, AV_LOG_DEBUG, "estimated frame size %d\n",
 840            ctx->frame_size);
 841
 842     ctx->nodes = av_malloc((ctx->slices_width + 1) * TRELLIS_WIDTH
 843                            * sizeof(*ctx->nodes));
 844     if (!ctx->nodes) {
 845         encode_close(avctx);
 846         return AVERROR(ENOMEM);
 847     }
 848     for (i = min_quant; i < max_quant + 2; i++) {
 849         ctx->nodes[i].prev_node = -1;
 850         ctx->nodes[i].bits      = 0;
 851         ctx->nodes[i].score     = 0;
 852     }
 853
 854     ctx->slice_q = av_malloc(ctx->slices_width * sizeof(*ctx->slice_q));
 855     if (!ctx->slice_q) {
 856         encode_close(avctx);
 857         return AVERROR(ENOMEM);
 858     }
 859
 860     return 0;
 861 }
 862
 863 #define OFFSET(x) offsetof(ProresContext, x) // byte offset of an option's field inside the private context
 864 #define VE     AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM // flags shared by all options below
 865
 866 static const AVOption options[] = { // private options of the encoder
 867     { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
 868         AV_OPT_TYPE_INT, { 8 }, 1, MAX_MBS_PER_SLICE, VE }, // default 8; encode_init() additionally requires a power of two
 869     { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
 870         { PRORES_PROFILE_STANDARD },
 871         PRORES_PROFILE_PROXY, PRORES_PROFILE_HQ, VE, "profile" }, // default "standard"; the entries below are its named constants
 872     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_PROXY },
 873         0, 0, VE, "profile" },
 874     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_LT },
 875         0, 0, VE, "profile" },
 876     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_STANDARD },
 877         0, 0, VE, "profile" },
 878     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_HQ },
 879         0, 0, VE, "profile" },
 880     { NULL } // terminator
 881 };
 882
 883 static const AVClass proresenc_class = { // class descriptor that exposes the options[] table; referenced by the codec's priv_class
 884     .class_name = "ProRes encoder",
 885     .item_name  = av_default_item_name,
 886     .option     = options,
 887     .version    = LIBAVUTIL_VERSION_INT,
 888 };
 889
 890 AVCodec ff_prores_kostya_encoder = { // codec registration entry for this encoder
 891     .name           = "prores_kostya",
 892     .type           = AVMEDIA_TYPE_VIDEO,
 893     .id             = CODEC_ID_PRORES,
 894     .priv_data_size = sizeof(ProresContext),
 895     .init           = encode_init,
 896     .close          = encode_close,
 897     .encode2        = encode_frame,
 898     .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
 899     .pix_fmts       = (const enum PixelFormat[]) { // accepted input formats, terminated by PIX_FMT_NONE
 900                           PIX_FMT_YUV422P10, PIX_FMT_YUV444P10, PIX_FMT_NONE
 901                       },
 902     .priv_class     = &proresenc_class,
 903 };