git.sesse.net Git - ffmpeg/blob - libavcodec/proresenc.c

/*
 * Apple ProRes encoder
 *
 * Copyright (c) 2012 Konstantin Shishkov
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
  22
  23 #include "libavutil/opt.h"
  24 #include "avcodec.h"
  25 #include "put_bits.h"
  26 #include "bytestream.h"
  27 #include "internal.h"
  28 #include "proresdsp.h"
  29 #include "proresdata.h"
  30
/* Chroma format codes; encode_frame() stores the selected one in the top
 * two bits of the frame flags byte (chroma_factor << 6). */
#define CFACTOR_Y422 2
#define CFACTOR_Y444 3

/* Upper bound of the "mbs_per_slice" option; also sizes ProresContext.blocks. */
#define MAX_MBS_PER_SLICE 8

#define MAX_PLANES 3 // should be increased to 4 when there's PIX_FMT_YUV444AP10
  37
/* Profile identifiers selectable through the "profile" option; each value
 * is used by encode_init() as an index into prores_profile_info[]. */
enum {
    PRORES_PROFILE_PROXY = 0,
    PRORES_PROFILE_LT,
    PRORES_PROFILE_STANDARD,
    PRORES_PROFILE_HQ,
};
  44
/*
 * Frame-size thresholds, expressed in macroblocks per frame.
 * encode_init() takes the index of the first entry that is
 * >= mb_width * mb_height (falling back to the last entry for larger
 * frames) and uses it to pick the matching br_tab[] value of the profile.
 */
#define NUM_MB_LIMITS 4
static const int prores_mb_limits[NUM_MB_LIMITS] = {
    1620, // up to 720x576
    2700, // up to 960x720
    6075, // up to 1440x1080
    9216, // up to 2048x1152
};
  52
/*
 * Per-profile encoding parameters, indexed by the PRORES_PROFILE_* value.
 *
 *  full_name  human-readable profile name
 *  tag        FourCC that encode_init() assigns to avctx->codec_tag
 *  min_quant  smallest quantiser tried by find_slice_quant()
 *  max_quant  largest regular quantiser tried by find_slice_quant()
 *  br_tab     bit budget per macroblock, one entry per prores_mb_limits[]
 *             frame-size class
 *  quant      8x8 base quantisation matrix; written to the frame header for
 *             both luma and chroma and multiplied by the slice quantiser
 */
static const struct prores_profile {
    const char *full_name;
    uint32_t    tag;
    int         min_quant;
    int         max_quant;
    int         br_tab[NUM_MB_LIMITS];
    uint8_t     quant[64];
} prores_profile_info[4] = {
    {
        .full_name = "proxy",
        .tag       = MKTAG('a', 'p', 'c', 'o'),
        .min_quant = 4,
        .max_quant = 8,
        .br_tab    = { 300, 242, 220, 194 },
        .quant     = {
             4,  7,  9, 11, 13, 14, 15, 63,
             7,  7, 11, 12, 14, 15, 63, 63,
             9, 11, 13, 14, 15, 63, 63, 63,
            11, 11, 13, 14, 63, 63, 63, 63,
            11, 13, 14, 63, 63, 63, 63, 63,
            13, 14, 63, 63, 63, 63, 63, 63,
            13, 63, 63, 63, 63, 63, 63, 63,
            63, 63, 63, 63, 63, 63, 63, 63,
        },
    },
    {
        .full_name = "LT",
        .tag       = MKTAG('a', 'p', 'c', 's'),
        .min_quant = 1,
        .max_quant = 9,
        .br_tab    = { 720, 560, 490, 440 },
        .quant     = {
             4,  5,  6,  7,  9, 11, 13, 15,
             5,  5,  7,  8, 11, 13, 15, 17,
             6,  7,  9, 11, 13, 15, 15, 17,
             7,  7,  9, 11, 13, 15, 17, 19,
             7,  9, 11, 13, 14, 16, 19, 23,
             9, 11, 13, 14, 16, 19, 23, 29,
             9, 11, 13, 15, 17, 21, 28, 35,
            11, 13, 16, 17, 21, 28, 35, 41,
        },
    },
    {
        .full_name = "standard",
        .tag       = MKTAG('a', 'p', 'c', 'n'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 1050, 808, 710, 632 },
        .quant     = {
             4,  4,  5,  5,  6,  7,  7,  9,
             4,  4,  5,  6,  7,  7,  9,  9,
             5,  5,  6,  7,  7,  9,  9, 10,
             5,  5,  6,  7,  7,  9,  9, 10,
             5,  6,  7,  7,  8,  9, 10, 12,
             6,  7,  7,  8,  9, 10, 12, 15,
             6,  7,  7,  9, 10, 11, 14, 17,
             7,  7,  9, 10, 11, 14, 17, 21,
        },
    },
    {
        .full_name = "high quality",
        .tag       = MKTAG('a', 'p', 'c', 'h'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 1566, 1216, 1070, 950 },
        .quant     = {
             4,  4,  4,  4,  4,  4,  4,  4,
             4,  4,  4,  4,  4,  4,  4,  4,
             4,  4,  4,  4,  4,  4,  4,  4,
             4,  4,  4,  4,  4,  4,  4,  5,
             4,  4,  4,  4,  4,  4,  5,  5,
             4,  4,  4,  4,  4,  5,  5,  6,
             4,  4,  4,  4,  5,  5,  6,  7,
             4,  4,  4,  4,  5,  6,  7,  7,
        },
    }
// for 4444 profile bitrate numbers are { 2350, 1828, 1600, 1425 }
};
 131
 132 #define TRELLIS_WIDTH 16
 133 #define SCORE_LIMIT   INT_MAX / 2
 134
/* One node of the per-row quantiser trellis built by find_slice_quant(). */
struct TrellisNode {
    int prev_node;  /* index of the predecessor in ProresContext.nodes, -1 if none */
    int quant;      /* quantiser this node stands for */
    int bits;       /* bits accumulated along the chosen path, this slice included */
    int score;      /* error accumulated along the chosen path, or SCORE_LIMIT */
};
 141
/* Quantisers below this value have a precomputed matrix in
 * ProresContext.quants; larger ones are built on demand in custom_q. */
#define MAX_STORED_Q 16

/* Private encoder state. */
typedef struct ProresContext {
    AVClass *class;
    /* transformed coefficients of the current slice, one array per plane */
    DECLARE_ALIGNED(16, DCTELEM, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
    /* scratch macroblock used by get_slice_data() for MBs crossing the picture edge */
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
    /* quants[q][i] = profile quant[i] * q, filled by encode_init() for q >= min_quant */
    int16_t quants[MAX_STORED_Q][64];
    /* matrix rebuilt for quantisers >= MAX_STORED_Q */
    int16_t custom_q[64];

    ProresDSPContext dsp;
    ScanTable  scantable;

    int mb_width, mb_height;    /* picture size in 16x16 macroblocks */
    int mbs_per_slice;          /* "mbs_per_slice" option value */
    int num_chroma_blocks, chroma_factor;
    int slices_width;           /* number of slices in one macroblock row */
    int num_slices;             /* mb_height * slices_width */
    int num_planes;
    int bits_per_mb;            /* br_tab[] entry selected in encode_init() */

    int frame_size;             /* estimated frame size, used to size the output packet */

    int profile;                /* "profile" option value */
    const struct prores_profile *profile_info;

    struct TrellisNode *nodes;  /* (slices_width + 1) * TRELLIS_WIDTH trellis nodes */
    int *slice_q;               /* quantiser chosen for each slice of the current row */
} ProresContext;
 170
 171 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
 172                            int linesize, int x, int y, int w, int h,
 173                            DCTELEM *blocks,
 174                            int mbs_per_slice, int blocks_per_mb)
 175 {
 176     const uint16_t *esrc;
 177     const int mb_width = 4 * blocks_per_mb;
 178     int elinesize;
 179     int i, j, k;
 180
 181     for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
 182         if (x >= w) {
 183             memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
 184                               * sizeof(*blocks));
 185             return;
 186         }
 187         if (x + mb_width <= w && y + 16 <= h) {
 188             esrc      = src;
 189             elinesize = linesize;
 190         } else {
 191             int bw, bh, pix;
 192             const int estride = 16 / sizeof(*ctx->emu_buf);
 193
 194             esrc      = ctx->emu_buf;
 195             elinesize = 16;
 196
 197             bw = FFMIN(w - x, mb_width);
 198             bh = FFMIN(h - y, 16);
 199
 200             for (j = 0; j < bh; j++) {
 201                 memcpy(ctx->emu_buf + j * estride, src + j * linesize,
 202                        bw * sizeof(*src));
 203                 pix = ctx->emu_buf[j * estride + bw - 1];
 204                 for (k = bw; k < mb_width; k++)
 205                     ctx->emu_buf[j * estride + k] = pix;
 206             }
 207             for (; j < 16; j++)
 208                 memcpy(ctx->emu_buf + j * estride,
 209                        ctx->emu_buf + (bh - 1) * estride,
 210                        mb_width * sizeof(*ctx->emu_buf));
 211         }
 212         ctx->dsp.fdct(esrc, elinesize, blocks);
 213         blocks += 64;
 214         if (blocks_per_mb > 2) {
 215             ctx->dsp.fdct(src + 8, linesize, blocks);
 216             blocks += 64;
 217         }
 218         ctx->dsp.fdct(src + linesize * 4, linesize, blocks);
 219         blocks += 64;
 220         if (blocks_per_mb > 2) {
 221             ctx->dsp.fdct(src + linesize * 4 + 8, linesize, blocks);
 222             blocks += 64;
 223         }
 224
 225         x += mb_width;
 226     }
 227 }
 228
 229 /**
 230  * Write an unsigned rice/exp golomb codeword.
 231  */
 232 static inline void encode_vlc_codeword(PutBitContext *pb, uint8_t codebook, int val)
 233 {
 234     unsigned int rice_order, exp_order, switch_bits, switch_val;
 235     int exponent;
 236
 237     /* number of prefix bits to switch between Rice and expGolomb */
 238     switch_bits = (codebook & 3) + 1;
 239     rice_order  =  codebook >> 5;       /* rice code order */
 240     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 241
 242     switch_val  = switch_bits << rice_order;
 243
 244     if (val >= switch_val) {
 245         val -= switch_val - (1 << exp_order);
 246         exponent = av_log2(val);
 247
 248         put_bits(pb, exponent - exp_order + switch_bits, 0);
 249         put_bits(pb, 1, 1);
 250         put_bits(pb, exponent, val);
 251     } else {
 252         exponent = val >> rice_order;
 253
 254         if (exponent)
 255             put_bits(pb, exponent, 0);
 256         put_bits(pb, 1, 1);
 257         if (rice_order)
 258             put_sbits(pb, rice_order, val);
 259     }
 260 }
 261
/* -1 (all bits set) for negative x and 0 otherwise; relies on x being a
 * 32-bit int and on >> being an arithmetic shift for negative values. */
#define GET_SIGN(x)  ((x) >> 31)
/* Fold a signed value into a non-negative code: 0, -1, 1, -2, ... map to
 * 0, 1, 2, 3, ... */
#define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
 264
 265 static void encode_dcs(PutBitContext *pb, DCTELEM *blocks,
 266                        int blocks_per_slice, int scale)
 267 {
 268     int i;
 269     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 270
 271     prev_dc = (blocks[0] - 0x4000) / scale;
 272     encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
 273     sign     = 0;
 274     codebook = 3;
 275     blocks  += 64;
 276
 277     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 278         dc       = (blocks[0] - 0x4000) / scale;
 279         delta    = dc - prev_dc;
 280         new_sign = GET_SIGN(delta);
 281         delta    = (delta ^ sign) - sign;
 282         code     = MAKE_CODE(delta);
 283         encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
 284         codebook = (code + (code & 1)) >> 1;
 285         codebook = FFMIN(codebook, 3);
 286         sign     = new_sign;
 287         prev_dc  = dc;
 288     }
 289 }
 290
 291 static void encode_acs(PutBitContext *pb, DCTELEM *blocks,
 292                        int blocks_per_slice,
 293                        int plane_size_factor,
 294                        const uint8_t *scan, const int16_t *qmat)
 295 {
 296     int idx, i;
 297     int run, level, run_cb, lev_cb;
 298     int max_coeffs, abs_level;
 299
 300     max_coeffs = blocks_per_slice << 6;
 301     run_cb     = ff_prores_run_to_cb_index[4];
 302     lev_cb     = ff_prores_lev_to_cb_index[2];
 303     run        = 0;
 304
 305     for (i = 1; i < 64; i++) {
 306         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 307             level = blocks[idx] / qmat[scan[i]];
 308             if (level) {
 309                 abs_level = FFABS(level);
 310                 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
 311                 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
 312                                     abs_level - 1);
 313                 put_sbits(pb, 1, GET_SIGN(level));
 314
 315                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 316                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 317                 run    = 0;
 318             } else {
 319                 run++;
 320             }
 321         }
 322     }
 323 }
 324
 325 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
 326                               const uint16_t *src, int linesize,
 327                               int mbs_per_slice, DCTELEM *blocks,
 328                               int blocks_per_mb, int plane_size_factor,
 329                               const int16_t *qmat)
 330 {
 331     int blocks_per_slice, saved_pos;
 332
 333     saved_pos = put_bits_count(pb);
 334     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 335
 336     encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
 337     encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
 338                ctx->scantable.permutated, qmat);
 339     flush_put_bits(pb);
 340
 341     return (put_bits_count(pb) - saved_pos) >> 3;
 342 }
 343
 344 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
 345                         PutBitContext *pb,
 346                         int sizes[4], int x, int y, int quant,
 347                         int mbs_per_slice)
 348 {
 349     ProresContext *ctx = avctx->priv_data;
 350     int i, xp, yp;
 351     int total_size = 0;
 352     const uint16_t *src;
 353     int slice_width_factor = av_log2(mbs_per_slice);
 354     int num_cblocks, pwidth;
 355     int plane_factor, is_chroma;
 356     uint16_t *qmat;
 357
 358     if (quant < MAX_STORED_Q) {
 359         qmat = ctx->quants[quant];
 360     } else {
 361         qmat = ctx->custom_q;
 362         for (i = 0; i < 64; i++)
 363             qmat[i] = ctx->profile_info->quant[i] * quant;
 364     }
 365
 366     for (i = 0; i < ctx->num_planes; i++) {
 367         is_chroma    = (i == 1 || i == 2);
 368         plane_factor = slice_width_factor + 2;
 369         if (is_chroma)
 370             plane_factor += ctx->chroma_factor - 3;
 371         if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
 372             xp          = x << 4;
 373             yp          = y << 4;
 374             num_cblocks = 4;
 375             pwidth      = avctx->width;
 376         } else {
 377             xp          = x << 3;
 378             yp          = y << 4;
 379             num_cblocks = 2;
 380             pwidth      = avctx->width >> 1;
 381         }
 382         src = (const uint16_t*)(pic->data[i] + yp * pic->linesize[i]) + xp;
 383
 384         get_slice_data(ctx, src, pic->linesize[i], xp, yp,
 385                        pwidth, avctx->height, ctx->blocks[0],
 386                        mbs_per_slice, num_cblocks);
 387         sizes[i] = encode_slice_plane(ctx, pb, src, pic->linesize[i],
 388                                       mbs_per_slice, ctx->blocks[0],
 389                                       num_cblocks, plane_factor,
 390                                       qmat);
 391         total_size += sizes[i];
 392     }
 393     return total_size;
 394 }
 395
 396 static inline int estimate_vlc(uint8_t codebook, int val)
 397 {
 398     unsigned int rice_order, exp_order, switch_bits, switch_val;
 399     int exponent;
 400
 401     /* number of prefix bits to switch between Rice and expGolomb */
 402     switch_bits = (codebook & 3) + 1;
 403     rice_order  =  codebook >> 5;       /* rice code order */
 404     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 405
 406     switch_val  = switch_bits << rice_order;
 407
 408     if (val >= switch_val) {
 409         val -= switch_val - (1 << exp_order);
 410         exponent = av_log2(val);
 411
 412         return exponent * 2 - exp_order + switch_bits + 1;
 413     } else {
 414         return (val >> rice_order) + rice_order + 1;
 415     }
 416 }
 417
 418 static int estimate_dcs(int *error, DCTELEM *blocks, int blocks_per_slice,
 419                         int scale)
 420 {
 421     int i;
 422     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 423     int bits;
 424
 425     prev_dc  = (blocks[0] - 0x4000) / scale;
 426     bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
 427     sign     = 0;
 428     codebook = 3;
 429     blocks  += 64;
 430     *error  += FFABS(blocks[0] - 0x4000) % scale;
 431
 432     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 433         dc       = (blocks[0] - 0x4000) / scale;
 434         *error  += FFABS(blocks[0] - 0x4000) % scale;
 435         delta    = dc - prev_dc;
 436         new_sign = GET_SIGN(delta);
 437         delta    = (delta ^ sign) - sign;
 438         code     = MAKE_CODE(delta);
 439         bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
 440         codebook = (code + (code & 1)) >> 1;
 441         codebook = FFMIN(codebook, 3);
 442         sign     = new_sign;
 443         prev_dc  = dc;
 444     }
 445
 446     return bits;
 447 }
 448
 449 static int estimate_acs(int *error, DCTELEM *blocks, int blocks_per_slice,
 450                         int plane_size_factor,
 451                         const uint8_t *scan, const int16_t *qmat)
 452 {
 453     int idx, i;
 454     int run, level, run_cb, lev_cb;
 455     int max_coeffs, abs_level;
 456     int bits = 0;
 457
 458     max_coeffs = blocks_per_slice << 6;
 459     run_cb     = ff_prores_run_to_cb_index[4];
 460     lev_cb     = ff_prores_lev_to_cb_index[2];
 461     run        = 0;
 462
 463     for (i = 1; i < 64; i++) {
 464         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 465             level   = blocks[idx] / qmat[scan[i]];
 466             *error += FFABS(blocks[idx]) % qmat[scan[i]];
 467             if (level) {
 468                 abs_level = FFABS(level);
 469                 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
 470                 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
 471                                      abs_level - 1) + 1;
 472
 473                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 474                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 475                 run    = 0;
 476             } else {
 477                 run++;
 478             }
 479         }
 480     }
 481
 482     return bits;
 483 }
 484
 485 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
 486                                 const uint16_t *src, int linesize,
 487                                 int mbs_per_slice,
 488                                 int blocks_per_mb, int plane_size_factor,
 489                                 const int16_t *qmat)
 490 {
 491     int blocks_per_slice;
 492     int bits;
 493
 494     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 495
 496     bits  = estimate_dcs(error, ctx->blocks[plane], blocks_per_slice, qmat[0]);
 497     bits += estimate_acs(error, ctx->blocks[plane], blocks_per_slice,
 498                          plane_size_factor, ctx->scantable.permutated, qmat);
 499
 500     return FFALIGN(bits, 8);
 501 }
 502
 503 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
 504                             int trellis_node, int x, int y, int mbs_per_slice)
 505 {
 506     ProresContext *ctx = avctx->priv_data;
 507     int i, q, pq, xp, yp;
 508     const uint16_t *src;
 509     int slice_width_factor = av_log2(mbs_per_slice);
 510     int num_cblocks[MAX_PLANES], pwidth;
 511     int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
 512     const int min_quant = ctx->profile_info->min_quant;
 513     const int max_quant = ctx->profile_info->max_quant;
 514     int error, bits, bits_limit;
 515     int mbs, prev, cur, new_score;
 516     int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
 517     int overquant;
 518     uint16_t *qmat;
 519
 520     mbs = x + mbs_per_slice;
 521
 522     for (i = 0; i < ctx->num_planes; i++) {
 523         is_chroma[i]    = (i == 1 || i == 2);
 524         plane_factor[i] = slice_width_factor + 2;
 525         if (is_chroma[i])
 526             plane_factor[i] += ctx->chroma_factor - 3;
 527         if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
 528             xp             = x << 4;
 529             yp             = y << 4;
 530             num_cblocks[i] = 4;
 531             pwidth         = avctx->width;
 532         } else {
 533             xp             = x << 3;
 534             yp             = y << 4;
 535             num_cblocks[i] = 2;
 536             pwidth         = avctx->width >> 1;
 537         }
 538         src = (const uint16_t*)(pic->data[i] + yp * pic->linesize[i]) + xp;
 539
 540         get_slice_data(ctx, src, pic->linesize[i], xp, yp,
 541                        pwidth, avctx->height, ctx->blocks[i],
 542                        mbs_per_slice, num_cblocks[i]);
 543     }
 544
 545     for (q = min_quant; q < max_quant + 2; q++) {
 546         ctx->nodes[trellis_node + q].prev_node = -1;
 547         ctx->nodes[trellis_node + q].quant     = q;
 548     }
 549
 550     // todo: maybe perform coarser quantising to fit into frame size when needed
 551     for (q = min_quant; q <= max_quant; q++) {
 552         bits  = 0;
 553         error = 0;
 554         for (i = 0; i < ctx->num_planes; i++) {
 555             bits += estimate_slice_plane(ctx, &error, i,
 556                                          src, pic->linesize[i],
 557                                          mbs_per_slice,
 558                                          num_cblocks[i], plane_factor[i],
 559                                          ctx->quants[q]);
 560         }
 561         if (bits > 65000 * 8) {
 562             error = SCORE_LIMIT;
 563             break;
 564         }
 565         slice_bits[q]  = bits;
 566         slice_score[q] = error;
 567     }
 568     if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
 569         slice_bits[max_quant + 1]  = slice_bits[max_quant];
 570         slice_score[max_quant + 1] = slice_score[max_quant] + 1;
 571         overquant = max_quant;
 572     } else {
 573         for (q = max_quant + 1; q < 128; q++) {
 574             bits  = 0;
 575             error = 0;
 576             if (q < MAX_STORED_Q) {
 577                 qmat = ctx->quants[q];
 578             } else {
 579                 qmat = ctx->custom_q;
 580                 for (i = 0; i < 64; i++)
 581                     qmat[i] = ctx->profile_info->quant[i] * q;
 582             }
 583             for (i = 0; i < ctx->num_planes; i++) {
 584                 bits += estimate_slice_plane(ctx, &error, i,
 585                                              src, pic->linesize[i],
 586                                              mbs_per_slice,
 587                                              num_cblocks[i], plane_factor[i],
 588                                              qmat);
 589             }
 590             if (bits <= ctx->bits_per_mb * mbs_per_slice)
 591                 break;
 592         }
 593
 594         slice_bits[max_quant + 1]  = bits;
 595         slice_score[max_quant + 1] = error;
 596         overquant = q;
 597     }
 598     ctx->nodes[trellis_node + max_quant + 1].quant = overquant;
 599
 600     bits_limit = mbs * ctx->bits_per_mb;
 601     for (pq = min_quant; pq < max_quant + 2; pq++) {
 602         prev = trellis_node - TRELLIS_WIDTH + pq;
 603
 604         for (q = min_quant; q < max_quant + 2; q++) {
 605             cur = trellis_node + q;
 606
 607             bits  = ctx->nodes[prev].bits + slice_bits[q];
 608             error = slice_score[q];
 609             if (bits > bits_limit)
 610                 error = SCORE_LIMIT;
 611
 612             if (ctx->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
 613                 new_score = ctx->nodes[prev].score + error;
 614             else
 615                 new_score = SCORE_LIMIT;
 616             if (ctx->nodes[cur].prev_node == -1 ||
 617                 ctx->nodes[cur].score >= new_score) {
 618
 619                 ctx->nodes[cur].bits      = bits;
 620                 ctx->nodes[cur].score     = new_score;
 621                 ctx->nodes[cur].prev_node = prev;
 622             }
 623         }
 624     }
 625
 626     error = ctx->nodes[trellis_node + min_quant].score;
 627     pq    = trellis_node + min_quant;
 628     for (q = min_quant + 1; q < max_quant + 2; q++) {
 629         if (ctx->nodes[trellis_node + q].score <= error) {
 630             error = ctx->nodes[trellis_node + q].score;
 631             pq    = trellis_node + q;
 632         }
 633     }
 634
 635     return pq;
 636 }
 637
 638 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 639                         const AVFrame *pic, int *got_packet)
 640 {
 641     ProresContext *ctx = avctx->priv_data;
 642     uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
 643     uint8_t *picture_size_pos;
 644     PutBitContext pb;
 645     int x, y, i, mb, q = 0;
 646     int sizes[4] = { 0 };
 647     int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
 648     int frame_size, picture_size, slice_size;
 649     int mbs_per_slice = ctx->mbs_per_slice;
 650     int pkt_size, ret;
 651
 652     *avctx->coded_frame           = *pic;
 653     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
 654     avctx->coded_frame->key_frame = 1;
 655
 656     pkt_size = ctx->frame_size + FF_MIN_BUFFER_SIZE;
 657
 658     if ((ret = ff_alloc_packet(pkt, pkt_size)) < 0) {
 659         av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
 660         return ret;
 661     }
 662
 663     orig_buf = pkt->data;
 664
 665     // frame atom
 666     orig_buf += 4;                              // frame size
 667     bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
 668     buf = orig_buf;
 669
 670     // frame header
 671     tmp = buf;
 672     buf += 2;                                   // frame header size will be stored here
 673     bytestream_put_be16  (&buf, 0);             // version 1
 674     bytestream_put_buffer(&buf, "Lavc", 4);     // creator
 675     bytestream_put_be16  (&buf, avctx->width);
 676     bytestream_put_be16  (&buf, avctx->height);
 677     bytestream_put_byte  (&buf, ctx->chroma_factor << 6); // frame flags
 678     bytestream_put_byte  (&buf, 0);             // reserved
 679     bytestream_put_byte  (&buf, 0);             // primaries
 680     bytestream_put_byte  (&buf, 0);             // transfer function
 681     bytestream_put_byte  (&buf, 6);             // colour matrix - ITU-R BT.601-4
 682     bytestream_put_byte  (&buf, 0x40);          // source format and alpha information
 683     bytestream_put_byte  (&buf, 0);             // reserved
 684     bytestream_put_byte  (&buf, 0x03);          // matrix flags - both matrices are present
 685     // luma quantisation matrix
 686     for (i = 0; i < 64; i++)
 687         bytestream_put_byte(&buf, ctx->profile_info->quant[i]);
 688     // chroma quantisation matrix
 689     for (i = 0; i < 64; i++)
 690         bytestream_put_byte(&buf, ctx->profile_info->quant[i]);
 691     bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
 692
 693     // picture header
 694     picture_size_pos = buf + 1;
 695     bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
 696     buf += 4;                                   // picture data size will be stored here
 697     bytestream_put_be16  (&buf, ctx->num_slices); // total number of slices
 698     bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
 699
 700     // seek table - will be filled during slice encoding
 701     slice_sizes = buf;
 702     buf += ctx->num_slices * 2;
 703
 704     // slices
 705     for (y = 0; y < ctx->mb_height; y++) {
 706         mbs_per_slice = ctx->mbs_per_slice;
 707         for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
 708             while (ctx->mb_width - x < mbs_per_slice)
 709                 mbs_per_slice >>= 1;
 710             q = find_slice_quant(avctx, pic, (mb + 1) * TRELLIS_WIDTH, x, y,
 711                                  mbs_per_slice);
 712         }
 713
 714         for (x = ctx->slices_width - 1; x >= 0; x--) {
 715             ctx->slice_q[x] = ctx->nodes[q].quant;
 716             q = ctx->nodes[q].prev_node;
 717         }
 718
 719         mbs_per_slice = ctx->mbs_per_slice;
 720         for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
 721             q = ctx->slice_q[mb];
 722
 723             while (ctx->mb_width - x < mbs_per_slice)
 724                 mbs_per_slice >>= 1;
 725
 726             bytestream_put_byte(&buf, slice_hdr_size << 3);
 727             slice_hdr = buf;
 728             buf += slice_hdr_size - 1;
 729             init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
 730             encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
 731
 732             bytestream_put_byte(&slice_hdr, q);
 733             slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
 734             for (i = 0; i < ctx->num_planes - 1; i++) {
 735                 bytestream_put_be16(&slice_hdr, sizes[i]);
 736                 slice_size += sizes[i];
 737             }
 738             bytestream_put_be16(&slice_sizes, slice_size);
 739             buf += slice_size - slice_hdr_size;
 740         }
 741     }
 742
 743     orig_buf -= 8;
 744     frame_size = buf - orig_buf;
 745     picture_size = buf - picture_size_pos - 6;
 746     bytestream_put_be32(&orig_buf, frame_size);
 747     bytestream_put_be32(&picture_size_pos, picture_size);
 748
 749     pkt->size   = frame_size;
 750     pkt->flags |= AV_PKT_FLAG_KEY;
 751     *got_packet = 1;
 752
 753     return 0;
 754 }
 755
 756 static av_cold int encode_close(AVCodecContext *avctx)
 757 {
 758     ProresContext *ctx = avctx->priv_data;
 759
 760     if (avctx->coded_frame->data[0])
 761         avctx->release_buffer(avctx, avctx->coded_frame);
 762
 763     av_freep(&avctx->coded_frame);
 764
 765     av_freep(&ctx->nodes);
 766     av_freep(&ctx->slice_q);
 767
 768     return 0;
 769 }
 770
 771 static av_cold int encode_init(AVCodecContext *avctx)
 772 {
 773     ProresContext *ctx = avctx->priv_data;
 774     int mps;
 775     int i, j;
 776     int min_quant, max_quant;
 777
 778     avctx->bits_per_raw_sample = 10;
 779     avctx->coded_frame = avcodec_alloc_frame();
 780     if (!avctx->coded_frame)
 781         return AVERROR(ENOMEM);
 782
 783     ff_proresdsp_init(&ctx->dsp);
 784     ff_init_scantable(ctx->dsp.dct_permutation, &ctx->scantable,
 785                       ff_prores_progressive_scan);
 786
 787     mps = ctx->mbs_per_slice;
 788     if (mps & (mps - 1)) {
 789         av_log(avctx, AV_LOG_ERROR,
 790                "there should be an integer power of two MBs per slice\n");
 791         return AVERROR(EINVAL);
 792     }
 793
 794     ctx->chroma_factor = avctx->pix_fmt == PIX_FMT_YUV422P10
 795                          ? CFACTOR_Y422
 796                          : CFACTOR_Y444;
 797     ctx->profile_info  = prores_profile_info + ctx->profile;
 798     ctx->num_planes    = 3;
 799
 800     ctx->mb_width      = FFALIGN(avctx->width,  16) >> 4;
 801     ctx->mb_height     = FFALIGN(avctx->height, 16) >> 4;
 802     ctx->slices_width  = ctx->mb_width / mps;
 803     ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
 804     ctx->num_slices    = ctx->mb_height * ctx->slices_width;
 805
 806     for (i = 0; i < NUM_MB_LIMITS - 1; i++)
 807         if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height)
 808             break;
 809     ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
 810
 811     ctx->frame_size = ctx->num_slices * (2 + 2 * ctx->num_planes
 812                                          + (2 * mps * ctx->bits_per_mb) / 8)
 813                       + 200;
 814
 815     min_quant = ctx->profile_info->min_quant;
 816     max_quant = ctx->profile_info->max_quant;
 817     for (i = min_quant; i < MAX_STORED_Q; i++) {
 818         for (j = 0; j < 64; j++)
 819             ctx->quants[i][j] = ctx->profile_info->quant[j] * i;
 820     }
 821
 822     avctx->codec_tag   = ctx->profile_info->tag;
 823
 824     av_log(avctx, AV_LOG_DEBUG, "profile %d, %d slices, %d bits per MB\n",
 825            ctx->profile, ctx->num_slices, ctx->bits_per_mb);
 826     av_log(avctx, AV_LOG_DEBUG, "estimated frame size %d\n",
 827            ctx->frame_size);
 828
 829     ctx->nodes = av_malloc((ctx->slices_width + 1) * TRELLIS_WIDTH
 830                            * sizeof(*ctx->nodes));
 831     if (!ctx->nodes) {
 832         encode_close(avctx);
 833         return AVERROR(ENOMEM);
 834     }
 835     for (i = min_quant; i < max_quant + 2; i++) {
 836         ctx->nodes[i].prev_node = -1;
 837         ctx->nodes[i].bits      = 0;
 838         ctx->nodes[i].score     = 0;
 839     }
 840
 841     ctx->slice_q = av_malloc(ctx->slices_width * sizeof(*ctx->slice_q));
 842     if (!ctx->slice_q) {
 843         encode_close(avctx);
 844         return AVERROR(ENOMEM);
 845     }
 846
 847     return 0;
 848 }
 849
/* Offset of a ProresContext field, for the option table below. */
#define OFFSET(x) offsetof(ProresContext, x)
/* Flags shared by all options: video encoding parameters. */
#define VE     AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM

/*
 * Private options of the encoder:
 *  mbs_per_slice  1..MAX_MBS_PER_SLICE, default 8; encode_init() additionally
 *                 requires a power of two
 *  profile        one of the PRORES_PROFILE_* values, default "standard";
 *                 the named constants below belong to the same "profile" unit
 */
static const AVOption options[] = {
    { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
        AV_OPT_TYPE_INT, { 8 }, 1, MAX_MBS_PER_SLICE, VE },
    { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
        { PRORES_PROFILE_STANDARD },
        PRORES_PROFILE_PROXY, PRORES_PROFILE_HQ, VE, "profile" },
    { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_PROXY },
        0, 0, VE, "profile" },
    { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_LT },
        0, 0, VE, "profile" },
    { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_STANDARD },
        0, 0, VE, "profile" },
    { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_HQ },
        0, 0, VE, "profile" },
    { NULL }
};
 869
/* AVClass exposing the private options above; referenced from the codec
 * definition through .priv_class. */
static const AVClass proresenc_class = {
    .class_name = "ProRes encoder",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};
 876
/* Codec registration entry: 10-bit planar 4:2:2 and 4:4:4 input, encoded
 * through the encode2 callback. */
AVCodec ff_prores_encoder = {
    .name           = "prores",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = CODEC_ID_PRORES,
    .priv_data_size = sizeof(ProresContext),
    .init           = encode_init,
    .close          = encode_close,
    .encode2        = encode_frame,
    .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
    .pix_fmts       = (const enum PixelFormat[]) {
                          PIX_FMT_YUV422P10, PIX_FMT_YUV444P10, PIX_FMT_NONE
                      },
    .priv_class     = &proresenc_class,
};