git.sesse.net Git - ffmpeg/blob - libavcodec/proresenc.c

   1 /*
   2  * Apple ProRes encoder
   3  *
   4  * Copyright (c) 2012 Konstantin Shishkov
   5  *
   6  * This file is part of Libav.
   7  *
   8  * Libav is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * Libav is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with Libav; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 #include "libavutil/opt.h"
  24 #include "avcodec.h"
  25 #include "put_bits.h"
  26 #include "bytestream.h"
  27 #include "internal.h"
  28 #include "proresdsp.h"
  29 #include "proresdata.h"
  30
/* Chroma format codes. encode_init() selects one from the pixel format and
 * encode_frame() writes it into the top two bits of the frame flags byte. */
#define CFACTOR_Y422 2
#define CFACTOR_Y444 3

/* Upper bound of the "mbs_per_slice" option; also sizes ProresContext.blocks. */
#define MAX_MBS_PER_SLICE 8

#define MAX_PLANES 3 // should be increased to 4 when there's PIX_FMT_YUV444AP10

/* Values of the "profile" option; used as index into prores_profile_info[]. */
enum {
    PRORES_PROFILE_PROXY = 0,
    PRORES_PROFILE_LT,
    PRORES_PROFILE_STANDARD,
    PRORES_PROFILE_HQ,
};

/* Frame size classes, expressed as a macroblock count. encode_init() picks
 * the first class that can hold the frame (or the last one) and uses its
 * index to look up prores_profile.br_tab[]. */
#define NUM_MB_LIMITS 4
static const int prores_mb_limits[NUM_MB_LIMITS] = {
    1620, // up to 720x576
    2700, // up to 960x720
    6075, // up to 1440x1080
    9216, // up to 2048x1152
};
  52
/**
 * Per-profile encoding parameters, indexed by the PRORES_PROFILE_* values.
 *
 * - full_name: human-readable profile name
 * - tag:       FourCC stored into AVCodecContext.codec_tag by encode_init()
 * - min_quant, max_quant: inclusive range of quantiser scales tried per slice
 * - br_tab:    bits-per-macroblock budget for each prores_mb_limits[] class
 * - quant:     base 8x8 quantisation matrix; written to the frame header for
 *              both luma and chroma and multiplied by the quantiser scale to
 *              build ProresContext.quants[]
 */
static const struct prores_profile {
    const char *full_name;
    uint32_t    tag;
    int         min_quant;
    int         max_quant;
    int         br_tab[NUM_MB_LIMITS];
    uint8_t     quant[64];
} prores_profile_info[4] = {
    {
        .full_name = "proxy",
        .tag       = MKTAG('a', 'p', 'c', 'o'),
        .min_quant = 4,
        .max_quant = 8,
        .br_tab    = { 300, 242, 220, 194 },
        .quant     = {
             4,  7,  9, 11, 13, 14, 15, 63,
             7,  7, 11, 12, 14, 15, 63, 63,
             9, 11, 13, 14, 15, 63, 63, 63,
            11, 11, 13, 14, 63, 63, 63, 63,
            11, 13, 14, 63, 63, 63, 63, 63,
            13, 14, 63, 63, 63, 63, 63, 63,
            13, 63, 63, 63, 63, 63, 63, 63,
            63, 63, 63, 63, 63, 63, 63, 63,
        },
    },
    {
        .full_name = "LT",
        .tag       = MKTAG('a', 'p', 'c', 's'),
        .min_quant = 1,
        .max_quant = 9,
        .br_tab    = { 720, 560, 490, 440 },
        .quant     = {
             4,  5,  6,  7,  9, 11, 13, 15,
             5,  5,  7,  8, 11, 13, 15, 17,
             6,  7,  9, 11, 13, 15, 15, 17,
             7,  7,  9, 11, 13, 15, 17, 19,
             7,  9, 11, 13, 14, 16, 19, 23,
             9, 11, 13, 14, 16, 19, 23, 29,
             9, 11, 13, 15, 17, 21, 28, 35,
            11, 13, 16, 17, 21, 28, 35, 41,
        },
    },
    {
        .full_name = "standard",
        .tag       = MKTAG('a', 'p', 'c', 'n'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 1050, 808, 710, 632 },
        .quant     = {
             4,  4,  5,  5,  6,  7,  7,  9,
             4,  4,  5,  6,  7,  7,  9,  9,
             5,  5,  6,  7,  7,  9,  9, 10,
             5,  5,  6,  7,  7,  9,  9, 10,
             5,  6,  7,  7,  8,  9, 10, 12,
             6,  7,  7,  8,  9, 10, 12, 15,
             6,  7,  7,  9, 10, 11, 14, 17,
             7,  7,  9, 10, 11, 14, 17, 21,
        },
    },
    {
        .full_name = "high quality",
        .tag       = MKTAG('a', 'p', 'c', 'h'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 1566, 1216, 1070, 950 },
        .quant     = {
             4,  4,  4,  4,  4,  4,  4,  4,
             4,  4,  4,  4,  4,  4,  4,  4,
             4,  4,  4,  4,  4,  4,  4,  4,
             4,  4,  4,  4,  4,  4,  4,  5,
             4,  4,  4,  4,  4,  4,  5,  5,
             4,  4,  4,  4,  4,  5,  5,  6,
             4,  4,  4,  4,  5,  5,  6,  7,
             4,  4,  4,  4,  5,  6,  7,  7,
        },
    }
// for 4444 profile bitrate numbers are { 2350, 1828, 1600, 1425 }
};
 131
 132 #define TRELLIS_WIDTH 16
 133 #define SCORE_LIMIT   INT_MAX / 2
 134
/**
 * One state of the per-row quantiser search performed by find_slice_quant():
 * "slice N encoded with quantiser scale `quant`".
 */
struct TrellisNode {
    int prev_node; ///< index in ProresContext.nodes of the best predecessor, -1 if none
    int quant;     ///< quantiser scale this node stands for
    int bits;      ///< estimated bits spent along the best path up to this node
    int score;     ///< accumulated quantisation error of that path, or SCORE_LIMIT
};
 141
/** Private encoder state, allocated by libavcodec as AVCodecContext.priv_data. */
typedef struct ProresContext {
    AVClass *class;
    /* DCT coefficients of one slice, per plane: up to 4 blocks of 64
     * coefficients for each of up to MAX_MBS_PER_SLICE macroblocks */
    DECLARE_ALIGNED(16, DCTELEM, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
    /* scratch 16x16 macroblock used by get_slice_data() at picture edges */
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
    /* quants[q][i] = profile quant[i] * q, filled for min_quant..max_quant */
    int16_t quants[16][64];

    ProresDSPContext dsp;
    ScanTable  scantable;

    int mb_width, mb_height;   // frame size in 16x16 macroblocks, rounded up
    int mbs_per_slice;         // "mbs_per_slice" option
    int num_chroma_blocks, chroma_factor; // chroma_factor is CFACTOR_Y422 or CFACTOR_Y444
    int slices_width;          // number of slices in one macroblock row
    int num_slices;            // mb_height * slices_width
    int num_planes;
    int bits_per_mb;           // bit budget per macroblock, from profile br_tab

    int profile;               // "profile" option
    const struct prores_profile *profile_info;

    struct TrellisNode *nodes; // (slices_width + 1) * TRELLIS_WIDTH nodes
    int *slice_q;              // quantiser chosen for each slice of the current row
} ProresContext;
 165
 166 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
 167                            int linesize, int x, int y, int w, int h,
 168                            DCTELEM *blocks,
 169                            int mbs_per_slice, int blocks_per_mb)
 170 {
 171     const uint16_t *esrc;
 172     const int mb_width = 4 * blocks_per_mb;
 173     int elinesize;
 174     int i, j, k;
 175
 176     for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
 177         if (x >= w) {
 178             memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
 179                               * sizeof(*blocks));
 180             return;
 181         }
 182         if (x + mb_width <= w && y + 16 <= h) {
 183             esrc      = src;
 184             elinesize = linesize;
 185         } else {
 186             int bw, bh, pix;
 187             const int estride = 16 / sizeof(*ctx->emu_buf);
 188
 189             esrc      = ctx->emu_buf;
 190             elinesize = 16;
 191
 192             bw = FFMIN(w - x, mb_width);
 193             bh = FFMIN(h - y, 16);
 194
 195             for (j = 0; j < bh; j++) {
 196                 memcpy(ctx->emu_buf + j * estride, src + j * linesize,
 197                        bw * sizeof(*src));
 198                 pix = ctx->emu_buf[j * estride + bw - 1];
 199                 for (k = bw; k < mb_width; k++)
 200                     ctx->emu_buf[j * estride + k] = pix;
 201             }
 202             for (; j < 16; j++)
 203                 memcpy(ctx->emu_buf + j * estride,
 204                        ctx->emu_buf + (bh - 1) * estride,
 205                        mb_width * sizeof(*ctx->emu_buf));
 206         }
 207         ctx->dsp.fdct(esrc, elinesize, blocks);
 208         blocks += 64;
 209         if (blocks_per_mb > 2) {
 210             ctx->dsp.fdct(src + 8, linesize, blocks);
 211             blocks += 64;
 212         }
 213         ctx->dsp.fdct(src + linesize * 4, linesize, blocks);
 214         blocks += 64;
 215         if (blocks_per_mb > 2) {
 216             ctx->dsp.fdct(src + linesize * 4 + 8, linesize, blocks);
 217             blocks += 64;
 218         }
 219
 220         x += mb_width;
 221     }
 222 }
 223
 224 /**
 225  * Write an unsigned rice/exp golomb codeword.
 226  */
 227 static inline void encode_vlc_codeword(PutBitContext *pb, uint8_t codebook, int val)
 228 {
 229     unsigned int rice_order, exp_order, switch_bits, switch_val;
 230     int exponent;
 231
 232     /* number of prefix bits to switch between Rice and expGolomb */
 233     switch_bits = (codebook & 3) + 1;
 234     rice_order  =  codebook >> 5;       /* rice code order */
 235     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 236
 237     switch_val  = switch_bits << rice_order;
 238
 239     if (val >= switch_val) {
 240         val -= switch_val - (1 << exp_order);
 241         exponent = av_log2(val);
 242
 243         put_bits(pb, exponent - exp_order + switch_bits, 0);
 244         put_bits(pb, 1, 1);
 245         put_bits(pb, exponent, val);
 246     } else {
 247         exponent = val >> rice_order;
 248
 249         if (exponent)
 250             put_bits(pb, exponent, 0);
 251         put_bits(pb, 1, 1);
 252         if (rice_order)
 253             put_sbits(pb, rice_order, val);
 254     }
 255 }
 256
/* Shift the argument right by 31 bits. NOTE(review): presumably meant to give
 * 0 for non-negative and -1 for negative values; that relies on a 32-bit int
 * and an arithmetic right shift -- confirm for the target platforms. */
#define GET_SIGN(x)  ((x) >> 31)
/* Map a signed value to a code by doubling it and XORing with GET_SIGN(x).
 * Under the assumption above: 0, -1, 1, -2, ... -> 0, 1, 2, 3, ... */
#define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
 259
 260 static void encode_dcs(PutBitContext *pb, DCTELEM *blocks,
 261                        int blocks_per_slice, int scale)
 262 {
 263     int i;
 264     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 265
 266     prev_dc = (blocks[0] - 0x4000) / scale;
 267     encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
 268     codebook = 3;
 269     blocks  += 64;
 270
 271     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 272         dc       = (blocks[0] - 0x4000) / scale;
 273         delta    = dc - prev_dc;
 274         new_sign = GET_SIGN(delta);
 275         delta    = (delta ^ sign) - sign;
 276         code     = MAKE_CODE(delta);
 277         encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
 278         codebook = (code + (code & 1)) >> 1;
 279         codebook = FFMIN(codebook, 3);
 280         sign     = new_sign;
 281         prev_dc  = dc;
 282     }
 283 }
 284
 285 static void encode_acs(PutBitContext *pb, DCTELEM *blocks,
 286                        int blocks_per_slice,
 287                        int plane_size_factor,
 288                        const uint8_t *scan, const int16_t *qmat)
 289 {
 290     int idx, i;
 291     int run, level, run_cb, lev_cb;
 292     int max_coeffs, abs_level;
 293
 294     max_coeffs = blocks_per_slice << 6;
 295     run_cb     = ff_prores_run_to_cb_index[4];
 296     lev_cb     = ff_prores_lev_to_cb_index[2];
 297     run        = 0;
 298
 299     for (i = 1; i < 64; i++) {
 300         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 301             level = blocks[idx] / qmat[scan[i]];
 302             if (level) {
 303                 abs_level = FFABS(level);
 304                 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
 305                 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
 306                                     abs_level - 1);
 307                 put_sbits(pb, 1, GET_SIGN(level));
 308
 309                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 310                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 311                 run    = 0;
 312             } else {
 313                 run++;
 314             }
 315         }
 316     }
 317 }
 318
 319 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
 320                               const uint16_t *src, int linesize,
 321                               int mbs_per_slice, DCTELEM *blocks,
 322                               int blocks_per_mb, int plane_size_factor,
 323                               const int16_t *qmat)
 324 {
 325     int blocks_per_slice, saved_pos;
 326
 327     saved_pos = put_bits_count(pb);
 328     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 329
 330     encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
 331     encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
 332                ctx->scantable.permutated, qmat);
 333     flush_put_bits(pb);
 334
 335     return (put_bits_count(pb) - saved_pos) >> 3;
 336 }
 337
 338 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
 339                         PutBitContext *pb,
 340                         int sizes[4], int x, int y, int quant,
 341                         int mbs_per_slice)
 342 {
 343     ProresContext *ctx = avctx->priv_data;
 344     int i, xp, yp;
 345     int total_size = 0;
 346     const uint16_t *src;
 347     int slice_width_factor = av_log2(mbs_per_slice);
 348     int num_cblocks, pwidth;
 349     int plane_factor, is_chroma;
 350
 351     for (i = 0; i < ctx->num_planes; i++) {
 352         is_chroma    = (i == 1 || i == 2);
 353         plane_factor = slice_width_factor + 2;
 354         if (is_chroma)
 355             plane_factor += ctx->chroma_factor - 3;
 356         if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
 357             xp          = x << 4;
 358             yp          = y << 4;
 359             num_cblocks = 4;
 360             pwidth      = avctx->width;
 361         } else {
 362             xp          = x << 3;
 363             yp          = y << 4;
 364             num_cblocks = 2;
 365             pwidth      = avctx->width >> 1;
 366         }
 367         src = (const uint16_t*)(pic->data[i] + yp * pic->linesize[i]) + xp;
 368
 369         get_slice_data(ctx, src, pic->linesize[i], xp, yp,
 370                        pwidth, avctx->height, ctx->blocks[0],
 371                        mbs_per_slice, num_cblocks);
 372         sizes[i] = encode_slice_plane(ctx, pb, src, pic->linesize[i],
 373                                       mbs_per_slice, ctx->blocks[0],
 374                                       num_cblocks, plane_factor,
 375                                       ctx->quants[quant]);
 376         total_size += sizes[i];
 377     }
 378     return total_size;
 379 }
 380
 381 static inline int estimate_vlc(uint8_t codebook, int val)
 382 {
 383     unsigned int rice_order, exp_order, switch_bits, switch_val;
 384     int exponent;
 385
 386     /* number of prefix bits to switch between Rice and expGolomb */
 387     switch_bits = (codebook & 3) + 1;
 388     rice_order  =  codebook >> 5;       /* rice code order */
 389     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
 390
 391     switch_val  = switch_bits << rice_order;
 392
 393     if (val >= switch_val) {
 394         val -= switch_val - (1 << exp_order);
 395         exponent = av_log2(val);
 396
 397         return exponent * 2 - exp_order + switch_bits + 1;
 398     } else {
 399         return (val >> rice_order) + rice_order + 1;
 400     }
 401 }
 402
 403 static int estimate_dcs(int *error, DCTELEM *blocks, int blocks_per_slice,
 404                         int scale)
 405 {
 406     int i;
 407     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
 408     int bits;
 409
 410     prev_dc  = (blocks[0] - 0x4000) / scale;
 411     bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
 412     codebook = 3;
 413     blocks  += 64;
 414     *error  += FFABS(blocks[0] - 0x4000) % scale;
 415
 416     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
 417         dc       = (blocks[0] - 0x4000) / scale;
 418         *error  += FFABS(blocks[0] - 0x4000) % scale;
 419         delta    = dc - prev_dc;
 420         new_sign = GET_SIGN(delta);
 421         delta    = (delta ^ sign) - sign;
 422         code     = MAKE_CODE(delta);
 423         bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
 424         codebook = (code + (code & 1)) >> 1;
 425         codebook = FFMIN(codebook, 3);
 426         sign     = new_sign;
 427         prev_dc  = dc;
 428     }
 429
 430     return bits;
 431 }
 432
 433 static int estimate_acs(int *error, DCTELEM *blocks, int blocks_per_slice,
 434                         int plane_size_factor,
 435                         const uint8_t *scan, const int16_t *qmat)
 436 {
 437     int idx, i;
 438     int run, level, run_cb, lev_cb;
 439     int max_coeffs, abs_level;
 440     int bits = 0;
 441
 442     max_coeffs = blocks_per_slice << 6;
 443     run_cb     = ff_prores_run_to_cb_index[4];
 444     lev_cb     = ff_prores_lev_to_cb_index[2];
 445     run        = 0;
 446
 447     for (i = 1; i < 64; i++) {
 448         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
 449             level   = blocks[idx] / qmat[scan[i]];
 450             *error += FFABS(blocks[idx]) % qmat[scan[i]];
 451             if (level) {
 452                 abs_level = FFABS(level);
 453                 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
 454                 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
 455                                      abs_level - 1) + 1;
 456
 457                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
 458                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
 459                 run    = 0;
 460             } else {
 461                 run++;
 462             }
 463         }
 464     }
 465
 466     return bits;
 467 }
 468
 469 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
 470                                 const uint16_t *src, int linesize,
 471                                 int mbs_per_slice,
 472                                 int blocks_per_mb, int plane_size_factor,
 473                                 const int16_t *qmat)
 474 {
 475     int blocks_per_slice;
 476     int bits;
 477
 478     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 479
 480     bits  = estimate_dcs(error, ctx->blocks[plane], blocks_per_slice, qmat[0]);
 481     bits += estimate_acs(error, ctx->blocks[plane], blocks_per_slice,
 482                          plane_size_factor, ctx->scantable.permutated, qmat);
 483
 484     return FFALIGN(bits, 8);
 485 }
 486
 487 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
 488                             int trellis_node, int x, int y, int mbs_per_slice)
 489 {
 490     ProresContext *ctx = avctx->priv_data;
 491     int i, q, pq, xp, yp;
 492     const uint16_t *src;
 493     int slice_width_factor = av_log2(mbs_per_slice);
 494     int num_cblocks[MAX_PLANES], pwidth;
 495     int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
 496     const int min_quant = ctx->profile_info->min_quant;
 497     const int max_quant = ctx->profile_info->max_quant;
 498     int error, bits, bits_limit;
 499     int mbs, prev, cur, new_score;
 500     int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
 501
 502     mbs = x + mbs_per_slice;
 503
 504     for (i = 0; i < ctx->num_planes; i++) {
 505         is_chroma[i]    = (i == 1 || i == 2);
 506         plane_factor[i] = slice_width_factor + 2;
 507         if (is_chroma[i])
 508             plane_factor[i] += ctx->chroma_factor - 3;
 509         if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
 510             xp             = x << 4;
 511             yp             = y << 4;
 512             num_cblocks[i] = 4;
 513             pwidth         = avctx->width;
 514         } else {
 515             xp             = x << 3;
 516             yp             = y << 4;
 517             num_cblocks[i] = 2;
 518             pwidth         = avctx->width >> 1;
 519         }
 520         src = (const uint16_t*)(pic->data[i] + yp * pic->linesize[i]) + xp;
 521
 522         get_slice_data(ctx, src, pic->linesize[i], xp, yp,
 523                        pwidth, avctx->height, ctx->blocks[i],
 524                        mbs_per_slice, num_cblocks[i]);
 525     }
 526
 527     for (q = min_quant; q <= max_quant; q++) {
 528         ctx->nodes[trellis_node + q].prev_node = -1;
 529         ctx->nodes[trellis_node + q].quant     = q;
 530     }
 531
 532     // todo: maybe perform coarser quantising to fit into frame size when needed
 533     for (q = min_quant; q <= max_quant; q++) {
 534         bits  = 0;
 535         error = 0;
 536         for (i = 0; i < ctx->num_planes; i++) {
 537             bits += estimate_slice_plane(ctx, &error, i,
 538                                          src, pic->linesize[i],
 539                                          mbs_per_slice,
 540                                          num_cblocks[i], plane_factor[i],
 541                                          ctx->quants[q]);
 542         }
 543         if (bits > 65000 * 8) {
 544             error = SCORE_LIMIT;
 545             break;
 546         }
 547         slice_bits[q]  = bits;
 548         slice_score[q] = error;
 549     }
 550
 551     bits_limit = mbs * ctx->bits_per_mb;
 552     for (pq = min_quant; pq <= max_quant; pq++) {
 553         prev = trellis_node - TRELLIS_WIDTH + pq;
 554
 555         for (q = min_quant; q <= max_quant; q++) {
 556             cur = trellis_node + q;
 557
 558             bits  = ctx->nodes[prev].bits + slice_bits[q];
 559             error = slice_score[q];
 560             if (bits > bits_limit)
 561                 error = SCORE_LIMIT;
 562
 563             if (ctx->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
 564                 new_score = ctx->nodes[prev].score + error;
 565             else
 566                 new_score = SCORE_LIMIT;
 567             if (ctx->nodes[cur].prev_node == -1 ||
 568                 ctx->nodes[cur].score >= new_score) {
 569
 570                 ctx->nodes[cur].bits      = bits;
 571                 ctx->nodes[cur].score     = new_score;
 572                 ctx->nodes[cur].prev_node = prev;
 573             }
 574         }
 575     }
 576
 577     error = ctx->nodes[trellis_node + min_quant].score;
 578     pq    = trellis_node + min_quant;
 579     for (q = min_quant + 1; q <= max_quant; q++) {
 580         if (ctx->nodes[trellis_node + q].score <= error) {
 581             error = ctx->nodes[trellis_node + q].score;
 582             pq    = trellis_node + q;
 583         }
 584     }
 585
 586     return pq;
 587 }
 588
 589 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 590                         const AVFrame *pic, int *got_packet)
 591 {
 592     ProresContext *ctx = avctx->priv_data;
 593     uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
 594     uint8_t *picture_size_pos;
 595     PutBitContext pb;
 596     int x, y, i, mb, q = 0;
 597     int sizes[4] = { 0 };
 598     int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
 599     int frame_size, picture_size, slice_size;
 600     int mbs_per_slice = ctx->mbs_per_slice;
 601     int pkt_size, ret;
 602
 603     *avctx->coded_frame           = *pic;
 604     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
 605     avctx->coded_frame->key_frame = 1;
 606
 607     pkt_size = ctx->mb_width * ctx->mb_height * 64 * 3 * 12
 608                + ctx->num_slices * 2 + 200 + FF_MIN_BUFFER_SIZE;
 609
 610     if ((ret = ff_alloc_packet(pkt, pkt_size)) < 0) {
 611         av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
 612         return ret;
 613     }
 614
 615     orig_buf = pkt->data;
 616
 617     // frame atom
 618     orig_buf += 4;                              // frame size
 619     bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
 620     buf = orig_buf;
 621
 622     // frame header
 623     tmp = buf;
 624     buf += 2;                                   // frame header size will be stored here
 625     bytestream_put_be16  (&buf, 0);             // version 1
 626     bytestream_put_buffer(&buf, "Lavc", 4);     // creator
 627     bytestream_put_be16  (&buf, avctx->width);
 628     bytestream_put_be16  (&buf, avctx->height);
 629     bytestream_put_byte  (&buf, ctx->chroma_factor << 6); // frame flags
 630     bytestream_put_byte  (&buf, 0);             // reserved
 631     bytestream_put_byte  (&buf, 0);             // primaries
 632     bytestream_put_byte  (&buf, 0);             // transfer function
 633     bytestream_put_byte  (&buf, 6);             // colour matrix - ITU-R BT.601-4
 634     bytestream_put_byte  (&buf, 0x40);          // source format and alpha information
 635     bytestream_put_byte  (&buf, 0);             // reserved
 636     bytestream_put_byte  (&buf, 0x03);          // matrix flags - both matrices are present
 637     // luma quantisation matrix
 638     for (i = 0; i < 64; i++)
 639         bytestream_put_byte(&buf, ctx->profile_info->quant[i]);
 640     // chroma quantisation matrix
 641     for (i = 0; i < 64; i++)
 642         bytestream_put_byte(&buf, ctx->profile_info->quant[i]);
 643     bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
 644
 645     // picture header
 646     picture_size_pos = buf + 1;
 647     bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
 648     buf += 4;                                   // picture data size will be stored here
 649     bytestream_put_be16  (&buf, ctx->num_slices); // total number of slices
 650     bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
 651
 652     // seek table - will be filled during slice encoding
 653     slice_sizes = buf;
 654     buf += ctx->num_slices * 2;
 655
 656     // slices
 657     for (y = 0; y < ctx->mb_height; y++) {
 658         mbs_per_slice = ctx->mbs_per_slice;
 659         for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
 660             while (ctx->mb_width - x < mbs_per_slice)
 661                 mbs_per_slice >>= 1;
 662             q = find_slice_quant(avctx, pic, (mb + 1) * TRELLIS_WIDTH, x, y,
 663                                  mbs_per_slice);
 664         }
 665
 666         for (x = ctx->slices_width - 1; x >= 0; x--) {
 667             ctx->slice_q[x] = ctx->nodes[q].quant;
 668             q = ctx->nodes[q].prev_node;
 669         }
 670
 671         mbs_per_slice = ctx->mbs_per_slice;
 672         for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
 673             q = ctx->slice_q[mb];
 674
 675             while (ctx->mb_width - x < mbs_per_slice)
 676                 mbs_per_slice >>= 1;
 677
 678             bytestream_put_byte(&buf, slice_hdr_size << 3);
 679             slice_hdr = buf;
 680             buf += slice_hdr_size - 1;
 681             init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
 682             encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
 683
 684             bytestream_put_byte(&slice_hdr, q);
 685             slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
 686             for (i = 0; i < ctx->num_planes - 1; i++) {
 687                 bytestream_put_be16(&slice_hdr, sizes[i]);
 688                 slice_size += sizes[i];
 689             }
 690             bytestream_put_be16(&slice_sizes, slice_size);
 691             buf += slice_size - slice_hdr_size;
 692         }
 693     }
 694
 695     orig_buf -= 8;
 696     frame_size = buf - orig_buf;
 697     picture_size = buf - picture_size_pos - 6;
 698     bytestream_put_be32(&orig_buf, frame_size);
 699     bytestream_put_be32(&picture_size_pos, picture_size);
 700
 701     pkt->size   = frame_size;
 702     pkt->flags |= AV_PKT_FLAG_KEY;
 703     *got_packet = 1;
 704
 705     return 0;
 706 }
 707
 708 static av_cold int encode_close(AVCodecContext *avctx)
 709 {
 710     ProresContext *ctx = avctx->priv_data;
 711
 712     if (avctx->coded_frame->data[0])
 713         avctx->release_buffer(avctx, avctx->coded_frame);
 714
 715     av_freep(&avctx->coded_frame);
 716
 717     av_freep(&ctx->nodes);
 718     av_freep(&ctx->slice_q);
 719
 720     return 0;
 721 }
 722
 723 static av_cold int encode_init(AVCodecContext *avctx)
 724 {
 725     ProresContext *ctx = avctx->priv_data;
 726     int mps;
 727     int i, j;
 728     int min_quant, max_quant;
 729
 730     avctx->bits_per_raw_sample = 10;
 731     avctx->coded_frame = avcodec_alloc_frame();
 732     if (!avctx->coded_frame)
 733         return AVERROR(ENOMEM);
 734
 735     ff_proresdsp_init(&ctx->dsp);
 736     ff_init_scantable(ctx->dsp.dct_permutation, &ctx->scantable,
 737                       ff_prores_progressive_scan);
 738
 739     mps = ctx->mbs_per_slice;
 740     if (mps & (mps - 1)) {
 741         av_log(avctx, AV_LOG_ERROR,
 742                "there should be an integer power of two MBs per slice\n");
 743         return AVERROR(EINVAL);
 744     }
 745
 746     ctx->chroma_factor = avctx->pix_fmt == PIX_FMT_YUV422P10
 747                          ? CFACTOR_Y422
 748                          : CFACTOR_Y444;
 749     ctx->profile_info  = prores_profile_info + ctx->profile;
 750     ctx->num_planes    = 3;
 751
 752     ctx->mb_width      = FFALIGN(avctx->width,  16) >> 4;
 753     ctx->mb_height     = FFALIGN(avctx->height, 16) >> 4;
 754     ctx->slices_width  = ctx->mb_width / mps;
 755     ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
 756     ctx->num_slices    = ctx->mb_height * ctx->slices_width;
 757
 758     for (i = 0; i < NUM_MB_LIMITS - 1; i++)
 759         if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height)
 760             break;
 761     ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
 762
 763     min_quant = ctx->profile_info->min_quant;
 764     max_quant = ctx->profile_info->max_quant;
 765     for (i = min_quant; i <= max_quant; i++) {
 766         for (j = 0; j < 64; j++)
 767             ctx->quants[i][j] = ctx->profile_info->quant[j] * i;
 768     }
 769
 770     avctx->codec_tag   = ctx->profile_info->tag;
 771
 772     av_log(avctx, AV_LOG_DEBUG, "profile %d, %d slices, %d bits per MB\n",
 773            ctx->profile, ctx->num_slices, ctx->bits_per_mb);
 774
 775     ctx->nodes = av_malloc((ctx->slices_width + 1) * TRELLIS_WIDTH
 776                            * sizeof(*ctx->nodes));
 777     if (!ctx->nodes) {
 778         encode_close(avctx);
 779         return AVERROR(ENOMEM);
 780     }
 781     for (i = min_quant; i <= max_quant; i++) {
 782         ctx->nodes[i].prev_node = -1;
 783         ctx->nodes[i].bits      = 0;
 784         ctx->nodes[i].score     = 0;
 785     }
 786
 787     ctx->slice_q = av_malloc(ctx->slices_width * sizeof(*ctx->slice_q));
 788     if (!ctx->slice_q) {
 789         encode_close(avctx);
 790         return AVERROR(ENOMEM);
 791     }
 792
 793     return 0;
 794 }
 795
/* Byte offset of an option's storage inside ProresContext. */
#define OFFSET(x) offsetof(ProresContext, x)
/* Flags shared by all options below: video, encoding. */
#define VE     AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM

/* Private options: the slice width in macroblocks (1..MAX_MBS_PER_SLICE,
 * default 8) and the profile (default "standard"), which can be given by
 * number or by one of the named constants of the "profile" unit. */
static const AVOption options[] = {
    { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
        AV_OPT_TYPE_INT, { 8 }, 1, MAX_MBS_PER_SLICE, VE },
    { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
        { PRORES_PROFILE_STANDARD },
        PRORES_PROFILE_PROXY, PRORES_PROFILE_HQ, VE, "profile" },
    { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_PROXY },
        0, 0, VE, "profile" },
    { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_LT },
        0, 0, VE, "profile" },
    { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_STANDARD },
        0, 0, VE, "profile" },
    { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_HQ },
        0, 0, VE, "profile" },
    { NULL }
};
 815
/* AVClass tying the option table above to the encoder's private context. */
static const AVClass proresenc_class = {
    .class_name = "ProRes encoder",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};
 822
/* Codec descriptor: wires the init/encode/close callbacks defined in this
 * file and lists the two accepted 10-bit pixel formats (4:2:2 and 4:4:4). */
AVCodec ff_prores_encoder = {
    .name           = "prores",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = CODEC_ID_PRORES,
    .priv_data_size = sizeof(ProresContext),
    .init           = encode_init,
    .close          = encode_close,
    .encode2        = encode_frame,
    .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
    .pix_fmts       = (const enum PixelFormat[]) {
                          PIX_FMT_YUV422P10, PIX_FMT_YUV444P10, PIX_FMT_NONE
                      },
    .priv_class     = &proresenc_class,
};