git.sesse.net Git - ffmpeg/blob - libavcodec/proresenc_anatoliy.c

   1 /*
   2  * Apple ProRes encoder
   3  *
   4  * Copyright (c) 2011 Anatoliy Wasserman
   5  * Copyright (c) 2012 Konstantin Shishkov
   6  *
   7  * This file is part of FFmpeg.
   8  *
   9  * FFmpeg is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2.1 of the License, or (at your option) any later version.
  13  *
  14  * FFmpeg is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with FFmpeg; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22  */
  23
  24 /**
  25  * @file
  26  * Apple ProRes encoder (Anatoliy Wasserman version)
   27  * Known FOURCCs: 'ap4h' (444), 'apch' (HQ), 'apcn' (422), 'apcs' (LT), 'apco' (Proxy)
  28  */
  29
  30 #include "libavutil/opt.h"
  31 #include "avcodec.h"
  32 #include "dct.h"
  33 #include "internal.h"
  34 #include "profiles.h"
  35 #include "proresdata.h"
  36 #include "put_bits.h"
  37 #include "bytestream.h"
  38 #include "fdctdsp.h"
  39
  40 #define DEFAULT_SLICE_MB_WIDTH 8
  41
   /* Profile id -> FOURCC name pairs for the six ProRes profiles handled here.
    * The last entry holds only FF_PROFILE_UNKNOWN (presumably the list
    * terminator -- confirm against the AVProfile users). */
   42 static const AVProfile profiles[] = {
   43     { FF_PROFILE_PRORES_PROXY,    "apco"},
   44     { FF_PROFILE_PRORES_LT,       "apcs"},
   45     { FF_PROFILE_PRORES_STANDARD, "apcn"},
   46     { FF_PROFILE_PRORES_HQ,       "apch"},
   47     { FF_PROFILE_PRORES_4444,     "ap4h"},
   48     { FF_PROFILE_PRORES_XQ,       "ap4x"},
   49     { FF_PROFILE_UNKNOWN }
   50 };
  51
   /* Rate-control tables, each indexed by avctx->profile:
    *  - qp_start_table: qp a picture starts with, and the lowest qp the
    *    per-slice rate loop in encode_slice() will go down to;
    *  - qp_end_table:   highest qp the per-slice rate loop will go up to;
    *  - bitrate_table:  per-macroblock weight; encode_slice() computes its
    *    slice target as (mb_count * bitrate_table[profile]) >> 2 bits. */
   52 static const int qp_start_table[6] = {  8, 3, 2, 1, 1, 1};
   53 static const int qp_end_table[6]   = { 13, 9, 6, 6, 5, 4};
   54 static const int bitrate_table[6]  = { 1000, 2100, 3500, 5400, 7000, 10000};
   55
   /* Colour property values that may be written to the frame header.
    * prores_encode_frame() passes each list to ff_int_from_list_or_default()
    * with a default of 0. Every list ends with INT_MAX (presumably the
    * end-of-list marker expected by that helper -- confirm). */
   56 static const int valid_primaries[9]  = { AVCOL_PRI_RESERVED0, AVCOL_PRI_BT709, AVCOL_PRI_UNSPECIFIED, AVCOL_PRI_BT470BG,
   57                                          AVCOL_PRI_SMPTE170M, AVCOL_PRI_BT2020, AVCOL_PRI_SMPTE431, AVCOL_PRI_SMPTE432,INT_MAX };
   58 static const int valid_trc[4]        = { AVCOL_TRC_RESERVED0, AVCOL_TRC_BT709, AVCOL_TRC_UNSPECIFIED, INT_MAX };
   59 static const int valid_colorspace[5] = { AVCOL_SPC_BT709, AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_SMPTE170M,
   60                                          AVCOL_SPC_BT2020_NCL, INT_MAX };
  61
   /* Base luma quantisation matrices: one 64-entry (8x8) table per profile,
    * indexed by avctx->profile. prores_encode_frame() copies the selected
    * table verbatim into the frame header. */
   62 static const uint8_t QMAT_LUMA[6][64] = {
   63     { /* profile 0 (apco) */
   64          4,  7,  9, 11, 13, 14, 15, 63,
   65          7,  7, 11, 12, 14, 15, 63, 63,
   66          9, 11, 13, 14, 15, 63, 63, 63,
   67         11, 11, 13, 14, 63, 63, 63, 63,
   68         11, 13, 14, 63, 63, 63, 63, 63,
   69         13, 14, 63, 63, 63, 63, 63, 63,
   70         13, 63, 63, 63, 63, 63, 63, 63,
   71         63, 63, 63, 63, 63, 63, 63, 63
   72     }, { /* profile 1 (apcs) */
   73          4,  5,  6,  7,  9, 11, 13, 15,
   74          5,  5,  7,  8, 11, 13, 15, 17,
   75          6,  7,  9, 11, 13, 15, 15, 17,
   76          7,  7,  9, 11, 13, 15, 17, 19,
   77          7,  9, 11, 13, 14, 16, 19, 23,
   78          9, 11, 13, 14, 16, 19, 23, 29,
   79          9, 11, 13, 15, 17, 21, 28, 35,
   80         11, 13, 16, 17, 21, 28, 35, 41
   81     }, { /* profile 2 (apcn) */
   82          4,  4,  5,  5,  6,  7,  7,  9,
   83          4,  4,  5,  6,  7,  7,  9,  9,
   84          5,  5,  6,  7,  7,  9,  9, 10,
   85          5,  5,  6,  7,  7,  9,  9, 10,
   86          5,  6,  7,  7,  8,  9, 10, 12,
   87          6,  7,  7,  8,  9, 10, 12, 15,
   88          6,  7,  7,  9, 10, 11, 14, 17,
   89          7,  7,  9, 10, 11, 14, 17, 21
   90     }, { /* profile 3 (apch) */
   91          4,  4,  4,  4,  4,  4,  4,  4,
   92          4,  4,  4,  4,  4,  4,  4,  4,
   93          4,  4,  4,  4,  4,  4,  4,  4,
   94          4,  4,  4,  4,  4,  4,  4,  5,
   95          4,  4,  4,  4,  4,  4,  5,  5,
   96          4,  4,  4,  4,  4,  5,  5,  6,
   97          4,  4,  4,  4,  5,  5,  6,  7,
   98          4,  4,  4,  4,  5,  6,  7,  7
   99     }, { /* 444 */
  100         4,  4,  4,  4,  4,  4,  4,  4,
  101         4,  4,  4,  4,  4,  4,  4,  4,
  102         4,  4,  4,  4,  4,  4,  4,  4,
  103         4,  4,  4,  4,  4,  4,  4,  5,
  104         4,  4,  4,  4,  4,  4,  5,  5,
  105         4,  4,  4,  4,  4,  5,  5,  6,
  106         4,  4,  4,  4,  5,  5,  6,  7,
  107         4,  4,  4,  4,  5,  6,  7,  7
  108     }, { /* 444 XQ */
  109         2,  2,  2,  2,  2,  2,  2,  2,
  110         2,  2,  2,  2,  2,  2,  2,  2,
  111         2,  2,  2,  2,  2,  2,  2,  2,
  112         2,  2,  2,  2,  2,  2,  2,  3,
  113         2,  2,  2,  2,  2,  2,  3,  3,
  114         2,  2,  2,  2,  2,  3,  3,  3,
  115         2,  2,  2,  2,  3,  3,  3,  4,
  116         2,  2,  2,  2,  3,  3,  4,  4,
  117     }
  118 };
 119
  /* Base chroma quantisation matrices: one 64-entry (8x8) table per profile,
   * indexed by avctx->profile. prores_encode_frame() copies the selected
   * table verbatim into the frame header, right after the luma table.
   * Note that the last two entries (444 and 444 xq) hold identical values. */
  120 static const uint8_t QMAT_CHROMA[6][64] = {
  121     { /* profile 0 (apco) */
  122          4,  7,  9, 11, 13, 14, 63, 63,
  123          7,  7, 11, 12, 14, 63, 63, 63,
  124          9, 11, 13, 14, 63, 63, 63, 63,
  125         11, 11, 13, 14, 63, 63, 63, 63,
  126         11, 13, 14, 63, 63, 63, 63, 63,
  127         13, 14, 63, 63, 63, 63, 63, 63,
  128         13, 63, 63, 63, 63, 63, 63, 63,
  129         63, 63, 63, 63, 63, 63, 63, 63
  130     }, { /* profile 1 (apcs) */
  131          4,  5,  6,  7,  9, 11, 13, 15,
  132          5,  5,  7,  8, 11, 13, 15, 17,
  133          6,  7,  9, 11, 13, 15, 15, 17,
  134          7,  7,  9, 11, 13, 15, 17, 19,
  135          7,  9, 11, 13, 14, 16, 19, 23,
  136          9, 11, 13, 14, 16, 19, 23, 29,
  137          9, 11, 13, 15, 17, 21, 28, 35,
  138         11, 13, 16, 17, 21, 28, 35, 41
  139     }, { /* profile 2 (apcn) */
  140          4,  4,  5,  5,  6,  7,  7,  9,
  141          4,  4,  5,  6,  7,  7,  9,  9,
  142          5,  5,  6,  7,  7,  9,  9, 10,
  143          5,  5,  6,  7,  7,  9,  9, 10,
  144          5,  6,  7,  7,  8,  9, 10, 12,
  145          6,  7,  7,  8,  9, 10, 12, 15,
  146          6,  7,  7,  9, 10, 11, 14, 17,
  147          7,  7,  9, 10, 11, 14, 17, 21
  148     }, { /* profile 3 (apch) */
  149          4,  4,  4,  4,  4,  4,  4,  4,
  150          4,  4,  4,  4,  4,  4,  4,  4,
  151          4,  4,  4,  4,  4,  4,  4,  4,
  152          4,  4,  4,  4,  4,  4,  4,  5,
  153          4,  4,  4,  4,  4,  4,  5,  5,
  154          4,  4,  4,  4,  4,  5,  5,  6,
  155          4,  4,  4,  4,  5,  5,  6,  7,
  156          4,  4,  4,  4,  5,  6,  7,  7
  157     }, { /* 444 */
  158         4,  4,  4,  4,  4,  4,  4,  4,
  159         4,  4,  4,  4,  4,  4,  4,  4,
  160         4,  4,  4,  4,  4,  4,  4,  4,
  161         4,  4,  4,  4,  4,  4,  4,  5,
  162         4,  4,  4,  4,  4,  4,  5,  5,
  163         4,  4,  4,  4,  4,  5,  5,  6,
  164         4,  4,  4,  4,  5,  5,  6,  7,
  165         4,  4,  4,  4,  5,  6,  7,  7
  166     }, { /* 444 xq */
  167         4,  4,  4,  4,  4,  4,  4,  4,
  168         4,  4,  4,  4,  4,  4,  4,  4,
  169         4,  4,  4,  4,  4,  4,  4,  4,
  170         4,  4,  4,  4,  4,  4,  4,  5,
  171         4,  4,  4,  4,  4,  4,  5,  5,
  172         4,  4,  4,  4,  4,  5,  5,  6,
  173         4,  4,  4,  4,  5,  5,  6,  7,
  174         4,  4,  4,  4,  5,  6,  7,  7
  175     }
  176 };
 177
 178
  /* Private encoder state, reached through avctx->priv_data. */
  179 typedef struct {
  180     AVClass *class;          /* NOTE(review): presumably required as first member for AVOptions -- confirm */
  181     FDCTDSPContext fdsp;     /* provides the forward DCT (fdsp.fdct) applied to every 8x8 block */
          /* Scratch planes that encode_slice() fills with an edge-replicated copy
           * of a slice when the slice touches a partial macroblock. */
  182     uint8_t* fill_y;
  183     uint8_t* fill_u;
  184     uint8_t* fill_v;
  185     uint8_t* fill_a;         /* scratch plane for the slice's alpha samples */
  186
          /* Quantisation tables selected with index (qp - 1) when a slice is coded. */
  187     int qmat_luma[16][64];
  188     int qmat_chroma[16][64];
  189
  190     int is_422;              /* 1 for the 4:2:2 profiles, 0 for the 4:4:4 ones */
  191     int need_alpha;          /* non-zero: slices carry an alpha plane and a 2-byte-longer header */
  192
  193     char *vendor;            /* 4-character vendor ID written to the frame header */
  194 } ProresContext;
 195
 196 static void encode_codeword(PutBitContext *pb, int val, int codebook)
 197 {
 198     unsigned int rice_order, exp_order, switch_bits, first_exp, exp, zeros;
 199
 200     /* number of bits to switch between rice and exp golomb */
 201     switch_bits = codebook & 3;
 202     rice_order  = codebook >> 5;
 203     exp_order   = (codebook >> 2) & 7;
 204
 205     first_exp = ((switch_bits + 1) << rice_order);
 206
 207     if (val >= first_exp) { /* exp golomb */
 208         val -= first_exp;
 209         val += (1 << exp_order);
 210         exp = av_log2(val);
 211         zeros = exp - exp_order + switch_bits + 1;
 212         put_bits(pb, zeros, 0);
 213         put_bits(pb, exp + 1, val);
 214     } else if (rice_order) {
 215         put_bits(pb, (val >> rice_order), 0);
 216         put_bits(pb, 1, 1);
 217         put_sbits(pb, rice_order, val);
 218     } else {
 219         put_bits(pb, val, 0);
 220         put_bits(pb, 1, 1);
 221     }
 222 }
 223
  /* Coefficient helpers. The sign tricks below rely on (val) >> 31 yielding
   * 0 for non-negative and -1 for negative values (assumes 32-bit int with
   * arithmetic right shift). Arguments may be evaluated more than once. */
  /* QSCALE: quantise val by dividing it by entry ind of qmat. */
  224 #define QSCALE(qmat,ind,val) ((val) / ((qmat)[ind]))
  /* TO_GOLOMB: map a signed value to unsigned: v >= 0 -> 2v, v < 0 -> -2v - 1. */
  225 #define TO_GOLOMB(val) (((val) << 1) ^ ((val) >> 31))
  /* DIFF_SIGN: 0 when val has the sign given by sign (0 or -1), -1 otherwise. */
  226 #define DIFF_SIGN(val, sign) (((val) >> 31) ^ (sign))
  /* IS_NEGATIVE: 1 when val is negative, 0 otherwise. */
  227 #define IS_NEGATIVE(val) ((((val) >> 31) ^ -1) + 1)
  /* TO_GOLOMB2: 0 for val == 0, otherwise 2 * val + sign. */
  228 #define TO_GOLOMB2(val,sign) ((val)==0 ? 0 : ((val) << 1) + (sign))
 229
 230 static av_always_inline int get_level(int val)
 231 {
 232     int sign = (val >> 31);
 233     return (val ^ sign) - sign;
 234 }
 235
  /* Codebook used by encode_dc_coeffs() for the first DC value of a slice. */
  236 #define FIRST_DC_CB 0xB8
  237
  /* Codebooks for the following DC differences; encode_dc_coeffs() selects
   * the entry with min(previous code, 6). */
  238 static const uint8_t dc_codebook[7] = { 0x04, 0x28, 0x28, 0x4D, 0x4D, 0x70, 0x70};
 239
 240 static void encode_dc_coeffs(PutBitContext *pb, int16_t *in,
 241         int blocks_per_slice, int *qmat)
 242 {
 243     int prev_dc, code;
 244     int i, sign, idx;
 245     int new_dc, delta, diff_sign, new_code;
 246
 247     prev_dc = QSCALE(qmat, 0, in[0] - 16384);
 248     code = TO_GOLOMB(prev_dc);
 249     encode_codeword(pb, code, FIRST_DC_CB);
 250
 251     code = 5; sign = 0; idx = 64;
 252     for (i = 1; i < blocks_per_slice; i++, idx += 64) {
 253         new_dc    = QSCALE(qmat, 0, in[idx] - 16384);
 254         delta     = new_dc - prev_dc;
 255         diff_sign = DIFF_SIGN(delta, sign);
 256         new_code  = TO_GOLOMB2(get_level(delta), diff_sign);
 257
 258         encode_codeword(pb, new_code, dc_codebook[FFMIN(code, 6)]);
 259
 260         code      = new_code;
 261         sign      = delta >> 31;
 262         prev_dc   = new_dc;
 263     }
 264 }
 265
  /* Codebooks for AC zero-run lengths; encode_ac_coeffs() selects the entry
   * with min(previous run, 15). */
  266 static const uint8_t run_to_cb[16] = { 0x06, 0x06, 0x05, 0x05, 0x04, 0x29,
  267         0x29, 0x29, 0x29, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x4C };
  /* Codebooks for AC levels; encode_ac_coeffs() selects the entry with
   * min(previous level, 9). */
  268 static const uint8_t lev_to_cb[10] = { 0x04, 0x0A, 0x05, 0x06, 0x04, 0x28,
  269         0x28, 0x28, 0x28, 0x4C };
 270
 271 static void encode_ac_coeffs(PutBitContext *pb,
 272         int16_t *in, int blocks_per_slice, int *qmat)
 273 {
 274     int prev_run = 4;
 275     int prev_level = 2;
 276
 277     int run = 0, level, code, i, j;
 278     for (i = 1; i < 64; i++) {
 279         int indp = ff_prores_progressive_scan[i];
 280         for (j = 0; j < blocks_per_slice; j++) {
 281             int val = QSCALE(qmat, indp, in[(j << 6) + indp]);
 282             if (val) {
 283                 encode_codeword(pb, run, run_to_cb[FFMIN(prev_run, 15)]);
 284
 285                 prev_run   = run;
 286                 run        = 0;
 287                 level      = get_level(val);
 288                 code       = level - 1;
 289
 290                 encode_codeword(pb, code, lev_to_cb[FFMIN(prev_level, 9)]);
 291
 292                 prev_level = level;
 293
 294                 put_bits(pb, 1, IS_NEGATIVE(val));
 295             } else {
 296                 ++run;
 297             }
 298         }
 299     }
 300 }
 301
 302 static void get(uint8_t *pixels, int stride, int16_t* block)
 303 {
 304     int i;
 305
 306     for (i = 0; i < 8; i++) {
 307         AV_WN64(block, AV_RN64(pixels));
 308         AV_WN64(block+4, AV_RN64(pixels+8));
 309         pixels += stride;
 310         block += 8;
 311     }
 312 }
 313
  /* Fetch one 8x8 block of samples from the picture into block and run the
   * forward DCT from fdsp on it in place.
   * pixels points at the block's top-left sample; stride is in bytes. */
  314 static void fdct_get(FDCTDSPContext *fdsp, uint8_t *pixels, int stride, int16_t* block)
  315 {
  316     get(pixels, stride, block);
  317     fdsp->fdct(block);
  318 }
 319
 320 static void calc_plane_dct(FDCTDSPContext *fdsp, uint8_t *src, int16_t * blocks, int src_stride, int mb_count, int chroma, int is_422)
 321 {
 322     int16_t *block;
 323     int i;
 324
 325     block = blocks;
 326
 327     if (!chroma) { /* Luma plane */
 328         for (i = 0; i < mb_count; i++) {
 329             fdct_get(fdsp, src,                       src_stride, block + (0 << 6));
 330             fdct_get(fdsp, src + 16,                  src_stride, block + (1 << 6));
 331             fdct_get(fdsp, src +      8 * src_stride, src_stride, block + (2 << 6));
 332             fdct_get(fdsp, src + 16 + 8 * src_stride, src_stride, block + (3 << 6));
 333
 334             block += 256;
 335             src   += 32;
 336         }
 337     } else if (chroma && is_422){ /* chroma plane 422 */
 338         for (i = 0; i < mb_count; i++) {
 339             fdct_get(fdsp, src,                  src_stride, block + (0 << 6));
 340             fdct_get(fdsp, src + 8 * src_stride, src_stride, block + (1 << 6));
 341             block += (256 >> 1);
 342             src   += (32  >> 1);
 343         }
 344     } else { /* chroma plane 444 */
 345         for (i = 0; i < mb_count; i++) {
 346             fdct_get(fdsp, src,                       src_stride, block + (0 << 6));
 347             fdct_get(fdsp, src +      8 * src_stride, src_stride, block + (1 << 6));
 348             fdct_get(fdsp, src + 16,                  src_stride, block + (2 << 6));
 349             fdct_get(fdsp, src + 16 + 8 * src_stride, src_stride, block + (3 << 6));
 350
 351             block += 256;
 352             src   += 32;
 353         }
 354     }
 355 }
 356
 357 static int encode_slice_plane(int16_t *blocks, int mb_count, uint8_t *buf, unsigned buf_size, int *qmat, int sub_sample_chroma)
 358 {
 359     int blocks_per_slice;
 360     PutBitContext pb;
 361
 362     blocks_per_slice = mb_count << (2 - sub_sample_chroma);
 363     init_put_bits(&pb, buf, buf_size);
 364
 365     encode_dc_coeffs(&pb, blocks, blocks_per_slice, qmat);
 366     encode_ac_coeffs(&pb, blocks, blocks_per_slice, qmat);
 367
 368     flush_put_bits(&pb);
 369     return put_bits_ptr(&pb) - pb.buf;
 370 }
 371
 372 static av_always_inline unsigned encode_slice_data(AVCodecContext *avctx,
 373                                                    int16_t * blocks_y, int16_t * blocks_u, int16_t * blocks_v,
 374                                                    unsigned mb_count, uint8_t *buf, unsigned data_size,
 375                                                    unsigned* y_data_size, unsigned* u_data_size, unsigned* v_data_size,
 376                                                    int qp)
 377 {
 378     ProresContext* ctx = avctx->priv_data;
 379
 380     *y_data_size = encode_slice_plane(blocks_y, mb_count,
 381                                       buf, data_size, ctx->qmat_luma[qp - 1], 0);
 382
 383     if (!(avctx->flags & AV_CODEC_FLAG_GRAY)) {
 384         *u_data_size = encode_slice_plane(blocks_u, mb_count, buf + *y_data_size, data_size - *y_data_size,
 385                                           ctx->qmat_chroma[qp - 1], ctx->is_422);
 386
 387         *v_data_size = encode_slice_plane(blocks_v, mb_count, buf + *y_data_size + *u_data_size,
 388                                           data_size - *y_data_size - *u_data_size,
 389                                           ctx->qmat_chroma[qp - 1], ctx->is_422);
 390     }
 391
 392     return *y_data_size + *u_data_size + *v_data_size;
 393 }
 394
 395 static void put_alpha_diff(PutBitContext *pb, int cur, int prev)
 396 {
 397     const int abits = 16;
 398     const int dbits = 7;
 399     const int dsize = 1 << dbits - 1;
 400     int diff = cur - prev;
 401
 402     diff = av_mod_uintp2(diff, abits);
 403     if (diff >= (1 << abits) - dsize)
 404         diff -= 1 << abits;
 405     if (diff < -dsize || diff > dsize || !diff) {
 406         put_bits(pb, 1, 1);
 407         put_bits(pb, abits, diff);
 408     } else {
 409         put_bits(pb, 1, 0);
 410         put_bits(pb, dbits - 1, FFABS(diff) - 1);
 411         put_bits(pb, 1, diff < 0);
 412     }
 413 }
 414
 415 static inline void put_alpha_run(PutBitContext *pb, int run)
 416 {
 417     if (run) {
 418         put_bits(pb, 1, 0);
 419         if (run < 0x10)
 420             put_bits(pb, 4, run);
 421         else
 422             put_bits(pb, 15, run);
 423     } else {
 424         put_bits(pb, 1, 1);
 425     }
 426 }
 427
 428 static av_always_inline int encode_alpha_slice_data(AVCodecContext *avctx, int8_t * src_a,
 429                                                    unsigned mb_count, uint8_t *buf, unsigned data_size, unsigned* a_data_size)
 430 {
 431     const int abits = 16;
 432     const int mask  = (1 << abits) - 1;
 433     const int num_coeffs = mb_count * 256;
 434     int prev = mask, cur;
 435     int idx = 0;
 436     int run = 0;
 437     int16_t * blocks = (int16_t *)src_a;
 438     PutBitContext pb;
 439     init_put_bits(&pb, buf, data_size);
 440
 441     cur = blocks[idx++];
 442     put_alpha_diff(&pb, cur, prev);
 443     prev = cur;
 444     do {
 445         cur = blocks[idx++];
 446         if (cur != prev) {
 447             put_alpha_run (&pb, run);
 448             put_alpha_diff(&pb, cur, prev);
 449             prev = cur;
 450             run  = 0;
 451         } else {
 452             run++;
 453         }
 454     } while (idx < num_coeffs);
 455     if (run)
 456         put_alpha_run(&pb, run);
 457     flush_put_bits(&pb);
 458     *a_data_size = put_bits_count(&pb) >> 3;
 459
 460     if (put_bits_left(&pb) < 0) {
 461         av_log(avctx, AV_LOG_ERROR,
 462                "Underestimated required buffer size.\n");
 463         return AVERROR_BUG;
 464     } else {
 465         return 0;
 466     }
 467 }
 468
 469 static void subimage_with_fill(uint16_t *src, unsigned x, unsigned y,
 470         unsigned stride, unsigned width, unsigned height, uint16_t *dst,
 471         unsigned dst_width, unsigned dst_height)
 472 {
 473
 474     int box_width = FFMIN(width - x, dst_width);
 475     int box_height = FFMIN(height - y, dst_height);
 476     int i, j, src_stride = stride >> 1;
 477     uint16_t last_pix, *last_line;
 478
 479     src += y * src_stride + x;
 480     for (i = 0; i < box_height; ++i) {
 481         for (j = 0; j < box_width; ++j) {
 482             dst[j] = src[j];
 483         }
 484         last_pix = dst[j - 1];
 485         for (; j < dst_width; j++)
 486             dst[j] = last_pix;
 487         src += src_stride;
 488         dst += dst_width;
 489     }
 490     last_line = dst - dst_width;
 491     for (; i < dst_height; i++) {
 492         for (j = 0; j < dst_width; ++j) {
 493             dst[j] = last_line[j];
 494         }
 495         dst += dst_width;
 496     }
 497 }
 498
 499 /* reorganize alpha data and convert 10b -> 16b */
 500 static void subimage_alpha_with_fill(uint16_t *src, unsigned x, unsigned y,
 501                                unsigned stride, unsigned width, unsigned height, uint16_t *dst,
 502                                unsigned dst_width, unsigned dst_height)
 503 {
 504     int box_width = FFMIN(width - x, dst_width);
 505     int box_height = FFMIN(height - y, dst_height);
 506     int i, j, src_stride = stride >> 1;
 507     uint16_t last_pix, *last_line;
 508
 509     src += y * src_stride + x;
 510     for (i = 0; i < box_height; ++i) {
 511         for (j = 0; j < box_width; ++j) {
 512             dst[j] = src[j] << 6; /* 10b to 16b */
 513         }
 514         last_pix = dst[j - 1] << 6; /* 10b to 16b */
 515         for (; j < dst_width; j++)
 516             dst[j] = last_pix;
 517         src += src_stride;
 518         dst += dst_width;
 519     }
 520     last_line = dst - dst_width;
 521     for (; i < dst_height; i++) {
 522         for (j = 0; j < dst_width; ++j) {
 523             dst[j] = last_line[j];
 524         }
 525         dst += dst_width;
 526     }
 527 }
 528
  /* Encode one slice of mb_count macroblocks starting at macroblock (mb_x, mb_y).
   *
   * Writes the slice header followed by the Y, U, V and (when
   * ctx->need_alpha is set) alpha data into buf.
   *
   * unsafe: non-zero when the slice touches a partial macroblock; the
   *         samples are then first copied into the edge-padded fill buffers.
   * qp:     in/out quantiser. It is adjusted by the rate loop on the
   *         non-"unsafe" path and keeps its value for the next slice.
   *
   * Returns the total number of bytes written (header included), or the
   * non-zero error code returned by encode_alpha_slice_data(). */
  529 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic, int mb_x,
  530         int mb_y, unsigned mb_count, uint8_t *buf, unsigned data_size,
  531         int unsafe, int *qp)
  532 {
  533     int luma_stride, chroma_stride, alpha_stride = 0;
  534     ProresContext* ctx = avctx->priv_data;
  535     int hdr_size = 6 + (ctx->need_alpha * 2); /* the v data size is written only when there is alpha */
  536     int ret = 0, slice_size;
  537     uint8_t *dest_y, *dest_u, *dest_v;
  538     unsigned y_data_size = 0, u_data_size = 0, v_data_size = 0, a_data_size = 0;
  539     FDCTDSPContext *fdsp = &ctx->fdsp;
          /* target size window for the rate loop: tgt_bits -/+ one eighth, in bytes */
  540     int tgt_bits   = (mb_count * bitrate_table[avctx->profile]) >> 2;
  541     int low_bytes  = (tgt_bits - (tgt_bits >> 3)) >> 3; // 12% bitrate fluctuation
  542     int high_bytes = (tgt_bits + (tgt_bits >> 3)) >> 3;
  543
  544     LOCAL_ALIGNED(16, int16_t, blocks_y, [DEFAULT_SLICE_MB_WIDTH << 8]);
  545     LOCAL_ALIGNED(16, int16_t, blocks_u, [DEFAULT_SLICE_MB_WIDTH << 8]);
  546     LOCAL_ALIGNED(16, int16_t, blocks_v, [DEFAULT_SLICE_MB_WIDTH << 8]);
  547
  548     luma_stride   = pic->linesize[0];
  549     chroma_stride = pic->linesize[1];
  550
  551     if (ctx->need_alpha)
  552         alpha_stride = pic->linesize[3];
  553
          /* a macroblock is 16 rows high and 32 bytes wide in luma
           * (16 bytes wide in chroma when is_422 is set) */
  554     dest_y = pic->data[0] + (mb_y << 4) * luma_stride   + (mb_x << 5);
  555     dest_u = pic->data[1] + (mb_y << 4) * chroma_stride + (mb_x << (5 - ctx->is_422));
  556     dest_v = pic->data[2] + (mb_y << 4) * chroma_stride + (mb_x << (5 - ctx->is_422));
  557
  558     if (unsafe) {
              /* copy the slice into the fill buffers with edge replication and
               * transform from there; the buffers are packed, so the row
               * stride passed to calc_plane_dct() is the slice width in bytes */
  559         subimage_with_fill((uint16_t *) pic->data[0], mb_x << 4, mb_y << 4,
  560                 luma_stride, avctx->width, avctx->height,
  561                 (uint16_t *) ctx->fill_y, mb_count << 4, 16);
  562         subimage_with_fill((uint16_t *) pic->data[1], mb_x << (4 - ctx->is_422), mb_y << 4,
  563                            chroma_stride, avctx->width >> ctx->is_422, avctx->height,
  564                            (uint16_t *) ctx->fill_u, mb_count << (4 - ctx->is_422), 16);
  565         subimage_with_fill((uint16_t *) pic->data[2], mb_x << (4 - ctx->is_422), mb_y << 4,
  566                            chroma_stride, avctx->width >> ctx->is_422, avctx->height,
  567                            (uint16_t *) ctx->fill_v, mb_count << (4 - ctx->is_422), 16);
  568
  569         calc_plane_dct(fdsp, ctx->fill_y, blocks_y, mb_count <<  5,                mb_count, 0, 0);
  570         calc_plane_dct(fdsp, ctx->fill_u, blocks_u, mb_count << (5 - ctx->is_422), mb_count, 1, ctx->is_422);
  571         calc_plane_dct(fdsp, ctx->fill_v, blocks_v, mb_count << (5 - ctx->is_422), mb_count, 1, ctx->is_422);
  572
              /* this path encodes once at the current qp; no rate adjustment */
  573         slice_size = encode_slice_data(avctx, blocks_y, blocks_u, blocks_v,
  574                           mb_count, buf + hdr_size, data_size - hdr_size,
  575                           &y_data_size, &u_data_size, &v_data_size,
  576                           *qp);
  577     } else {
  578         calc_plane_dct(fdsp, dest_y, blocks_y, luma_stride, mb_count, 0, 0);
  579         calc_plane_dct(fdsp, dest_u, blocks_u, chroma_stride, mb_count, 1, ctx->is_422);
  580         calc_plane_dct(fdsp, dest_v, blocks_v, chroma_stride, mb_count, 1, ctx->is_422);
  581
  582         slice_size = encode_slice_data(avctx, blocks_y, blocks_u, blocks_v,
  583                           mb_count, buf + hdr_size, data_size - hdr_size,
  584                           &y_data_size, &u_data_size, &v_data_size,
  585                           *qp);
  586
              /* rate loop: re-encode with a coarser qp while the slice is too
               * large, or with a finer qp while it is too small, staying
               * within the profile's [qp_start, qp_end] range */
  587         if (slice_size > high_bytes && *qp < qp_end_table[avctx->profile]) {
  588             do {
  589                 *qp += 1;
  590                 slice_size = encode_slice_data(avctx, blocks_y, blocks_u, blocks_v,
  591                                                mb_count, buf + hdr_size, data_size - hdr_size,
  592                                                &y_data_size, &u_data_size, &v_data_size,
  593                                                *qp);
  594             } while (slice_size > high_bytes && *qp < qp_end_table[avctx->profile]);
  595         } else if (slice_size < low_bytes && *qp
  596                 > qp_start_table[avctx->profile]) {
  597             do {
  598                 *qp -= 1;
  599                 slice_size = encode_slice_data(avctx, blocks_y, blocks_u, blocks_v,
  600                                                mb_count, buf + hdr_size, data_size - hdr_size,
  601                                                &y_data_size, &u_data_size, &v_data_size,
  602                                                *qp);
  603             } while (slice_size < low_bytes && *qp > qp_start_table[avctx->profile]);
  604         }
  605     }
  606
          /* slice header: header size in bits, qp, then the plane sizes */
  607     buf[0] = hdr_size << 3;
  608     buf[1] = *qp;
  609     AV_WB16(buf + 2, y_data_size);
  610     AV_WB16(buf + 4, u_data_size);
  611
  612     if (ctx->need_alpha) {
  613         AV_WB16(buf + 6, v_data_size); /* write v data size only if there is alpha */
  614
              /* alpha data is appended right after the Y/U/V data */
  615         subimage_alpha_with_fill((uint16_t *) pic->data[3], mb_x << 4, mb_y << 4,
  616                            alpha_stride, avctx->width, avctx->height,
  617                            (uint16_t *) ctx->fill_a, mb_count << 4, 16);
  618         ret = encode_alpha_slice_data(avctx, ctx->fill_a, mb_count,
  619                                       buf + hdr_size + slice_size,
  620                                       data_size - hdr_size - slice_size, &a_data_size);
  621     }
  622
  623     if (ret != 0) {
  624         return ret;
  625     }
  626     return hdr_size + y_data_size + u_data_size + v_data_size + a_data_size;
  627 }
 628
 629 static int prores_encode_picture(AVCodecContext *avctx, const AVFrame *pic,
 630         uint8_t *buf, const int buf_size)
 631 {
 632     int mb_width = (avctx->width + 15) >> 4;
 633     int mb_height = (avctx->height + 15) >> 4;
 634     int hdr_size, sl_size, i;
 635     int mb_y, sl_data_size, qp;
 636     int unsafe_bot, unsafe_right;
 637     uint8_t *sl_data, *sl_data_sizes;
 638     int slice_per_line = 0, rem = mb_width;
 639
 640     for (i = av_log2(DEFAULT_SLICE_MB_WIDTH); i >= 0; --i) {
 641         slice_per_line += rem >> i;
 642         rem &= (1 << i) - 1;
 643     }
 644
 645     qp = qp_start_table[avctx->profile];
 646     hdr_size = 8; sl_data_size = buf_size - hdr_size;
 647     sl_data_sizes = buf + hdr_size;
 648     sl_data = sl_data_sizes + (slice_per_line * mb_height * 2);
 649     for (mb_y = 0; mb_y < mb_height; mb_y++) {
 650         int mb_x = 0;
 651         int slice_mb_count = DEFAULT_SLICE_MB_WIDTH;
 652         while (mb_x < mb_width) {
 653             while (mb_width - mb_x < slice_mb_count)
 654                 slice_mb_count >>= 1;
 655
 656             unsafe_bot = (avctx->height & 0xf) && (mb_y == mb_height - 1);
 657             unsafe_right = (avctx->width & 0xf) && (mb_x + slice_mb_count == mb_width);
 658
 659             sl_size = encode_slice(avctx, pic, mb_x, mb_y, slice_mb_count,
 660                     sl_data, sl_data_size, unsafe_bot || unsafe_right, &qp);
 661             if (sl_size < 0){
 662                 return sl_size;
 663             }
 664
 665             bytestream_put_be16(&sl_data_sizes, sl_size);
 666             sl_data           += sl_size;
 667             sl_data_size      -= sl_size;
 668             mb_x              += slice_mb_count;
 669         }
 670     }
 671
 672     buf[0] = hdr_size << 3;
 673     AV_WB32(buf + 1, sl_data - buf);
 674     AV_WB16(buf + 5, slice_per_line * mb_height);
 675     buf[7] = av_log2(DEFAULT_SLICE_MB_WIDTH) << 4;
 676
 677     return sl_data - buf;
 678 }
 679
 680 static int prores_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 681                                const AVFrame *pict, int *got_packet)
 682 {
 683     ProresContext *ctx = avctx->priv_data;
 684     int header_size = 148;
 685     uint8_t *buf;
 686     int pic_size, ret;
 687     int frame_size = FFALIGN(avctx->width, 16) * FFALIGN(avctx->height, 16)*16 + 500 + AV_INPUT_BUFFER_MIN_SIZE; //FIXME choose tighter limit
 688
 689
 690     if ((ret = ff_alloc_packet2(avctx, pkt, frame_size + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
 691         return ret;
 692
 693     buf = pkt->data;
 694     pic_size = prores_encode_picture(avctx, pict, buf + header_size + 8,
 695             pkt->size - header_size - 8);
 696     if (pic_size < 0) {
 697         return pic_size;
 698     }
 699
 700     bytestream_put_be32(&buf, pic_size + 8 + header_size);
 701     bytestream_put_buffer(&buf, "icpf", 4);
 702
 703     bytestream_put_be16(&buf, header_size);
 704     bytestream_put_be16(&buf, 0); /* version */
 705     bytestream_put_buffer(&buf, ctx->vendor, 4);
 706     bytestream_put_be16(&buf, avctx->width);
 707     bytestream_put_be16(&buf, avctx->height);
 708     if (avctx->profile >= FF_PROFILE_PRORES_4444) { /* 4444 or 4444 Xq */
 709         *buf++ = 0xC2; // 444, not interlaced
 710     } else {
 711         *buf++ = 0x82; // 422, not interlaced
 712     }
 713     *buf++ = 0; /* reserved */
 714     /* only write color properties, if valid value. set to unspecified otherwise */
 715     *buf++ = ff_int_from_list_or_default(avctx, "frame color primaries", pict->color_primaries, valid_primaries, 0);
 716     *buf++ = ff_int_from_list_or_default(avctx, "frame color trc", pict->color_trc, valid_trc, 0);
 717     *buf++ = ff_int_from_list_or_default(avctx, "frame colorspace", pict->colorspace, valid_colorspace, 0);
 718     if (avctx->profile >= FF_PROFILE_PRORES_4444) {
 719         if (avctx->pix_fmt == AV_PIX_FMT_YUV444P10) {
 720             *buf++ = 0xA0;/* src b64a and no alpha */
 721         } else {
 722             *buf++ = 0xA2;/* src b64a and 16b alpha */
 723         }
 724     } else {
 725         *buf++ = 32;/* src v210 and no alpha */
 726     }
 727     *buf++ = 0; /* reserved */
 728     *buf++ = 3; /* luma and chroma matrix present */
 729
 730     bytestream_put_buffer(&buf, QMAT_LUMA[avctx->profile],   64);
 731     bytestream_put_buffer(&buf, QMAT_CHROMA[avctx->profile], 64);
 732
 733     pkt->flags |= AV_PKT_FLAG_KEY;
 734     pkt->size = pic_size + 8 + header_size;
 735     *got_packet = 1;
 736
 737     return 0;
 738 }
 739
 740 static void scale_mat(const uint8_t* src, int* dst, int scale)
 741 {
 742     int i;
 743     for (i = 0; i < 64; i++)
 744         dst[i] = src[i] * scale;
 745 }
 746
 747 static av_cold int prores_encode_init(AVCodecContext *avctx)
 748 {
 749     int i;
 750     ProresContext* ctx = avctx->priv_data;
 751
 752     avctx->bits_per_raw_sample = 10;
 753     ctx->need_alpha = 0;
 754
 755     if (avctx->width & 0x1) {
 756         av_log(avctx, AV_LOG_ERROR,
 757                 "frame width needs to be multiple of 2\n");
 758         return AVERROR(EINVAL);
 759     }
 760
 761     if (avctx->width > 65534 || avctx->height > 65535) {
 762         av_log(avctx, AV_LOG_ERROR,
 763                 "The maximum dimensions are 65534x65535\n");
 764         return AVERROR(EINVAL);
 765     }
 766
 767     if (strlen(ctx->vendor) != 4) {
 768         av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
 769         return AVERROR(EINVAL);
 770     }
 771
 772     if (avctx->profile == FF_PROFILE_UNKNOWN) {
 773         if (avctx->pix_fmt == AV_PIX_FMT_YUV422P10) {
 774             avctx->profile = FF_PROFILE_PRORES_STANDARD;
 775             av_log(avctx, AV_LOG_INFO,
 776                 "encoding with ProRes standard (apcn) profile\n");
 777         } else if (avctx->pix_fmt == AV_PIX_FMT_YUV444P10) {
 778             avctx->profile = FF_PROFILE_PRORES_4444;
 779             av_log(avctx, AV_LOG_INFO,
 780                    "encoding with ProRes 4444 (ap4h) profile\n");
 781         } else if (avctx->pix_fmt == AV_PIX_FMT_YUVA444P10) {
 782             avctx->profile = FF_PROFILE_PRORES_4444;
 783             av_log(avctx, AV_LOG_INFO,
 784                    "encoding with ProRes 4444+ (ap4h) profile\n");
 785         } else {
 786             av_log(avctx, AV_LOG_ERROR, "Unknown pixel format\n");
 787             return AVERROR(EINVAL);
 788         }
 789     } else if (avctx->profile < FF_PROFILE_PRORES_PROXY
 790             || avctx->profile > FF_PROFILE_PRORES_XQ) {
 791         av_log(
 792                 avctx,
 793                 AV_LOG_ERROR,
 794                 "unknown profile %d, use [0 - apco, 1 - apcs, 2 - apcn (default), 3 - apch, 4 - ap4h, 5 - ap4x]\n",
 795                 avctx->profile);
 796         return AVERROR(EINVAL);
 797     } else if ((avctx->pix_fmt == AV_PIX_FMT_YUV422P10) && (avctx->profile > FF_PROFILE_PRORES_HQ)){
 798         av_log(avctx, AV_LOG_ERROR,
 799                "encoding with ProRes 444/Xq (ap4h/ap4x) profile, need YUV444P10 input\n");
 800         return AVERROR(EINVAL);
 801     }  else if ((avctx->pix_fmt == AV_PIX_FMT_YUV444P10 || avctx->pix_fmt == AV_PIX_FMT_YUVA444P10)
 802                 && (avctx->profile < FF_PROFILE_PRORES_4444)){
 803         av_log(avctx, AV_LOG_ERROR,
 804                "encoding with ProRes Proxy/LT/422/422 HQ (apco, apcs, apcn, ap4h) profile, need YUV422P10 input\n");
 805         return AVERROR(EINVAL);
 806     }
 807
 808     if (avctx->profile < FF_PROFILE_PRORES_4444) { /* 422 versions */
 809         ctx->is_422 = 1;
 810         if ((avctx->height & 0xf) || (avctx->width & 0xf)) {
 811             ctx->fill_y = av_malloc(4 * (DEFAULT_SLICE_MB_WIDTH << 8));
 812             if (!ctx->fill_y)
 813                 return AVERROR(ENOMEM);
 814             ctx->fill_u = ctx->fill_y + (DEFAULT_SLICE_MB_WIDTH << 9);
 815             ctx->fill_v = ctx->fill_u + (DEFAULT_SLICE_MB_WIDTH << 8);
 816         }
 817     } else { /* 444 */
 818         ctx->is_422 = 0;
 819         if ((avctx->height & 0xf) || (avctx->width & 0xf)) {
 820             ctx->fill_y = av_malloc(3 * (DEFAULT_SLICE_MB_WIDTH << 9));
 821             if (!ctx->fill_y)
 822                 return AVERROR(ENOMEM);
 823             ctx->fill_u = ctx->fill_y + (DEFAULT_SLICE_MB_WIDTH << 9);
 824             ctx->fill_v = ctx->fill_u + (DEFAULT_SLICE_MB_WIDTH << 9);
 825         }
 826         if (avctx->pix_fmt == AV_PIX_FMT_YUVA444P10) {
 827             ctx->need_alpha = 1;
 828             ctx->fill_a = av_malloc(DEFAULT_SLICE_MB_WIDTH << 9); /* 8 blocks x 16px x 16px x sizeof (uint16) */
 829             if (!ctx->fill_a)
 830                 return AVERROR(ENOMEM);
 831         }
 832     }
 833
 834     ff_fdctdsp_init(&ctx->fdsp, avctx);
 835
 836     avctx->codec_tag = AV_RL32((const uint8_t*)profiles[avctx->profile].name);
 837
 838     for (i = 1; i <= 16; i++) {
 839         scale_mat(QMAT_LUMA[avctx->profile]  , ctx->qmat_luma[i - 1]  , i);
 840         scale_mat(QMAT_CHROMA[avctx->profile], ctx->qmat_chroma[i - 1], i);
 841     }
 842
 843     return 0;
 844 }
 845
 846 static av_cold int prores_encode_close(AVCodecContext *avctx)
 847 {
 848     ProresContext* ctx = avctx->priv_data;
 849     av_freep(&ctx->fill_y);
 850     av_freep(&ctx->fill_a);
 851
 852     return 0;
 853 }
 854
 855 #define OFFSET(x) offsetof(ProresContext, x)
 856 #define VE     AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 857
 858 static const AVOption options[] = {
 859     { "vendor", "vendor ID", OFFSET(vendor), AV_OPT_TYPE_STRING, { .str = "fmpg" }, CHAR_MIN, CHAR_MAX, VE },
 860     { NULL }
 861 };
 862
 863 static const AVClass proresaw_enc_class = {
 864     .class_name = "ProResAw encoder",
 865     .item_name  = av_default_item_name,
 866     .option     = options,
 867     .version    = LIBAVUTIL_VERSION_INT,
 868 };
 869
 870 static const AVClass prores_enc_class = {
 871     .class_name = "ProRes encoder",
 872     .item_name  = av_default_item_name,
 873     .option     = options,
 874     .version    = LIBAVUTIL_VERSION_INT,
 875 };
 876
 877 AVCodec ff_prores_aw_encoder = {
 878     .name           = "prores_aw",
 879     .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes"),
 880     .type           = AVMEDIA_TYPE_VIDEO,
 881     .id             = AV_CODEC_ID_PRORES,
 882     .priv_data_size = sizeof(ProresContext),
 883     .init           = prores_encode_init,
 884     .close          = prores_encode_close,
 885     .encode2        = prores_encode_frame,
 886     .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE},
 887     .capabilities   = AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
 888     .priv_class     = &proresaw_enc_class,
 889     .profiles       = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
 890 };
 891
 892 AVCodec ff_prores_encoder = {
 893     .name           = "prores",
 894     .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes"),
 895     .type           = AVMEDIA_TYPE_VIDEO,
 896     .id             = AV_CODEC_ID_PRORES,
 897     .priv_data_size = sizeof(ProresContext),
 898     .init           = prores_encode_init,
 899     .close          = prores_encode_close,
 900     .encode2        = prores_encode_frame,
 901     .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE},
 902     .capabilities   = AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
 903     .priv_class     = &prores_enc_class,
 904     .profiles       = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
 905 };