git.sesse.net Git - ffmpeg/blob - libavcodec/proresenc_anatoliy.c

   1 /*
   2  * Apple ProRes encoder
   3  *
   4  * Copyright (c) 2011 Anatoliy Wasserman
   5  * Copyright (c) 2012 Konstantin Shishkov
   6  *
   7  * This file is part of FFmpeg.
   8  *
   9  * FFmpeg is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2.1 of the License, or (at your option) any later version.
  13  *
  14  * FFmpeg is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with FFmpeg; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22  */
  23
  24 /**
  25  * @file
  26  * Apple ProRes encoder (Anatoliy Wasserman version)
 * Known FOURCCs: 'ap4x' (XQ), 'ap4h' (444), 'apch' (HQ), 'apcn' (422), 'apcs' (LT), 'apco' (Proxy)
  28  */
  29
  30 #include "libavutil/opt.h"
  31 #include "avcodec.h"
  32 #include "dct.h"
  33 #include "internal.h"
  34 #include "profiles.h"
  35 #include "proresdata.h"
  36 #include "put_bits.h"
  37 #include "bytestream.h"
  38 #include "fdctdsp.h"
  39
/* Widest slice, in macroblocks. prores_encode_picture() halves the slice
 * width at the end of a row until it fits the remaining macroblocks, and
 * encode_slice() sizes its coefficient buffers from this value. */
#define DEFAULT_SLICE_MB_WIDTH 8
  41
/* Profiles handled by this encoder, each labelled with its FOURCC.
 * The list is terminated by an FF_PROFILE_UNKNOWN entry. */
static const AVProfile profiles[] = {
    { FF_PROFILE_PRORES_PROXY,    "apco"},
    { FF_PROFILE_PRORES_LT,       "apcs"},
    { FF_PROFILE_PRORES_STANDARD, "apcn"},
    { FF_PROFILE_PRORES_HQ,       "apch"},
    { FF_PROFILE_PRORES_4444,     "ap4h"},
    { FF_PROFILE_PRORES_XQ,       "ap4x"},
    { FF_PROFILE_UNKNOWN }
};
  51
/* Per-profile rate-control tables, all indexed by avctx->profile. */
/* Quantiser a picture starts with; also the lowest value encode_slice() steps down to. */
static const int qp_start_table[6] = {  8, 3, 2, 1, 1, 1};
/* Highest quantiser encode_slice() steps up to when a slice is too large. */
static const int qp_end_table[6]   = { 13, 9, 6, 6, 5, 4};
/* Scaled by the slice's macroblock count and divided by 4 to obtain the
 * slice's target size in bits (see encode_slice()). */
static const int bitrate_table[6]  = { 1000, 2100, 3500, 5400, 7000, 10000};
  55
/* Colour property values that prores_encode_frame() accepts for the frame
 * header. Each list ends with INT_MAX and is passed to
 * ff_int_from_list_or_default(), with 0 as the fallback value. */
static const int valid_primaries[9]  = { AVCOL_PRI_RESERVED0, AVCOL_PRI_BT709, AVCOL_PRI_UNSPECIFIED, AVCOL_PRI_BT470BG,
                                         AVCOL_PRI_SMPTE170M, AVCOL_PRI_BT2020, AVCOL_PRI_SMPTE431, AVCOL_PRI_SMPTE432,INT_MAX };
static const int valid_trc[4]        = { AVCOL_TRC_RESERVED0, AVCOL_TRC_BT709, AVCOL_TRC_UNSPECIFIED, INT_MAX };
static const int valid_colorspace[5] = { AVCOL_SPC_BT709, AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_SMPTE170M,
                                         AVCOL_SPC_BT2020_NCL, INT_MAX };
  61
/* Luma quantisation matrices: one 64-entry (8x8) matrix per profile, the
 * outer index being avctx->profile. prores_encode_frame() copies the
 * selected matrix verbatim into the frame header. */
static const uint8_t QMAT_LUMA[6][64] = {
    { /* profile 0 (apco) */
         4,  7,  9, 11, 13, 14, 15, 63,
         7,  7, 11, 12, 14, 15, 63, 63,
         9, 11, 13, 14, 15, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63
    }, { /* profile 1 (apcs) */
         4,  5,  6,  7,  9, 11, 13, 15,
         5,  5,  7,  8, 11, 13, 15, 17,
         6,  7,  9, 11, 13, 15, 15, 17,
         7,  7,  9, 11, 13, 15, 17, 19,
         7,  9, 11, 13, 14, 16, 19, 23,
         9, 11, 13, 14, 16, 19, 23, 29,
         9, 11, 13, 15, 17, 21, 28, 35,
        11, 13, 16, 17, 21, 28, 35, 41
    }, { /* profile 2 (apcn) */
         4,  4,  5,  5,  6,  7,  7,  9,
         4,  4,  5,  6,  7,  7,  9,  9,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  6,  7,  7,  8,  9, 10, 12,
         6,  7,  7,  8,  9, 10, 12, 15,
         6,  7,  7,  9, 10, 11, 14, 17,
         7,  7,  9, 10, 11, 14, 17, 21
    }, { /* profile 3 (apch) */
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  5,
         4,  4,  4,  4,  4,  4,  5,  5,
         4,  4,  4,  4,  4,  5,  5,  6,
         4,  4,  4,  4,  5,  5,  6,  7,
         4,  4,  4,  4,  5,  6,  7,  7
    }, { /* 444 */
        4,  4,  4,  4,  4,  4,  4,  4,
        4,  4,  4,  4,  4,  4,  4,  4,
        4,  4,  4,  4,  4,  4,  4,  4,
        4,  4,  4,  4,  4,  4,  4,  5,
        4,  4,  4,  4,  4,  4,  5,  5,
        4,  4,  4,  4,  4,  5,  5,  6,
        4,  4,  4,  4,  5,  5,  6,  7,
        4,  4,  4,  4,  5,  6,  7,  7
    }, { /* 444 XQ */
        2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  3,
        2,  2,  2,  2,  2,  2,  3,  3,
        2,  2,  2,  2,  2,  3,  3,  3,
        2,  2,  2,  2,  3,  3,  3,  4,
        2,  2,  2,  2,  3,  3,  4,  4,
    }
};
 119
/* Chroma quantisation matrices: one 64-entry (8x8) matrix per profile, the
 * outer index being avctx->profile. prores_encode_frame() copies the
 * selected matrix verbatim into the frame header, right after the luma one.
 * Note that the XQ entry holds the same values as the 444 entry. */
static const uint8_t QMAT_CHROMA[6][64] = {
    { /* profile 0 (apco) */
         4,  7,  9, 11, 13, 14, 63, 63,
         7,  7, 11, 12, 14, 63, 63, 63,
         9, 11, 13, 14, 63, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63
    }, { /* profile 1 (apcs) */
         4,  5,  6,  7,  9, 11, 13, 15,
         5,  5,  7,  8, 11, 13, 15, 17,
         6,  7,  9, 11, 13, 15, 15, 17,
         7,  7,  9, 11, 13, 15, 17, 19,
         7,  9, 11, 13, 14, 16, 19, 23,
         9, 11, 13, 14, 16, 19, 23, 29,
         9, 11, 13, 15, 17, 21, 28, 35,
        11, 13, 16, 17, 21, 28, 35, 41
    }, { /* profile 2 (apcn) */
         4,  4,  5,  5,  6,  7,  7,  9,
         4,  4,  5,  6,  7,  7,  9,  9,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  6,  7,  7,  8,  9, 10, 12,
         6,  7,  7,  8,  9, 10, 12, 15,
         6,  7,  7,  9, 10, 11, 14, 17,
         7,  7,  9, 10, 11, 14, 17, 21
    }, { /* profile 3 (apch) */
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  5,
         4,  4,  4,  4,  4,  4,  5,  5,
         4,  4,  4,  4,  4,  5,  5,  6,
         4,  4,  4,  4,  5,  5,  6,  7,
         4,  4,  4,  4,  5,  6,  7,  7
    }, { /* 444 */
        4,  4,  4,  4,  4,  4,  4,  4,
        4,  4,  4,  4,  4,  4,  4,  4,
        4,  4,  4,  4,  4,  4,  4,  4,
        4,  4,  4,  4,  4,  4,  4,  5,
        4,  4,  4,  4,  4,  4,  5,  5,
        4,  4,  4,  4,  4,  5,  5,  6,
        4,  4,  4,  4,  5,  5,  6,  7,
        4,  4,  4,  4,  5,  6,  7,  7
    }, { /* 444 xq */
        4,  4,  4,  4,  4,  4,  4,  4,
        4,  4,  4,  4,  4,  4,  4,  4,
        4,  4,  4,  4,  4,  4,  4,  4,
        4,  4,  4,  4,  4,  4,  4,  5,
        4,  4,  4,  4,  4,  4,  5,  5,
        4,  4,  4,  4,  4,  5,  5,  6,
        4,  4,  4,  4,  5,  5,  6,  7,
        4,  4,  4,  4,  5,  6,  7,  7
    }
};
 177
 178
/* Private encoder state, reached through avctx->priv_data. */
typedef struct {
    AVClass *class;            /* NOTE(review): presumably must stay first for AVOptions - confirm */
    FDCTDSPContext fdsp;       /* provides the forward DCT used by fdct_get() */
    /* Scratch planes that receive edge-padded copies of a slice when it
     * touches the right/bottom picture border (see encode_slice()). */
    uint8_t* fill_y;
    uint8_t* fill_u;
    uint8_t* fill_v;
    uint8_t* fill_a;           /* alpha samples of the current slice, widened to 16 bits */

    /* Quantisation matrices per quantiser; row qp - 1 is used for quantiser qp. */
    int qmat_luma[16][64];
    int qmat_chroma[16][64];
    const uint8_t *scantable;  /* coefficient scan order handed to encode_ac_coeffs() */

    int is_422;                /* 1 when chroma is horizontally subsampled (422 profiles) */
    int need_alpha;            /* non-zero when an alpha plane is coded after Y/U/V */

    char *vendor;              /* 4-character vendor ID written into the frame header */
} ProresContext;
 196
 197 static void encode_codeword(PutBitContext *pb, int val, int codebook)
 198 {
 199     unsigned int rice_order, exp_order, switch_bits, first_exp, exp, zeros;
 200
 201     /* number of bits to switch between rice and exp golomb */
 202     switch_bits = codebook & 3;
 203     rice_order  = codebook >> 5;
 204     exp_order   = (codebook >> 2) & 7;
 205
 206     first_exp = ((switch_bits + 1) << rice_order);
 207
 208     if (val >= first_exp) { /* exp golomb */
 209         val -= first_exp;
 210         val += (1 << exp_order);
 211         exp = av_log2(val);
 212         zeros = exp - exp_order + switch_bits + 1;
 213         put_bits(pb, zeros, 0);
 214         put_bits(pb, exp + 1, val);
 215     } else if (rice_order) {
 216         put_bits(pb, (val >> rice_order), 0);
 217         put_bits(pb, 1, 1);
 218         put_sbits(pb, rice_order, val);
 219     } else {
 220         put_bits(pb, val, 0);
 221         put_bits(pb, 1, 1);
 222     }
 223 }
 224
 225 #define QSCALE(qmat,ind,val) ((val) / ((qmat)[ind]))
 226 #define TO_GOLOMB(val) (((val) << 1) ^ ((val) >> 31))
 227 #define DIFF_SIGN(val, sign) (((val) >> 31) ^ (sign))
 228 #define IS_NEGATIVE(val) ((((val) >> 31) ^ -1) + 1)
 229 #define TO_GOLOMB2(val,sign) ((val)==0 ? 0 : ((val) << 1) + (sign))
 230
 231 static av_always_inline int get_level(int val)
 232 {
 233     int sign = (val >> 31);
 234     return (val ^ sign) - sign;
 235 }
 236
/* Codebook descriptor used for the first DC coefficient of a slice plane. */
#define FIRST_DC_CB 0xB8

/* Codebook descriptors for the following DC differences; encode_dc_coeffs()
 * picks the entry from the previously written code, clamped to 6. */
static const uint8_t dc_codebook[7] = { 0x04, 0x28, 0x28, 0x4D, 0x4D, 0x70, 0x70};
 240
 241 static void encode_dc_coeffs(PutBitContext *pb, int16_t *in,
 242         int blocks_per_slice, int *qmat)
 243 {
 244     int prev_dc, code;
 245     int i, sign, idx;
 246     int new_dc, delta, diff_sign, new_code;
 247
 248     prev_dc = QSCALE(qmat, 0, in[0] - 16384);
 249     code = TO_GOLOMB(prev_dc);
 250     encode_codeword(pb, code, FIRST_DC_CB);
 251
 252     code = 5; sign = 0; idx = 64;
 253     for (i = 1; i < blocks_per_slice; i++, idx += 64) {
 254         new_dc    = QSCALE(qmat, 0, in[idx] - 16384);
 255         delta     = new_dc - prev_dc;
 256         diff_sign = DIFF_SIGN(delta, sign);
 257         new_code  = TO_GOLOMB2(get_level(delta), diff_sign);
 258
 259         encode_codeword(pb, new_code, dc_codebook[FFMIN(code, 6)]);
 260
 261         code      = new_code;
 262         sign      = delta >> 31;
 263         prev_dc   = new_dc;
 264     }
 265 }
 266
/* Codebook descriptors for zero-run lengths; encode_ac_coeffs() indexes
 * this with the previous run, clamped to 15. */
static const uint8_t run_to_cb[16] = { 0x06, 0x06, 0x05, 0x05, 0x04, 0x29,
        0x29, 0x29, 0x29, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x4C };
/* Codebook descriptors for coefficient levels; encode_ac_coeffs() indexes
 * this with the previous level, clamped to 9. */
static const uint8_t lev_to_cb[10] = { 0x04, 0x0A, 0x05, 0x06, 0x04, 0x28,
        0x28, 0x28, 0x28, 0x4C };
 271
 272 static void encode_ac_coeffs(PutBitContext *pb,
 273         int16_t *in, int blocks_per_slice, int *qmat, const uint8_t ff_prores_scan[64])
 274 {
 275     int prev_run = 4;
 276     int prev_level = 2;
 277
 278     int run = 0, level, code, i, j;
 279     for (i = 1; i < 64; i++) {
 280         int indp = ff_prores_scan[i];
 281         for (j = 0; j < blocks_per_slice; j++) {
 282             int val = QSCALE(qmat, indp, in[(j << 6) + indp]);
 283             if (val) {
 284                 encode_codeword(pb, run, run_to_cb[FFMIN(prev_run, 15)]);
 285
 286                 prev_run   = run;
 287                 run        = 0;
 288                 level      = get_level(val);
 289                 code       = level - 1;
 290
 291                 encode_codeword(pb, code, lev_to_cb[FFMIN(prev_level, 9)]);
 292
 293                 prev_level = level;
 294
 295                 put_bits(pb, 1, IS_NEGATIVE(val));
 296             } else {
 297                 ++run;
 298             }
 299         }
 300     }
 301 }
 302
 303 static void get(uint8_t *pixels, int stride, int16_t* block)
 304 {
 305     int i;
 306
 307     for (i = 0; i < 8; i++) {
 308         AV_WN64(block, AV_RN64(pixels));
 309         AV_WN64(block+4, AV_RN64(pixels+8));
 310         pixels += stride;
 311         block += 8;
 312     }
 313 }
 314
 315 static void fdct_get(FDCTDSPContext *fdsp, uint8_t *pixels, int stride, int16_t* block)
 316 {
 317     get(pixels, stride, block);
 318     fdsp->fdct(block);
 319 }
 320
 321 static void calc_plane_dct(FDCTDSPContext *fdsp, uint8_t *src, int16_t * blocks, int src_stride, int mb_count, int chroma, int is_422)
 322 {
 323     int16_t *block;
 324     int i;
 325
 326     block = blocks;
 327
 328     if (!chroma) { /* Luma plane */
 329         for (i = 0; i < mb_count; i++) {
 330             fdct_get(fdsp, src,                       src_stride, block + (0 << 6));
 331             fdct_get(fdsp, src + 16,                  src_stride, block + (1 << 6));
 332             fdct_get(fdsp, src +      8 * src_stride, src_stride, block + (2 << 6));
 333             fdct_get(fdsp, src + 16 + 8 * src_stride, src_stride, block + (3 << 6));
 334
 335             block += 256;
 336             src   += 32;
 337         }
 338     } else if (chroma && is_422){ /* chroma plane 422 */
 339         for (i = 0; i < mb_count; i++) {
 340             fdct_get(fdsp, src,                  src_stride, block + (0 << 6));
 341             fdct_get(fdsp, src + 8 * src_stride, src_stride, block + (1 << 6));
 342             block += (256 >> 1);
 343             src   += (32  >> 1);
 344         }
 345     } else { /* chroma plane 444 */
 346         for (i = 0; i < mb_count; i++) {
 347             fdct_get(fdsp, src,                       src_stride, block + (0 << 6));
 348             fdct_get(fdsp, src +      8 * src_stride, src_stride, block + (1 << 6));
 349             fdct_get(fdsp, src + 16,                  src_stride, block + (2 << 6));
 350             fdct_get(fdsp, src + 16 + 8 * src_stride, src_stride, block + (3 << 6));
 351
 352             block += 256;
 353             src   += 32;
 354         }
 355     }
 356 }
 357
 358 static int encode_slice_plane(int16_t *blocks, int mb_count, uint8_t *buf, unsigned buf_size, int *qmat, int sub_sample_chroma,
 359                               const uint8_t ff_prores_scan[64])
 360 {
 361     int blocks_per_slice;
 362     PutBitContext pb;
 363
 364     blocks_per_slice = mb_count << (2 - sub_sample_chroma);
 365     init_put_bits(&pb, buf, buf_size);
 366
 367     encode_dc_coeffs(&pb, blocks, blocks_per_slice, qmat);
 368     encode_ac_coeffs(&pb, blocks, blocks_per_slice, qmat, ff_prores_scan);
 369
 370     flush_put_bits(&pb);
 371     return put_bits_ptr(&pb) - pb.buf;
 372 }
 373
 374 static av_always_inline unsigned encode_slice_data(AVCodecContext *avctx,
 375                                                    int16_t * blocks_y, int16_t * blocks_u, int16_t * blocks_v,
 376                                                    unsigned mb_count, uint8_t *buf, unsigned data_size,
 377                                                    unsigned* y_data_size, unsigned* u_data_size, unsigned* v_data_size,
 378                                                    int qp)
 379 {
 380     ProresContext* ctx = avctx->priv_data;
 381
 382     *y_data_size = encode_slice_plane(blocks_y, mb_count,
 383                                       buf, data_size, ctx->qmat_luma[qp - 1], 0, ctx->scantable);
 384
 385     if (!(avctx->flags & AV_CODEC_FLAG_GRAY)) {
 386         *u_data_size = encode_slice_plane(blocks_u, mb_count, buf + *y_data_size, data_size - *y_data_size,
 387                                           ctx->qmat_chroma[qp - 1], ctx->is_422, ctx->scantable);
 388
 389         *v_data_size = encode_slice_plane(blocks_v, mb_count, buf + *y_data_size + *u_data_size,
 390                                           data_size - *y_data_size - *u_data_size,
 391                                           ctx->qmat_chroma[qp - 1], ctx->is_422, ctx->scantable);
 392     }
 393
 394     return *y_data_size + *u_data_size + *v_data_size;
 395 }
 396
 397 static void put_alpha_diff(PutBitContext *pb, int cur, int prev)
 398 {
 399     const int abits = 16;
 400     const int dbits = 7;
 401     const int dsize = 1 << dbits - 1;
 402     int diff = cur - prev;
 403
 404     diff = av_mod_uintp2(diff, abits);
 405     if (diff >= (1 << abits) - dsize)
 406         diff -= 1 << abits;
 407     if (diff < -dsize || diff > dsize || !diff) {
 408         put_bits(pb, 1, 1);
 409         put_bits(pb, abits, diff);
 410     } else {
 411         put_bits(pb, 1, 0);
 412         put_bits(pb, dbits - 1, FFABS(diff) - 1);
 413         put_bits(pb, 1, diff < 0);
 414     }
 415 }
 416
 417 static inline void put_alpha_run(PutBitContext *pb, int run)
 418 {
 419     if (run) {
 420         put_bits(pb, 1, 0);
 421         if (run < 0x10)
 422             put_bits(pb, 4, run);
 423         else
 424             put_bits(pb, 15, run);
 425     } else {
 426         put_bits(pb, 1, 1);
 427     }
 428 }
 429
 430 static av_always_inline int encode_alpha_slice_data(AVCodecContext *avctx, int8_t * src_a,
 431                                                    unsigned mb_count, uint8_t *buf, unsigned data_size, unsigned* a_data_size)
 432 {
 433     const int abits = 16;
 434     const int mask  = (1 << abits) - 1;
 435     const int num_coeffs = mb_count * 256;
 436     int prev = mask, cur;
 437     int idx = 0;
 438     int run = 0;
 439     int16_t * blocks = (int16_t *)src_a;
 440     PutBitContext pb;
 441     init_put_bits(&pb, buf, data_size);
 442
 443     cur = blocks[idx++];
 444     put_alpha_diff(&pb, cur, prev);
 445     prev = cur;
 446     do {
 447         cur = blocks[idx++];
 448         if (cur != prev) {
 449             put_alpha_run (&pb, run);
 450             put_alpha_diff(&pb, cur, prev);
 451             prev = cur;
 452             run  = 0;
 453         } else {
 454             run++;
 455         }
 456     } while (idx < num_coeffs);
 457     if (run)
 458         put_alpha_run(&pb, run);
 459     flush_put_bits(&pb);
 460     *a_data_size = put_bits_count(&pb) >> 3;
 461
 462     if (put_bits_left(&pb) < 0) {
 463         av_log(avctx, AV_LOG_ERROR,
 464                "Underestimated required buffer size.\n");
 465         return AVERROR_BUG;
 466     } else {
 467         return 0;
 468     }
 469 }
 470
 471 static inline void subimage_with_fill_template(uint16_t *src, unsigned x, unsigned y,
 472                                                unsigned stride, unsigned width, unsigned height, uint16_t *dst,
 473                                                unsigned dst_width, unsigned dst_height, int is_alpha_plane)
 474 {
 475     int box_width = FFMIN(width - x, dst_width);
 476     int box_height = FFMIN(height - y, dst_height);
 477     int i, j, src_stride = stride >> 1;
 478     uint16_t last_pix, *last_line;
 479
 480     src += y * src_stride + x;
 481     for (i = 0; i < box_height; ++i) {
 482         for (j = 0; j < box_width; ++j) {
 483             if (!is_alpha_plane) {
 484                 dst[j] = src[j];
 485             } else {
 486                 dst[j] = src[j] << 6; /* alpha 10b to 16b */
 487             }
 488         }
 489         if (!is_alpha_plane) {
 490             last_pix = dst[j - 1];
 491         } else {
 492             last_pix = dst[j - 1] << 6; /* alpha 10b to 16b */
 493         }
 494         for (; j < dst_width; j++)
 495             dst[j] = last_pix;
 496         src += src_stride;
 497         dst += dst_width;
 498     }
 499     last_line = dst - dst_width;
 500     for (; i < dst_height; i++) {
 501         for (j = 0; j < dst_width; ++j) {
 502             dst[j] = last_line[j];
 503         }
 504         dst += dst_width;
 505     }
 506 }
 507
 508 static void subimage_with_fill(uint16_t *src, unsigned x, unsigned y,
 509         unsigned stride, unsigned width, unsigned height, uint16_t *dst,
 510         unsigned dst_width, unsigned dst_height)
 511 {
 512     subimage_with_fill_template(src, x, y, stride, width, height, dst, dst_width, dst_height, 0);
 513 }
 514
 515 /* reorganize alpha data and convert 10b -> 16b */
 516 static void subimage_alpha_with_fill(uint16_t *src, unsigned x, unsigned y,
 517                                unsigned stride, unsigned width, unsigned height, uint16_t *dst,
 518                                unsigned dst_width, unsigned dst_height)
 519 {
 520     subimage_with_fill_template(src, x, y, stride, width, height, dst, dst_width, dst_height, 1);
 521 }
 522
 523 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic, int mb_x,
 524         int mb_y, unsigned mb_count, uint8_t *buf, unsigned data_size,
 525         int unsafe, int *qp)
 526 {
 527     int luma_stride, chroma_stride, alpha_stride = 0;
 528     ProresContext* ctx = avctx->priv_data;
 529     int hdr_size = 6 + (ctx->need_alpha * 2); /* v data size is write when there is alpha */
 530     int ret = 0, slice_size;
 531     uint8_t *dest_y, *dest_u, *dest_v;
 532     unsigned y_data_size = 0, u_data_size = 0, v_data_size = 0, a_data_size = 0;
 533     FDCTDSPContext *fdsp = &ctx->fdsp;
 534     int tgt_bits   = (mb_count * bitrate_table[avctx->profile]) >> 2;
 535     int low_bytes  = (tgt_bits - (tgt_bits >> 3)) >> 3; // 12% bitrate fluctuation
 536     int high_bytes = (tgt_bits + (tgt_bits >> 3)) >> 3;
 537
 538     LOCAL_ALIGNED(16, int16_t, blocks_y, [DEFAULT_SLICE_MB_WIDTH << 8]);
 539     LOCAL_ALIGNED(16, int16_t, blocks_u, [DEFAULT_SLICE_MB_WIDTH << 8]);
 540     LOCAL_ALIGNED(16, int16_t, blocks_v, [DEFAULT_SLICE_MB_WIDTH << 8]);
 541
 542     luma_stride   = pic->linesize[0];
 543     chroma_stride = pic->linesize[1];
 544
 545     if (ctx->need_alpha)
 546         alpha_stride = pic->linesize[3];
 547
 548     dest_y = pic->data[0] + (mb_y << 4) * luma_stride   + (mb_x << 5);
 549     dest_u = pic->data[1] + (mb_y << 4) * chroma_stride + (mb_x << (5 - ctx->is_422));
 550     dest_v = pic->data[2] + (mb_y << 4) * chroma_stride + (mb_x << (5 - ctx->is_422));
 551
 552     if (unsafe) {
 553         subimage_with_fill((uint16_t *) pic->data[0], mb_x << 4, mb_y << 4,
 554                 luma_stride, avctx->width, avctx->height,
 555                 (uint16_t *) ctx->fill_y, mb_count << 4, 16);
 556         subimage_with_fill((uint16_t *) pic->data[1], mb_x << (4 - ctx->is_422), mb_y << 4,
 557                            chroma_stride, avctx->width >> ctx->is_422, avctx->height,
 558                            (uint16_t *) ctx->fill_u, mb_count << (4 - ctx->is_422), 16);
 559         subimage_with_fill((uint16_t *) pic->data[2], mb_x << (4 - ctx->is_422), mb_y << 4,
 560                            chroma_stride, avctx->width >> ctx->is_422, avctx->height,
 561                            (uint16_t *) ctx->fill_v, mb_count << (4 - ctx->is_422), 16);
 562
 563         calc_plane_dct(fdsp, ctx->fill_y, blocks_y, mb_count <<  5,                mb_count, 0, 0);
 564         calc_plane_dct(fdsp, ctx->fill_u, blocks_u, mb_count << (5 - ctx->is_422), mb_count, 1, ctx->is_422);
 565         calc_plane_dct(fdsp, ctx->fill_v, blocks_v, mb_count << (5 - ctx->is_422), mb_count, 1, ctx->is_422);
 566
 567         slice_size = encode_slice_data(avctx, blocks_y, blocks_u, blocks_v,
 568                           mb_count, buf + hdr_size, data_size - hdr_size,
 569                           &y_data_size, &u_data_size, &v_data_size,
 570                           *qp);
 571     } else {
 572         calc_plane_dct(fdsp, dest_y, blocks_y, luma_stride, mb_count, 0, 0);
 573         calc_plane_dct(fdsp, dest_u, blocks_u, chroma_stride, mb_count, 1, ctx->is_422);
 574         calc_plane_dct(fdsp, dest_v, blocks_v, chroma_stride, mb_count, 1, ctx->is_422);
 575
 576         slice_size = encode_slice_data(avctx, blocks_y, blocks_u, blocks_v,
 577                           mb_count, buf + hdr_size, data_size - hdr_size,
 578                           &y_data_size, &u_data_size, &v_data_size,
 579                           *qp);
 580
 581         if (slice_size > high_bytes && *qp < qp_end_table[avctx->profile]) {
 582             do {
 583                 *qp += 1;
 584                 slice_size = encode_slice_data(avctx, blocks_y, blocks_u, blocks_v,
 585                                                mb_count, buf + hdr_size, data_size - hdr_size,
 586                                                &y_data_size, &u_data_size, &v_data_size,
 587                                                *qp);
 588             } while (slice_size > high_bytes && *qp < qp_end_table[avctx->profile]);
 589         } else if (slice_size < low_bytes && *qp
 590                 > qp_start_table[avctx->profile]) {
 591             do {
 592                 *qp -= 1;
 593                 slice_size = encode_slice_data(avctx, blocks_y, blocks_u, blocks_v,
 594                                                mb_count, buf + hdr_size, data_size - hdr_size,
 595                                                &y_data_size, &u_data_size, &v_data_size,
 596                                                *qp);
 597             } while (slice_size < low_bytes && *qp > qp_start_table[avctx->profile]);
 598         }
 599     }
 600
 601     buf[0] = hdr_size << 3;
 602     buf[1] = *qp;
 603     AV_WB16(buf + 2, y_data_size);
 604     AV_WB16(buf + 4, u_data_size);
 605
 606     if (ctx->need_alpha) {
 607         AV_WB16(buf + 6, v_data_size); /* write v data size only if there is alpha */
 608
 609         subimage_alpha_with_fill((uint16_t *) pic->data[3], mb_x << 4, mb_y << 4,
 610                            alpha_stride, avctx->width, avctx->height,
 611                            (uint16_t *) ctx->fill_a, mb_count << 4, 16);
 612         ret = encode_alpha_slice_data(avctx, ctx->fill_a, mb_count,
 613                                       buf + hdr_size + slice_size,
 614                                       data_size - hdr_size - slice_size, &a_data_size);
 615     }
 616
 617     if (ret != 0) {
 618         return ret;
 619     }
 620     return hdr_size + y_data_size + u_data_size + v_data_size + a_data_size;
 621 }
 622
 623 static int prores_encode_picture(AVCodecContext *avctx, const AVFrame *pic,
 624         uint8_t *buf, const int buf_size)
 625 {
 626     int mb_width = (avctx->width + 15) >> 4;
 627     int mb_height = (avctx->height + 15) >> 4;
 628     int hdr_size, sl_size, i;
 629     int mb_y, sl_data_size, qp;
 630     int unsafe_bot, unsafe_right;
 631     uint8_t *sl_data, *sl_data_sizes;
 632     int slice_per_line = 0, rem = mb_width;
 633
 634     for (i = av_log2(DEFAULT_SLICE_MB_WIDTH); i >= 0; --i) {
 635         slice_per_line += rem >> i;
 636         rem &= (1 << i) - 1;
 637     }
 638
 639     qp = qp_start_table[avctx->profile];
 640     hdr_size = 8; sl_data_size = buf_size - hdr_size;
 641     sl_data_sizes = buf + hdr_size;
 642     sl_data = sl_data_sizes + (slice_per_line * mb_height * 2);
 643     for (mb_y = 0; mb_y < mb_height; mb_y++) {
 644         int mb_x = 0;
 645         int slice_mb_count = DEFAULT_SLICE_MB_WIDTH;
 646         while (mb_x < mb_width) {
 647             while (mb_width - mb_x < slice_mb_count)
 648                 slice_mb_count >>= 1;
 649
 650             unsafe_bot = (avctx->height & 0xf) && (mb_y == mb_height - 1);
 651             unsafe_right = (avctx->width & 0xf) && (mb_x + slice_mb_count == mb_width);
 652
 653             sl_size = encode_slice(avctx, pic, mb_x, mb_y, slice_mb_count,
 654                     sl_data, sl_data_size, unsafe_bot || unsafe_right, &qp);
 655             if (sl_size < 0){
 656                 return sl_size;
 657             }
 658
 659             bytestream_put_be16(&sl_data_sizes, sl_size);
 660             sl_data           += sl_size;
 661             sl_data_size      -= sl_size;
 662             mb_x              += slice_mb_count;
 663         }
 664     }
 665
 666     buf[0] = hdr_size << 3;
 667     AV_WB32(buf + 1, sl_data - buf);
 668     AV_WB16(buf + 5, slice_per_line * mb_height);
 669     buf[7] = av_log2(DEFAULT_SLICE_MB_WIDTH) << 4;
 670
 671     return sl_data - buf;
 672 }
 673
 674 static int prores_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 675                                const AVFrame *pict, int *got_packet)
 676 {
 677     ProresContext *ctx = avctx->priv_data;
 678     int header_size = 148;
 679     uint8_t *buf;
 680     int compress_frame_size, pic_size, ret;
 681     uint8_t frame_flags;
 682     int frame_size = FFALIGN(avctx->width, 16) * FFALIGN(avctx->height, 16)*16 + 500 + AV_INPUT_BUFFER_MIN_SIZE; //FIXME choose tighter limit
 683
 684
 685     if ((ret = ff_alloc_packet2(avctx, pkt, frame_size + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
 686         return ret;
 687
 688     buf = pkt->data;
 689     compress_frame_size = 8 + header_size;
 690
 691     bytestream_put_be32(&buf, compress_frame_size);/* frame size will be update after picture(s) encoding */
 692     bytestream_put_buffer(&buf, "icpf", 4);
 693
 694     bytestream_put_be16(&buf, header_size);
 695     bytestream_put_be16(&buf, 0); /* version */
 696     bytestream_put_buffer(&buf, ctx->vendor, 4);
 697     bytestream_put_be16(&buf, avctx->width);
 698     bytestream_put_be16(&buf, avctx->height);
 699     frame_flags = 0x82; /* 422 not interlaced */
 700     if (avctx->profile >= FF_PROFILE_PRORES_4444) /* 4444 or 4444 Xq */
 701         frame_flags |= 0x40; /* 444 chroma */
 702     *buf++ = frame_flags;
 703     *buf++ = 0; /* reserved */
 704     /* only write color properties, if valid value. set to unspecified otherwise */
 705     *buf++ = ff_int_from_list_or_default(avctx, "frame color primaries", pict->color_primaries, valid_primaries, 0);
 706     *buf++ = ff_int_from_list_or_default(avctx, "frame color trc", pict->color_trc, valid_trc, 0);
 707     *buf++ = ff_int_from_list_or_default(avctx, "frame colorspace", pict->colorspace, valid_colorspace, 0);
 708     if (avctx->profile >= FF_PROFILE_PRORES_4444) {
 709         if (avctx->pix_fmt == AV_PIX_FMT_YUV444P10) {
 710             *buf++ = 0xA0;/* src b64a and no alpha */
 711         } else {
 712             *buf++ = 0xA2;/* src b64a and 16b alpha */
 713         }
 714     } else {
 715         *buf++ = 32;/* src v210 and no alpha */
 716     }
 717     *buf++ = 0; /* reserved */
 718     *buf++ = 3; /* luma and chroma matrix present */
 719
 720     bytestream_put_buffer(&buf, QMAT_LUMA[avctx->profile],   64);
 721     bytestream_put_buffer(&buf, QMAT_CHROMA[avctx->profile], 64);
 722
 723     pic_size = prores_encode_picture(avctx, pict, buf,
 724                                      pkt->size - compress_frame_size);
 725     if (pic_size < 0) {
 726         return pic_size;
 727     }
 728     compress_frame_size += pic_size;
 729
 730     AV_WB32(pkt->data, compress_frame_size);/* update frame size */
 731     pkt->flags |= AV_PKT_FLAG_KEY;
 732     pkt->size = compress_frame_size;
 733     *got_packet = 1;
 734
 735     return 0;
 736 }
 737
 738 static void scale_mat(const uint8_t* src, int* dst, int scale)
 739 {
 740     int i;
 741     for (i = 0; i < 64; i++)
 742         dst[i] = src[i] * scale;
 743 }
 744
 745 static av_cold int prores_encode_init(AVCodecContext *avctx)
 746 {
 747     int i;
 748     ProresContext* ctx = avctx->priv_data;
 749
 750     avctx->bits_per_raw_sample = 10;
 751     ctx->need_alpha = 0;
 752     ctx->scantable = ff_prores_progressive_scan;
 753
 754     if (avctx->width & 0x1) {
 755         av_log(avctx, AV_LOG_ERROR,
 756                 "frame width needs to be multiple of 2\n");
 757         return AVERROR(EINVAL);
 758     }
 759
 760     if (avctx->width > 65534 || avctx->height > 65535) {
 761         av_log(avctx, AV_LOG_ERROR,
 762                 "The maximum dimensions are 65534x65535\n");
 763         return AVERROR(EINVAL);
 764     }
 765
 766     if (strlen(ctx->vendor) != 4) {
 767         av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
 768         return AVERROR(EINVAL);
 769     }
 770
 771     if (avctx->profile == FF_PROFILE_UNKNOWN) {
 772         if (avctx->pix_fmt == AV_PIX_FMT_YUV422P10) {
 773             avctx->profile = FF_PROFILE_PRORES_STANDARD;
 774             av_log(avctx, AV_LOG_INFO,
 775                 "encoding with ProRes standard (apcn) profile\n");
 776         } else if (avctx->pix_fmt == AV_PIX_FMT_YUV444P10) {
 777             avctx->profile = FF_PROFILE_PRORES_4444;
 778             av_log(avctx, AV_LOG_INFO,
 779                    "encoding with ProRes 4444 (ap4h) profile\n");
 780         } else if (avctx->pix_fmt == AV_PIX_FMT_YUVA444P10) {
 781             avctx->profile = FF_PROFILE_PRORES_4444;
 782             av_log(avctx, AV_LOG_INFO,
 783                    "encoding with ProRes 4444+ (ap4h) profile\n");
 784         } else {
 785             av_log(avctx, AV_LOG_ERROR, "Unknown pixel format\n");
 786             return AVERROR(EINVAL);
 787         }
 788     } else if (avctx->profile < FF_PROFILE_PRORES_PROXY
 789             || avctx->profile > FF_PROFILE_PRORES_XQ) {
 790         av_log(
 791                 avctx,
 792                 AV_LOG_ERROR,
 793                 "unknown profile %d, use [0 - apco, 1 - apcs, 2 - apcn (default), 3 - apch, 4 - ap4h, 5 - ap4x]\n",
 794                 avctx->profile);
 795         return AVERROR(EINVAL);
 796     } else if ((avctx->pix_fmt == AV_PIX_FMT_YUV422P10) && (avctx->profile > FF_PROFILE_PRORES_HQ)){
 797         av_log(avctx, AV_LOG_ERROR,
 798                "encoding with ProRes 444/Xq (ap4h/ap4x) profile, need YUV444P10 input\n");
 799         return AVERROR(EINVAL);
 800     }  else if ((avctx->pix_fmt == AV_PIX_FMT_YUV444P10 || avctx->pix_fmt == AV_PIX_FMT_YUVA444P10)
 801                 && (avctx->profile < FF_PROFILE_PRORES_4444)){
 802         av_log(avctx, AV_LOG_ERROR,
 803                "encoding with ProRes Proxy/LT/422/422 HQ (apco, apcs, apcn, ap4h) profile, need YUV422P10 input\n");
 804         return AVERROR(EINVAL);
 805     }
 806
 807     if (avctx->profile < FF_PROFILE_PRORES_4444) { /* 422 versions */
 808         ctx->is_422 = 1;
 809         if ((avctx->height & 0xf) || (avctx->width & 0xf)) {
 810             ctx->fill_y = av_malloc(4 * (DEFAULT_SLICE_MB_WIDTH << 8));
 811             if (!ctx->fill_y)
 812                 return AVERROR(ENOMEM);
 813             ctx->fill_u = ctx->fill_y + (DEFAULT_SLICE_MB_WIDTH << 9);
 814             ctx->fill_v = ctx->fill_u + (DEFAULT_SLICE_MB_WIDTH << 8);
 815         }
 816     } else { /* 444 */
 817         ctx->is_422 = 0;
 818         if ((avctx->height & 0xf) || (avctx->width & 0xf)) {
 819             ctx->fill_y = av_malloc(3 * (DEFAULT_SLICE_MB_WIDTH << 9));
 820             if (!ctx->fill_y)
 821                 return AVERROR(ENOMEM);
 822             ctx->fill_u = ctx->fill_y + (DEFAULT_SLICE_MB_WIDTH << 9);
 823             ctx->fill_v = ctx->fill_u + (DEFAULT_SLICE_MB_WIDTH << 9);
 824         }
 825         if (avctx->pix_fmt == AV_PIX_FMT_YUVA444P10) {
 826             ctx->need_alpha = 1;
 827             ctx->fill_a = av_malloc(DEFAULT_SLICE_MB_WIDTH << 9); /* 8 blocks x 16px x 16px x sizeof (uint16) */
 828             if (!ctx->fill_a)
 829                 return AVERROR(ENOMEM);
 830         }
 831     }
 832
 833     ff_fdctdsp_init(&ctx->fdsp, avctx);
 834
 835     avctx->codec_tag = AV_RL32((const uint8_t*)profiles[avctx->profile].name);
 836
 837     for (i = 1; i <= 16; i++) {
 838         scale_mat(QMAT_LUMA[avctx->profile]  , ctx->qmat_luma[i - 1]  , i);
 839         scale_mat(QMAT_CHROMA[avctx->profile], ctx->qmat_chroma[i - 1], i);
 840     }
 841
 842     return 0;
 843 }
 844
 845 static av_cold int prores_encode_close(AVCodecContext *avctx)
 846 {
 847     ProresContext* ctx = avctx->priv_data;
 848     av_freep(&ctx->fill_y);
 849     av_freep(&ctx->fill_a);
 850
 851     return 0;
 852 }
 853
 /* Byte offset of a ProresContext member, for use in the option table. */
 #define OFFSET(x) offsetof(ProresContext, x)
 /* Option flags: video parameter | encoding parameter. Parenthesized so the
  * expansion stays a single operand wherever the macro is used. */
 #define VE     (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
 856
 857 static const AVOption options[] = {
 858     { "vendor", "vendor ID", OFFSET(vendor), AV_OPT_TYPE_STRING, { .str = "fmpg" }, CHAR_MIN, CHAR_MAX, VE },
 859     { NULL }
 860 };
 861
 862 static const AVClass proresaw_enc_class = {
 863     .class_name = "ProResAw encoder",
 864     .item_name  = av_default_item_name,
 865     .option     = options,
 866     .version    = LIBAVUTIL_VERSION_INT,
 867 };
 868
 869 static const AVClass prores_enc_class = {
 870     .class_name = "ProRes encoder",
 871     .item_name  = av_default_item_name,
 872     .option     = options,
 873     .version    = LIBAVUTIL_VERSION_INT,
 874 };
 875
 876 AVCodec ff_prores_aw_encoder = {
 877     .name           = "prores_aw",
 878     .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes"),
 879     .type           = AVMEDIA_TYPE_VIDEO,
 880     .id             = AV_CODEC_ID_PRORES,
 881     .priv_data_size = sizeof(ProresContext),
 882     .init           = prores_encode_init,
 883     .close          = prores_encode_close,
 884     .encode2        = prores_encode_frame,
 885     .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE},
 886     .capabilities   = AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
 887     .priv_class     = &proresaw_enc_class,
 888     .profiles       = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
 889 };
 890
 891 AVCodec ff_prores_encoder = {
 892     .name           = "prores",
 893     .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes"),
 894     .type           = AVMEDIA_TYPE_VIDEO,
 895     .id             = AV_CODEC_ID_PRORES,
 896     .priv_data_size = sizeof(ProresContext),
 897     .init           = prores_encode_init,
 898     .close          = prores_encode_close,
 899     .encode2        = prores_encode_frame,
 900     .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE},
 901     .capabilities   = AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
 902     .priv_class     = &prores_enc_class,
 903     .profiles       = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
 904 };