git.sesse.net Git - ffmpeg/blob - libavcodec/proresenc_anatoliy.c

   1 /*
   2  * Apple ProRes encoder
   3  *
   4  * Copyright (c) 2011 Anatoliy Wasserman
   5  * Copyright (c) 2012 Konstantin Shishkov
   6  *
   7  * This file is part of FFmpeg.
   8  *
   9  * FFmpeg is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2.1 of the License, or (at your option) any later version.
  13  *
  14  * FFmpeg is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with FFmpeg; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22  */
  23
/**
 * @file
 * Apple ProRes encoder (Anatoliy Wasserman version)
 * Known FOURCCs: 'ap4h' (444), 'apch' (HQ), 'apcn' (422), 'apcs' (LT), 'apco' (Proxy)
 */
  29
  30 #include "libavutil/opt.h"
  31 #include "avcodec.h"
  32 #include "dct.h"
  33 #include "internal.h"
  34 #include "profiles.h"
  35 #include "proresdata.h"
  36 #include "put_bits.h"
  37 #include "bytestream.h"
  38 #include "fdctdsp.h"
  39
/* Widest slice, in macroblocks. prores_encode_picture() halves the slice
 * width as needed to fit the macroblocks left at the right edge of a row. */
#define DEFAULT_SLICE_MB_WIDTH 8

/* Supported profiles. The name is the FOURCC string that
 * prores_encode_init() turns into avctx->codec_tag. */
static const AVProfile profiles[] = {
    { FF_PROFILE_PRORES_PROXY,    "apco"},
    { FF_PROFILE_PRORES_LT,       "apcs"},
    { FF_PROFILE_PRORES_STANDARD, "apcn"},
    { FF_PROFILE_PRORES_HQ,       "apch"},
    { FF_PROFILE_PRORES_4444,     "ap4h"},
    { FF_PROFILE_UNKNOWN }
};

/* Per-profile rate control parameters, indexed by avctx->profile:
 * qp_start_table is the qp a picture starts with and the lower qp bound,
 * qp_end_table is the upper qp bound, and bitrate_table is scaled by the
 * slice's macroblock count in encode_slice() to get the slice size target. */
static const int qp_start_table[5] = {  8, 3, 2, 1, 1};
static const int qp_end_table[5]   = { 13, 9, 6, 6, 5};
static const int bitrate_table[5]  = { 1000, 2100, 3500, 5400, 7000};

/* INT_MAX-terminated lists of the color property values that may be written
 * to the frame header; prores_encode_frame() passes them to
 * ff_int_from_list_or_default() with 0 as the fallback value. */
static const int valid_primaries[9]  = { AVCOL_PRI_RESERVED0, AVCOL_PRI_BT709, AVCOL_PRI_UNSPECIFIED, AVCOL_PRI_BT470BG,
                                         AVCOL_PRI_SMPTE170M, AVCOL_PRI_BT2020, AVCOL_PRI_SMPTE431, AVCOL_PRI_SMPTE432,INT_MAX };
static const int valid_trc[4]        = { AVCOL_TRC_RESERVED0, AVCOL_TRC_BT709, AVCOL_TRC_UNSPECIFIED, INT_MAX };
static const int valid_colorspace[5] = { AVCOL_SPC_BT709, AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_SMPTE170M,
                                         AVCOL_SPC_BT2020_NCL, INT_MAX };
  60
/*
 * Base luma quantization matrices, one 64-entry matrix per profile and
 * indexed by avctx->profile (0 - apco, 1 - apcs, 2 - apcn, 3 - apch, 4 - ap4h).
 * The selected matrix is copied verbatim into every frame header and is
 * multiplied by 1..16 in prores_encode_init() to build ctx->qmat_luma.
 */
static const uint8_t QMAT_LUMA[5][64] = {
    {
         4,  7,  9, 11, 13, 14, 15, 63,
         7,  7, 11, 12, 14, 15, 63, 63,
         9, 11, 13, 14, 15, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63
    }, {
         4,  5,  6,  7,  9, 11, 13, 15,
         5,  5,  7,  8, 11, 13, 15, 17,
         6,  7,  9, 11, 13, 15, 15, 17,
         7,  7,  9, 11, 13, 15, 17, 19,
         7,  9, 11, 13, 14, 16, 19, 23,
         9, 11, 13, 14, 16, 19, 23, 29,
         9, 11, 13, 15, 17, 21, 28, 35,
        11, 13, 16, 17, 21, 28, 35, 41
    }, {
         4,  4,  5,  5,  6,  7,  7,  9,
         4,  4,  5,  6,  7,  7,  9,  9,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  6,  7,  7,  8,  9, 10, 12,
         6,  7,  7,  8,  9, 10, 12, 15,
         6,  7,  7,  9, 10, 11, 14, 17,
         7,  7,  9, 10, 11, 14, 17, 21
    }, {
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  5,
         4,  4,  4,  4,  4,  4,  5,  5,
         4,  4,  4,  4,  4,  5,  5,  6,
         4,  4,  4,  4,  5,  5,  6,  7,
         4,  4,  4,  4,  5,  6,  7,  7
    }, { /* 444 */
        4,  4,  4,  4,  4,  4,  4,  4,
        4,  4,  4,  4,  4,  4,  4,  4,
        4,  4,  4,  4,  4,  4,  4,  4,
        4,  4,  4,  4,  4,  4,  4,  5,
        4,  4,  4,  4,  4,  4,  5,  5,
        4,  4,  4,  4,  4,  5,  5,  6,
        4,  4,  4,  4,  5,  5,  6,  7,
        4,  4,  4,  4,  5,  6,  7,  7
    }
};
 109
/*
 * Base chroma quantization matrices, one 64-entry matrix per profile and
 * indexed by avctx->profile (0 - apco, 1 - apcs, 2 - apcn, 3 - apch, 4 - ap4h).
 * The selected matrix is copied verbatim into every frame header and is
 * multiplied by 1..16 in prores_encode_init() to build ctx->qmat_chroma.
 */
static const uint8_t QMAT_CHROMA[5][64] = {
    {
         4,  7,  9, 11, 13, 14, 63, 63,
         7,  7, 11, 12, 14, 63, 63, 63,
         9, 11, 13, 14, 63, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63
    }, {
         4,  5,  6,  7,  9, 11, 13, 15,
         5,  5,  7,  8, 11, 13, 15, 17,
         6,  7,  9, 11, 13, 15, 15, 17,
         7,  7,  9, 11, 13, 15, 17, 19,
         7,  9, 11, 13, 14, 16, 19, 23,
         9, 11, 13, 14, 16, 19, 23, 29,
         9, 11, 13, 15, 17, 21, 28, 35,
        11, 13, 16, 17, 21, 28, 35, 41
    }, {
         4,  4,  5,  5,  6,  7,  7,  9,
         4,  4,  5,  6,  7,  7,  9,  9,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  6,  7,  7,  8,  9, 10, 12,
         6,  7,  7,  8,  9, 10, 12, 15,
         6,  7,  7,  9, 10, 11, 14, 17,
         7,  7,  9, 10, 11, 14, 17, 21
    }, {
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  5,
         4,  4,  4,  4,  4,  4,  5,  5,
         4,  4,  4,  4,  4,  5,  5,  6,
         4,  4,  4,  4,  5,  5,  6,  7,
         4,  4,  4,  4,  5,  6,  7,  7
    }, { /* 444 */
        4,  4,  4,  4,  4,  4,  4,  4,
        4,  4,  4,  4,  4,  4,  4,  4,
        4,  4,  4,  4,  4,  4,  4,  4,
        4,  4,  4,  4,  4,  4,  4,  5,
        4,  4,  4,  4,  4,  4,  5,  5,
        4,  4,  4,  4,  4,  5,  5,  6,
        4,  4,  4,  4,  5,  5,  6,  7,
        4,  4,  4,  4,  5,  6,  7,  7
    }
};
 158
 159
/* Private encoder state, stored in avctx->priv_data. */
typedef struct {
    AVClass *class;
    FDCTDSPContext fdsp;    /* forward DCT implementation, set up in prores_encode_init() */
    /* Scratch planes for slices that extend past the picture edge.
     * fill_y/u/v are only allocated when width or height is not a multiple
     * of 16 (fill_u and fill_v point into the fill_y allocation);
     * fill_a is only allocated for YUVA444P10 input. */
    uint8_t* fill_y;
    uint8_t* fill_u;
    uint8_t* fill_v;
    uint8_t* fill_a;

    /* Quantization matrices pre-multiplied by qp; row [qp - 1] holds the
     * profile's base matrix scaled by qp, for qp in 1..16. */
    int qmat_luma[16][64];
    int qmat_chroma[16][64];

    int is_422;             /* 1 for the 422 profiles, 0 for 4444 */
    int need_alpha;         /* 1 when an alpha plane is encoded (YUVA444P10 input) */

    char *vendor;           /* vendor ID written to the frame header; must be 4 characters long */
} ProresContext;
 176
 177 static void encode_codeword(PutBitContext *pb, int val, int codebook)
 178 {
 179     unsigned int rice_order, exp_order, switch_bits, first_exp, exp, zeros;
 180
 181     /* number of bits to switch between rice and exp golomb */
 182     switch_bits = codebook & 3;
 183     rice_order  = codebook >> 5;
 184     exp_order   = (codebook >> 2) & 7;
 185
 186     first_exp = ((switch_bits + 1) << rice_order);
 187
 188     if (val >= first_exp) { /* exp golomb */
 189         val -= first_exp;
 190         val += (1 << exp_order);
 191         exp = av_log2(val);
 192         zeros = exp - exp_order + switch_bits + 1;
 193         put_bits(pb, zeros, 0);
 194         put_bits(pb, exp + 1, val);
 195     } else if (rice_order) {
 196         put_bits(pb, (val >> rice_order), 0);
 197         put_bits(pb, 1, 1);
 198         put_sbits(pb, rice_order, val);
 199     } else {
 200         put_bits(pb, val, 0);
 201         put_bits(pb, 1, 1);
 202     }
 203 }
 204
/* Helpers for coefficient coding. The shifts by 31 extract the sign as a
 * 0 / -1 mask (assumes 32-bit int with arithmetic right shift). */

/* Quantize: divide val by entry ind of the (already qp-scaled) matrix qmat. */
#define QSCALE(qmat,ind,val) ((val) / ((qmat)[ind]))
/* Fold a signed value into a non-negative code: 0, -1, 1, -2, ... -> 0, 1, 2, 3, ... */
#define TO_GOLOMB(val) (((val) << 1) ^ ((val) >> 31))
/* Sign mask of val XORed with the given sign mask. */
#define DIFF_SIGN(val, sign) (((val) >> 31) ^ (sign))
/* 1 if val is negative, 0 otherwise. */
#define IS_NEGATIVE(val) ((((val) >> 31) ^ -1) + 1)
/* 0 for val == 0, otherwise 2 * val + sign. */
#define TO_GOLOMB2(val,sign) ((val)==0 ? 0 : ((val) << 1) + (sign))
 210
 211 static av_always_inline int get_level(int val)
 212 {
 213     int sign = (val >> 31);
 214     return (val ^ sign) - sign;
 215 }
 216
 217 #define FIRST_DC_CB 0xB8
 218
 219 static const uint8_t dc_codebook[7] = { 0x04, 0x28, 0x28, 0x4D, 0x4D, 0x70, 0x70};
 220
 221 static void encode_dc_coeffs(PutBitContext *pb, int16_t *in,
 222         int blocks_per_slice, int *qmat)
 223 {
 224     int prev_dc, code;
 225     int i, sign, idx;
 226     int new_dc, delta, diff_sign, new_code;
 227
 228     prev_dc = QSCALE(qmat, 0, in[0] - 16384);
 229     code = TO_GOLOMB(prev_dc);
 230     encode_codeword(pb, code, FIRST_DC_CB);
 231
 232     code = 5; sign = 0; idx = 64;
 233     for (i = 1; i < blocks_per_slice; i++, idx += 64) {
 234         new_dc    = QSCALE(qmat, 0, in[idx] - 16384);
 235         delta     = new_dc - prev_dc;
 236         diff_sign = DIFF_SIGN(delta, sign);
 237         new_code  = TO_GOLOMB2(get_level(delta), diff_sign);
 238
 239         encode_codeword(pb, new_code, dc_codebook[FFMIN(code, 6)]);
 240
 241         code      = new_code;
 242         sign      = delta >> 31;
 243         prev_dc   = new_dc;
 244     }
 245 }
 246
 247 static const uint8_t run_to_cb[16] = { 0x06, 0x06, 0x05, 0x05, 0x04, 0x29,
 248         0x29, 0x29, 0x29, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x4C };
 249 static const uint8_t lev_to_cb[10] = { 0x04, 0x0A, 0x05, 0x06, 0x04, 0x28,
 250         0x28, 0x28, 0x28, 0x4C };
 251
 252 static void encode_ac_coeffs(PutBitContext *pb,
 253         int16_t *in, int blocks_per_slice, int *qmat)
 254 {
 255     int prev_run = 4;
 256     int prev_level = 2;
 257
 258     int run = 0, level, code, i, j;
 259     for (i = 1; i < 64; i++) {
 260         int indp = ff_prores_progressive_scan[i];
 261         for (j = 0; j < blocks_per_slice; j++) {
 262             int val = QSCALE(qmat, indp, in[(j << 6) + indp]);
 263             if (val) {
 264                 encode_codeword(pb, run, run_to_cb[FFMIN(prev_run, 15)]);
 265
 266                 prev_run   = run;
 267                 run        = 0;
 268                 level      = get_level(val);
 269                 code       = level - 1;
 270
 271                 encode_codeword(pb, code, lev_to_cb[FFMIN(prev_level, 9)]);
 272
 273                 prev_level = level;
 274
 275                 put_bits(pb, 1, IS_NEGATIVE(val));
 276             } else {
 277                 ++run;
 278             }
 279         }
 280     }
 281 }
 282
 283 static void get(uint8_t *pixels, int stride, int16_t* block)
 284 {
 285     int i;
 286
 287     for (i = 0; i < 8; i++) {
 288         AV_WN64(block, AV_RN64(pixels));
 289         AV_WN64(block+4, AV_RN64(pixels+8));
 290         pixels += stride;
 291         block += 8;
 292     }
 293 }
 294
 295 static void fdct_get(FDCTDSPContext *fdsp, uint8_t *pixels, int stride, int16_t* block)
 296 {
 297     get(pixels, stride, block);
 298     fdsp->fdct(block);
 299 }
 300
 301 static void calc_plane_dct(FDCTDSPContext *fdsp, uint8_t *src, int16_t * blocks, int src_stride, int mb_count, int chroma, int is_422)
 302 {
 303     int16_t *block;
 304     int i;
 305
 306     block = blocks;
 307
 308     if (!chroma) { /* Luma plane */
 309         for (i = 0; i < mb_count; i++) {
 310             fdct_get(fdsp, src,                       src_stride, block + (0 << 6));
 311             fdct_get(fdsp, src + 16,                  src_stride, block + (1 << 6));
 312             fdct_get(fdsp, src +      8 * src_stride, src_stride, block + (2 << 6));
 313             fdct_get(fdsp, src + 16 + 8 * src_stride, src_stride, block + (3 << 6));
 314
 315             block += 256;
 316             src   += 32;
 317         }
 318     } else if (chroma && is_422){ /* chroma plane 422 */
 319         for (i = 0; i < mb_count; i++) {
 320             fdct_get(fdsp, src,                  src_stride, block + (0 << 6));
 321             fdct_get(fdsp, src + 8 * src_stride, src_stride, block + (1 << 6));
 322             block += (256 >> 1);
 323             src   += (32  >> 1);
 324         }
 325     } else { /* chroma plane 444 */
 326         for (i = 0; i < mb_count; i++) {
 327             fdct_get(fdsp, src,                       src_stride, block + (0 << 6));
 328             fdct_get(fdsp, src +      8 * src_stride, src_stride, block + (1 << 6));
 329             fdct_get(fdsp, src + 16,                  src_stride, block + (2 << 6));
 330             fdct_get(fdsp, src + 16 + 8 * src_stride, src_stride, block + (3 << 6));
 331
 332             block += 256;
 333             src   += 32;
 334         }
 335     }
 336 }
 337
 338 static int encode_slice_plane(int16_t *blocks, int mb_count, uint8_t *buf, unsigned buf_size, int *qmat, int sub_sample_chroma)
 339 {
 340     int blocks_per_slice;
 341     PutBitContext pb;
 342
 343     blocks_per_slice = mb_count << (2 - sub_sample_chroma);
 344     init_put_bits(&pb, buf, buf_size);
 345
 346     encode_dc_coeffs(&pb, blocks, blocks_per_slice, qmat);
 347     encode_ac_coeffs(&pb, blocks, blocks_per_slice, qmat);
 348
 349     flush_put_bits(&pb);
 350     return put_bits_ptr(&pb) - pb.buf;
 351 }
 352
 353 static av_always_inline unsigned encode_slice_data(AVCodecContext *avctx,
 354                                                    int16_t * blocks_y, int16_t * blocks_u, int16_t * blocks_v,
 355                                                    unsigned mb_count, uint8_t *buf, unsigned data_size,
 356                                                    unsigned* y_data_size, unsigned* u_data_size, unsigned* v_data_size,
 357                                                    int qp)
 358 {
 359     ProresContext* ctx = avctx->priv_data;
 360
 361     *y_data_size = encode_slice_plane(blocks_y, mb_count,
 362                                       buf, data_size, ctx->qmat_luma[qp - 1], 0);
 363
 364     if (!(avctx->flags & AV_CODEC_FLAG_GRAY)) {
 365         *u_data_size = encode_slice_plane(blocks_u, mb_count, buf + *y_data_size, data_size - *y_data_size,
 366                                           ctx->qmat_chroma[qp - 1], ctx->is_422);
 367
 368         *v_data_size = encode_slice_plane(blocks_v, mb_count, buf + *y_data_size + *u_data_size,
 369                                           data_size - *y_data_size - *u_data_size,
 370                                           ctx->qmat_chroma[qp - 1], ctx->is_422);
 371     }
 372
 373     return *y_data_size + *u_data_size + *v_data_size;
 374 }
 375
 376 static void put_alpha_diff(PutBitContext *pb, int cur, int prev)
 377 {
 378     const int abits = 16;
 379     const int dbits = 7;
 380     const int dsize = 1 << dbits - 1;
 381     int diff = cur - prev;
 382
 383     diff = av_mod_uintp2(diff, abits);
 384     if (diff >= (1 << abits) - dsize)
 385         diff -= 1 << abits;
 386     if (diff < -dsize || diff > dsize || !diff) {
 387         put_bits(pb, 1, 1);
 388         put_bits(pb, abits, diff);
 389     } else {
 390         put_bits(pb, 1, 0);
 391         put_bits(pb, dbits - 1, FFABS(diff) - 1);
 392         put_bits(pb, 1, diff < 0);
 393     }
 394 }
 395
 396 static inline void put_alpha_run(PutBitContext *pb, int run)
 397 {
 398     if (run) {
 399         put_bits(pb, 1, 0);
 400         if (run < 0x10)
 401             put_bits(pb, 4, run);
 402         else
 403             put_bits(pb, 15, run);
 404     } else {
 405         put_bits(pb, 1, 1);
 406     }
 407 }
 408
 409 static av_always_inline int encode_alpha_slice_data(AVCodecContext *avctx, int8_t * src_a,
 410                                                    unsigned mb_count, uint8_t *buf, unsigned data_size, unsigned* a_data_size)
 411 {
 412     const int abits = 16;
 413     const int mask  = (1 << abits) - 1;
 414     const int num_coeffs = mb_count * 256;
 415     int prev = mask, cur;
 416     int idx = 0;
 417     int run = 0;
 418     int16_t * blocks = (int16_t *)src_a;
 419     PutBitContext pb;
 420     init_put_bits(&pb, buf, data_size);
 421
 422     cur = blocks[idx++];
 423     put_alpha_diff(&pb, cur, prev);
 424     prev = cur;
 425     do {
 426         cur = blocks[idx++];
 427         if (cur != prev) {
 428             put_alpha_run (&pb, run);
 429             put_alpha_diff(&pb, cur, prev);
 430             prev = cur;
 431             run  = 0;
 432         } else {
 433             run++;
 434         }
 435     } while (idx < num_coeffs);
 436     if (run)
 437         put_alpha_run(&pb, run);
 438     flush_put_bits(&pb);
 439     *a_data_size = put_bits_count(&pb) >> 3;
 440
 441     if (put_bits_left(&pb) < 0) {
 442         av_log(avctx, AV_LOG_ERROR,
 443                "Underestimated required buffer size.\n");
 444         return AVERROR_BUG;
 445     } else {
 446         return 0;
 447     }
 448 }
 449
 450 static void subimage_with_fill(uint16_t *src, unsigned x, unsigned y,
 451         unsigned stride, unsigned width, unsigned height, uint16_t *dst,
 452         unsigned dst_width, unsigned dst_height)
 453 {
 454
 455     int box_width = FFMIN(width - x, dst_width);
 456     int box_height = FFMIN(height - y, dst_height);
 457     int i, j, src_stride = stride >> 1;
 458     uint16_t last_pix, *last_line;
 459
 460     src += y * src_stride + x;
 461     for (i = 0; i < box_height; ++i) {
 462         for (j = 0; j < box_width; ++j) {
 463             dst[j] = src[j];
 464         }
 465         last_pix = dst[j - 1];
 466         for (; j < dst_width; j++)
 467             dst[j] = last_pix;
 468         src += src_stride;
 469         dst += dst_width;
 470     }
 471     last_line = dst - dst_width;
 472     for (; i < dst_height; i++) {
 473         for (j = 0; j < dst_width; ++j) {
 474             dst[j] = last_line[j];
 475         }
 476         dst += dst_width;
 477     }
 478 }
 479
 480 /* reorganize alpha data and convert 10b -> 16b */
 481 static void subimage_alpha_with_fill(uint16_t *src, unsigned x, unsigned y,
 482                                unsigned stride, unsigned width, unsigned height, uint16_t *dst,
 483                                unsigned dst_width, unsigned dst_height)
 484 {
 485     int box_width = FFMIN(width - x, dst_width);
 486     int box_height = FFMIN(height - y, dst_height);
 487     int i, j, src_stride = stride >> 1;
 488     uint16_t last_pix, *last_line;
 489
 490     src += y * src_stride + x;
 491     for (i = 0; i < box_height; ++i) {
 492         for (j = 0; j < box_width; ++j) {
 493             dst[j] = src[j] << 6; /* 10b to 16b */
 494         }
 495         last_pix = dst[j - 1] << 6; /* 10b to 16b */
 496         for (; j < dst_width; j++)
 497             dst[j] = last_pix;
 498         src += src_stride;
 499         dst += dst_width;
 500     }
 501     last_line = dst - dst_width;
 502     for (; i < dst_height; i++) {
 503         for (j = 0; j < dst_width; ++j) {
 504             dst[j] = last_line[j];
 505         }
 506         dst += dst_width;
 507     }
 508 }
 509
/**
 * Encode one slice (header, Y/U/V payloads and optional alpha) into buf.
 *
 * Slices that lie fully inside the picture are transformed straight from
 * the frame and re-encoded with a higher or lower qp until their size is
 * inside the target window or the profile's qp limit is reached. Slices
 * that cross the right or bottom picture edge ("unsafe") are first copied
 * into the padded fill buffers and are encoded once with the current qp.
 *
 * @param avctx      codec context
 * @param pic        source frame
 * @param mb_x       horizontal slice position, in macroblocks
 * @param mb_y       vertical slice position, in macroblocks
 * @param mb_count   slice width in macroblocks
 * @param buf        output buffer for the slice
 * @param data_size  size of buf in bytes
 * @param unsafe     non-zero when the slice extends past the picture edge
 * @param qp         in: qp to start with; out: qp that was finally used
 * @return total number of bytes written, or a negative error code from the
 *         alpha encoder
 */
static int encode_slice(AVCodecContext *avctx, const AVFrame *pic, int mb_x,
        int mb_y, unsigned mb_count, uint8_t *buf, unsigned data_size,
        int unsafe, int *qp)
{
    int luma_stride, chroma_stride, alpha_stride = 0;
    ProresContext* ctx = avctx->priv_data;
    int hdr_size = 6 + (ctx->need_alpha * 2); /* the v data size is only written when there is alpha */
    int ret = 0, slice_size;
    uint8_t *dest_y, *dest_u, *dest_v;
    unsigned y_data_size = 0, u_data_size = 0, v_data_size = 0, a_data_size = 0;
    FDCTDSPContext *fdsp = &ctx->fdsp;
    /* slice size target derived from the per-profile table, with a window of +/- 1/8 around it */
    int tgt_bits   = (mb_count * bitrate_table[avctx->profile]) >> 2;
    int low_bytes  = (tgt_bits - (tgt_bits >> 3)) >> 3; // 12% bitrate fluctuation
    int high_bytes = (tgt_bits + (tgt_bits >> 3)) >> 3;

    LOCAL_ALIGNED(16, int16_t, blocks_y, [DEFAULT_SLICE_MB_WIDTH << 8]);
    LOCAL_ALIGNED(16, int16_t, blocks_u, [DEFAULT_SLICE_MB_WIDTH << 8]);
    LOCAL_ALIGNED(16, int16_t, blocks_v, [DEFAULT_SLICE_MB_WIDTH << 8]);

    luma_stride   = pic->linesize[0];
    chroma_stride = pic->linesize[1];

    if (ctx->need_alpha)
        alpha_stride = pic->linesize[3];

    /* a macroblock is 16 rows high and 16 samples (32 bytes) wide;
     * 4:2:2 chroma is half as wide */
    dest_y = pic->data[0] + (mb_y << 4) * luma_stride   + (mb_x << 5);
    dest_u = pic->data[1] + (mb_y << 4) * chroma_stride + (mb_x << (5 - ctx->is_422));
    dest_v = pic->data[2] + (mb_y << 4) * chroma_stride + (mb_x << (5 - ctx->is_422));

    if (unsafe) {
        /* the slice crosses the picture edge: build padded copies of the
         * three planes and transform those instead of the frame */
        subimage_with_fill((uint16_t *) pic->data[0], mb_x << 4, mb_y << 4,
                luma_stride, avctx->width, avctx->height,
                (uint16_t *) ctx->fill_y, mb_count << 4, 16);
        subimage_with_fill((uint16_t *) pic->data[1], mb_x << (4 - ctx->is_422), mb_y << 4,
                           chroma_stride, avctx->width >> ctx->is_422, avctx->height,
                           (uint16_t *) ctx->fill_u, mb_count << (4 - ctx->is_422), 16);
        subimage_with_fill((uint16_t *) pic->data[2], mb_x << (4 - ctx->is_422), mb_y << 4,
                           chroma_stride, avctx->width >> ctx->is_422, avctx->height,
                           (uint16_t *) ctx->fill_v, mb_count << (4 - ctx->is_422), 16);

        /* the fill buffers are tightly packed, so their stride is the slice width in bytes */
        calc_plane_dct(fdsp, ctx->fill_y, blocks_y, mb_count <<  5,                mb_count, 0, 0);
        calc_plane_dct(fdsp, ctx->fill_u, blocks_u, mb_count << (5 - ctx->is_422), mb_count, 1, ctx->is_422);
        calc_plane_dct(fdsp, ctx->fill_v, blocks_v, mb_count << (5 - ctx->is_422), mb_count, 1, ctx->is_422);

        /* encoded once with the current qp; no rate adjustment on this path */
        slice_size = encode_slice_data(avctx, blocks_y, blocks_u, blocks_v,
                          mb_count, buf + hdr_size, data_size - hdr_size,
                          &y_data_size, &u_data_size, &v_data_size,
                          *qp);
    } else {
        calc_plane_dct(fdsp, dest_y, blocks_y, luma_stride, mb_count, 0, 0);
        calc_plane_dct(fdsp, dest_u, blocks_u, chroma_stride, mb_count, 1, ctx->is_422);
        calc_plane_dct(fdsp, dest_v, blocks_v, chroma_stride, mb_count, 1, ctx->is_422);

        slice_size = encode_slice_data(avctx, blocks_y, blocks_u, blocks_v,
                          mb_count, buf + hdr_size, data_size - hdr_size,
                          &y_data_size, &u_data_size, &v_data_size,
                          *qp);

        if (slice_size > high_bytes && *qp < qp_end_table[avctx->profile]) {
            /* too large: raise qp until the slice fits or the upper qp limit is hit */
            do {
                *qp += 1;
                slice_size = encode_slice_data(avctx, blocks_y, blocks_u, blocks_v,
                                               mb_count, buf + hdr_size, data_size - hdr_size,
                                               &y_data_size, &u_data_size, &v_data_size,
                                               *qp);
            } while (slice_size > high_bytes && *qp < qp_end_table[avctx->profile]);
        } else if (slice_size < low_bytes && *qp
                > qp_start_table[avctx->profile]) {
            /* too small: lower qp until the slice is large enough or the lower qp limit is hit */
            do {
                *qp -= 1;
                slice_size = encode_slice_data(avctx, blocks_y, blocks_u, blocks_v,
                                               mb_count, buf + hdr_size, data_size - hdr_size,
                                               &y_data_size, &u_data_size, &v_data_size,
                                               *qp);
            } while (slice_size < low_bytes && *qp > qp_start_table[avctx->profile]);
        }
    }

    /* slice header: header size in bits, qp, then the big-endian plane sizes */
    buf[0] = hdr_size << 3;
    buf[1] = *qp;
    AV_WB16(buf + 2, y_data_size);
    AV_WB16(buf + 4, u_data_size);

    if (ctx->need_alpha) {
        AV_WB16(buf + 6, v_data_size); /* write v data size only if there is alpha */

        /* alpha always goes through the padded fill buffer and is
         * appended after the Y/U/V payload */
        subimage_alpha_with_fill((uint16_t *) pic->data[3], mb_x << 4, mb_y << 4,
                           alpha_stride, avctx->width, avctx->height,
                           (uint16_t *) ctx->fill_a, mb_count << 4, 16);
        ret = encode_alpha_slice_data(avctx, ctx->fill_a, mb_count,
                                      buf + hdr_size + slice_size,
                                      data_size - hdr_size - slice_size, &a_data_size);
    }

    if (ret != 0) {
        return ret;
    }
    return hdr_size + y_data_size + u_data_size + v_data_size + a_data_size;
}
 609
 610 static int prores_encode_picture(AVCodecContext *avctx, const AVFrame *pic,
 611         uint8_t *buf, const int buf_size)
 612 {
 613     int mb_width = (avctx->width + 15) >> 4;
 614     int mb_height = (avctx->height + 15) >> 4;
 615     int hdr_size, sl_size, i;
 616     int mb_y, sl_data_size, qp;
 617     int unsafe_bot, unsafe_right;
 618     uint8_t *sl_data, *sl_data_sizes;
 619     int slice_per_line = 0, rem = mb_width;
 620
 621     for (i = av_log2(DEFAULT_SLICE_MB_WIDTH); i >= 0; --i) {
 622         slice_per_line += rem >> i;
 623         rem &= (1 << i) - 1;
 624     }
 625
 626     qp = qp_start_table[avctx->profile];
 627     hdr_size = 8; sl_data_size = buf_size - hdr_size;
 628     sl_data_sizes = buf + hdr_size;
 629     sl_data = sl_data_sizes + (slice_per_line * mb_height * 2);
 630     for (mb_y = 0; mb_y < mb_height; mb_y++) {
 631         int mb_x = 0;
 632         int slice_mb_count = DEFAULT_SLICE_MB_WIDTH;
 633         while (mb_x < mb_width) {
 634             while (mb_width - mb_x < slice_mb_count)
 635                 slice_mb_count >>= 1;
 636
 637             unsafe_bot = (avctx->height & 0xf) && (mb_y == mb_height - 1);
 638             unsafe_right = (avctx->width & 0xf) && (mb_x + slice_mb_count == mb_width);
 639
 640             sl_size = encode_slice(avctx, pic, mb_x, mb_y, slice_mb_count,
 641                     sl_data, sl_data_size, unsafe_bot || unsafe_right, &qp);
 642             if (sl_size < 0){
 643                 return sl_size;
 644             }
 645
 646             bytestream_put_be16(&sl_data_sizes, sl_size);
 647             sl_data           += sl_size;
 648             sl_data_size      -= sl_size;
 649             mb_x              += slice_mb_count;
 650         }
 651     }
 652
 653     buf[0] = hdr_size << 3;
 654     AV_WB32(buf + 1, sl_data - buf);
 655     AV_WB16(buf + 5, slice_per_line * mb_height);
 656     buf[7] = av_log2(DEFAULT_SLICE_MB_WIDTH) << 4;
 657
 658     return sl_data - buf;
 659 }
 660
/**
 * Encode one frame into a packet.
 *
 * The picture data is encoded first, directly behind the space reserved for
 * the 8 byte frame container (size + 'icpf') and the 148 byte frame header;
 * both are filled in afterwards, once the picture size is known.
 *
 * @param avctx       codec context
 * @param pkt         output packet, allocated here
 * @param pict        frame to encode
 * @param got_packet  set to 1 when a packet was produced
 * @return 0 on success, a negative error code on failure
 */
static int prores_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                               const AVFrame *pict, int *got_packet)
{
    ProresContext *ctx = avctx->priv_data;
    int header_size = 148;
    uint8_t *buf;
    int pic_size, ret;
    /* worst-case packet size estimate */
    int frame_size = FFALIGN(avctx->width, 16) * FFALIGN(avctx->height, 16)*16 + 500 + AV_INPUT_BUFFER_MIN_SIZE; //FIXME choose tighter limit


    /* NOTE(review): AV_INPUT_BUFFER_MIN_SIZE is added here a second time on
     * top of frame_size; this looks like harmless over-allocation - confirm. */
    if ((ret = ff_alloc_packet2(avctx, pkt, frame_size + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
        return ret;

    buf = pkt->data;
    pic_size = prores_encode_picture(avctx, pict, buf + header_size + 8,
            pkt->size - header_size - 8);
    if (pic_size < 0) {
        return pic_size;
    }

    /* frame container: total size and the 'icpf' tag */
    bytestream_put_be32(&buf, pic_size + 8 + header_size);
    bytestream_put_buffer(&buf, "icpf", 4);

    /* frame header */
    bytestream_put_be16(&buf, header_size);
    bytestream_put_be16(&buf, 0); /* version */
    bytestream_put_buffer(&buf, ctx->vendor, 4);
    bytestream_put_be16(&buf, avctx->width);
    bytestream_put_be16(&buf, avctx->height);
    if (avctx->profile == FF_PROFILE_PRORES_4444) {
        *buf++ = 0xC2; // 444, not interlaced
    } else {
        *buf++ = 0x82; // 422, not interlaced
    }
    *buf++ = 0; /* reserved */
    /* only write color properties, if valid value. set to unspecified otherwise */
    *buf++ = ff_int_from_list_or_default(avctx, "frame color primaries", pict->color_primaries, valid_primaries, 0);
    *buf++ = ff_int_from_list_or_default(avctx, "frame color trc", pict->color_trc, valid_trc, 0);
    *buf++ = ff_int_from_list_or_default(avctx, "frame colorspace", pict->colorspace, valid_colorspace, 0);
    if (avctx->profile >= FF_PROFILE_PRORES_4444) {
        if (avctx->pix_fmt == AV_PIX_FMT_YUV444P10) {
            *buf++ = 0xA0;/* src b64a and no alpha */
        } else {
            *buf++ = 0xA2;/* src b64a and 16b alpha */
        }
    } else {
        *buf++ = 32;/* src v210 and no alpha */
    }
    *buf++ = 0; /* reserved */
    *buf++ = 3; /* luma and chroma matrix present */

    /* the unscaled base matrices of the profile */
    bytestream_put_buffer(&buf, QMAT_LUMA[avctx->profile],   64);
    bytestream_put_buffer(&buf, QMAT_CHROMA[avctx->profile], 64);

    /* every frame is a key frame */
    pkt->flags |= AV_PKT_FLAG_KEY;
    pkt->size = pic_size + 8 + header_size;
    *got_packet = 1;

    return 0;
}
 720
 721 static void scale_mat(const uint8_t* src, int* dst, int scale)
 722 {
 723     int i;
 724     for (i = 0; i < 64; i++)
 725         dst[i] = src[i] * scale;
 726 }
 727
 728 static av_cold int prores_encode_init(AVCodecContext *avctx)
 729 {
 730     int i;
 731     ProresContext* ctx = avctx->priv_data;
 732
 733     avctx->bits_per_raw_sample = 10;
 734     ctx->need_alpha = 0;
 735
 736     if (avctx->width & 0x1) {
 737         av_log(avctx, AV_LOG_ERROR,
 738                 "frame width needs to be multiple of 2\n");
 739         return AVERROR(EINVAL);
 740     }
 741
 742     if (avctx->width > 65534 || avctx->height > 65535) {
 743         av_log(avctx, AV_LOG_ERROR,
 744                 "The maximum dimensions are 65534x65535\n");
 745         return AVERROR(EINVAL);
 746     }
 747
 748     if (strlen(ctx->vendor) != 4) {
 749         av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
 750         return AVERROR(EINVAL);
 751     }
 752
 753     if (avctx->profile == FF_PROFILE_UNKNOWN) {
 754         if (avctx->pix_fmt == AV_PIX_FMT_YUV422P10) {
 755             avctx->profile = FF_PROFILE_PRORES_STANDARD;
 756             av_log(avctx, AV_LOG_INFO,
 757                 "encoding with ProRes standard (apcn) profile\n");
 758         } else if (avctx->pix_fmt == AV_PIX_FMT_YUV444P10) {
 759             avctx->profile = FF_PROFILE_PRORES_4444;
 760             av_log(avctx, AV_LOG_INFO,
 761                    "encoding with ProRes 4444 (ap4h) profile\n");
 762         } else if (avctx->pix_fmt == AV_PIX_FMT_YUVA444P10) {
 763             avctx->profile = FF_PROFILE_PRORES_4444;
 764             av_log(avctx, AV_LOG_INFO,
 765                    "encoding with ProRes 4444+ (ap4h) profile\n");
 766         } else {
 767             av_log(avctx, AV_LOG_ERROR, "Unknown pixel format\n");
 768             return AVERROR(EINVAL);
 769         }
 770     } else if (avctx->profile < FF_PROFILE_PRORES_PROXY
 771             || avctx->profile > FF_PROFILE_PRORES_4444) {
 772         av_log(
 773                 avctx,
 774                 AV_LOG_ERROR,
 775                 "unknown profile %d, use [0 - apco, 1 - apcs, 2 - apcn (default), 3 - apch, 4 - ap4h]\n",
 776                 avctx->profile);
 777         return AVERROR(EINVAL);
 778     } else if ((avctx->pix_fmt == AV_PIX_FMT_YUV422P10) && (avctx->profile > FF_PROFILE_PRORES_HQ)){
 779         av_log(avctx, AV_LOG_ERROR,
 780                "encoding with ProRes 444 (ap4h) profile, need YUV444P10 input\n");
 781         return AVERROR(EINVAL);
 782     }  else if ((avctx->pix_fmt == AV_PIX_FMT_YUV444P10 || avctx->pix_fmt == AV_PIX_FMT_YUVA444P10)
 783                 && (avctx->profile < FF_PROFILE_PRORES_4444)){
 784         av_log(avctx, AV_LOG_ERROR,
 785                "encoding with ProRes Proxy/LT/422/422 HQ (apco, apcs, apcn, ap4h) profile, need YUV422P10 input\n");
 786         return AVERROR(EINVAL);
 787     }
 788
 789     if (avctx->profile < FF_PROFILE_PRORES_4444) { /* 422 versions */
 790         ctx->is_422 = 1;
 791         if ((avctx->height & 0xf) || (avctx->width & 0xf)) {
 792             ctx->fill_y = av_malloc(4 * (DEFAULT_SLICE_MB_WIDTH << 8));
 793             if (!ctx->fill_y)
 794                 return AVERROR(ENOMEM);
 795             ctx->fill_u = ctx->fill_y + (DEFAULT_SLICE_MB_WIDTH << 9);
 796             ctx->fill_v = ctx->fill_u + (DEFAULT_SLICE_MB_WIDTH << 8);
 797         }
 798     } else { /* 444 */
 799         ctx->is_422 = 0;
 800         if ((avctx->height & 0xf) || (avctx->width & 0xf)) {
 801             ctx->fill_y = av_malloc(3 * (DEFAULT_SLICE_MB_WIDTH << 9));
 802             if (!ctx->fill_y)
 803                 return AVERROR(ENOMEM);
 804             ctx->fill_u = ctx->fill_y + (DEFAULT_SLICE_MB_WIDTH << 9);
 805             ctx->fill_v = ctx->fill_u + (DEFAULT_SLICE_MB_WIDTH << 9);
 806         }
 807         if (avctx->pix_fmt == AV_PIX_FMT_YUVA444P10) {
 808             ctx->need_alpha = 1;
 809             ctx->fill_a = av_malloc(DEFAULT_SLICE_MB_WIDTH << 9); /* 8 blocks x 16px x 16px x sizeof (uint16) */
 810             if (!ctx->fill_a)
 811                 return AVERROR(ENOMEM);
 812         }
 813     }
 814
 815     ff_fdctdsp_init(&ctx->fdsp, avctx);
 816
 817     avctx->codec_tag = AV_RL32((const uint8_t*)profiles[avctx->profile].name);
 818
 819     for (i = 1; i <= 16; i++) {
 820         scale_mat(QMAT_LUMA[avctx->profile]  , ctx->qmat_luma[i - 1]  , i);
 821         scale_mat(QMAT_CHROMA[avctx->profile], ctx->qmat_chroma[i - 1], i);
 822     }
 823
 824     return 0;
 825 }
 826
 827 static av_cold int prores_encode_close(AVCodecContext *avctx)
 828 {
 829     ProresContext* ctx = avctx->priv_data;
 830     av_freep(&ctx->fill_y);
 831     av_freep(&ctx->fill_a);
 832
 833     return 0;
 834 }
 835
 836 #define OFFSET(x) offsetof(ProresContext, x)
 837 #define VE     AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 838
 839 static const AVOption options[] = {
 840     { "vendor", "vendor ID", OFFSET(vendor), AV_OPT_TYPE_STRING, { .str = "fmpg" }, CHAR_MIN, CHAR_MAX, VE },
 841     { NULL }
 842 };
 843
 844 static const AVClass proresaw_enc_class = {
 845     .class_name = "ProResAw encoder",
 846     .item_name  = av_default_item_name,
 847     .option     = options,
 848     .version    = LIBAVUTIL_VERSION_INT,
 849 };
 850
 851 static const AVClass prores_enc_class = {
 852     .class_name = "ProRes encoder",
 853     .item_name  = av_default_item_name,
 854     .option     = options,
 855     .version    = LIBAVUTIL_VERSION_INT,
 856 };
 857
 858 AVCodec ff_prores_aw_encoder = {
 859     .name           = "prores_aw",
 860     .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes"),
 861     .type           = AVMEDIA_TYPE_VIDEO,
 862     .id             = AV_CODEC_ID_PRORES,
 863     .priv_data_size = sizeof(ProresContext),
 864     .init           = prores_encode_init,
 865     .close          = prores_encode_close,
 866     .encode2        = prores_encode_frame,
 867     .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE},
 868     .capabilities   = AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
 869     .priv_class     = &proresaw_enc_class,
 870     .profiles       = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
 871 };
 872
 873 AVCodec ff_prores_encoder = {
 874     .name           = "prores",
 875     .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes"),
 876     .type           = AVMEDIA_TYPE_VIDEO,
 877     .id             = AV_CODEC_ID_PRORES,
 878     .priv_data_size = sizeof(ProresContext),
 879     .init           = prores_encode_init,
 880     .close          = prores_encode_close,
 881     .encode2        = prores_encode_frame,
 882     .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE},
 883     .capabilities   = AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
 884     .priv_class     = &prores_enc_class,
 885     .profiles       = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
 886 };