4 * Copyright (c) 2012 Konstantin Shishkov
6 * This encoder appears to be based on Anatoliy Wasserman's encoder, considering
7 * the similarities in the bugs.
9 * This file is part of FFmpeg.
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/mem_internal.h"
27 #include "libavutil/opt.h"
28 #include "libavutil/pixdesc.h"
33 #include "bytestream.h"
35 #include "proresdata.h"
/* Chroma-format codes; encode_frame() stores chroma_factor in the top two bits of the frame-flags byte. */
37 #define CFACTOR_Y422 2
38 #define CFACTOR_Y444 3
/* Upper limit of the mbs_per_slice option; also sizes the per-plane coefficient buffers. */
40 #define MAX_MBS_PER_SLICE 8
45 PRORES_PROFILE_AUTO = -1,
46 PRORES_PROFILE_PROXY = 0,
48 PRORES_PROFILE_STANDARD,
51 PRORES_PROFILE_4444XQ,
56 QUANT_MAT_PROXY_CHROMA,
/*
 * Built-in quantisation matrices of 64 entries each; every group of eight
 * rows below forms one matrix. encode_init() picks a row of this table via
 * the profile's quant / quant_chroma fields or via the quant_mat option.
 * NOTE(review): the matrix order presumably follows the QUANT_MAT_* enum - confirm.
 */
64 static const uint8_t prores_quant_matrices[][64] = {
66 4, 7, 9, 11, 13, 14, 15, 63,
67 7, 7, 11, 12, 14, 15, 63, 63,
68 9, 11, 13, 14, 15, 63, 63, 63,
69 11, 11, 13, 14, 63, 63, 63, 63,
70 11, 13, 14, 63, 63, 63, 63, 63,
71 13, 14, 63, 63, 63, 63, 63, 63,
72 13, 63, 63, 63, 63, 63, 63, 63,
73 63, 63, 63, 63, 63, 63, 63, 63,
76 4, 7, 9, 11, 13, 14, 63, 63,
77 7, 7, 11, 12, 14, 63, 63, 63,
78 9, 11, 13, 14, 63, 63, 63, 63,
79 11, 11, 13, 14, 63, 63, 63, 63,
80 11, 13, 14, 63, 63, 63, 63, 63,
81 13, 14, 63, 63, 63, 63, 63, 63,
82 13, 63, 63, 63, 63, 63, 63, 63,
83 63, 63, 63, 63, 63, 63, 63, 63
86 4, 5, 6, 7, 9, 11, 13, 15,
87 5, 5, 7, 8, 11, 13, 15, 17,
88 6, 7, 9, 11, 13, 15, 15, 17,
89 7, 7, 9, 11, 13, 15, 17, 19,
90 7, 9, 11, 13, 14, 16, 19, 23,
91 9, 11, 13, 14, 16, 19, 23, 29,
92 9, 11, 13, 15, 17, 21, 28, 35,
93 11, 13, 16, 17, 21, 28, 35, 41,
96 4, 4, 5, 5, 6, 7, 7, 9,
97 4, 4, 5, 6, 7, 7, 9, 9,
98 5, 5, 6, 7, 7, 9, 9, 10,
99 5, 5, 6, 7, 7, 9, 9, 10,
100 5, 6, 7, 7, 8, 9, 10, 12,
101 6, 7, 7, 8, 9, 10, 12, 15,
102 6, 7, 7, 9, 10, 11, 14, 17,
103 7, 7, 9, 10, 11, 14, 17, 21,
106 4, 4, 4, 4, 4, 4, 4, 4,
107 4, 4, 4, 4, 4, 4, 4, 4,
108 4, 4, 4, 4, 4, 4, 4, 4,
109 4, 4, 4, 4, 4, 4, 4, 5,
110 4, 4, 4, 4, 4, 4, 5, 5,
111 4, 4, 4, 4, 4, 5, 5, 6,
112 4, 4, 4, 4, 5, 5, 6, 7,
113 4, 4, 4, 4, 5, 6, 7, 7,
116 2, 2, 2, 2, 2, 2, 2, 2,
117 2, 2, 2, 2, 2, 2, 2, 2,
118 2, 2, 2, 2, 2, 2, 2, 2,
119 2, 2, 2, 2, 2, 2, 2, 3,
120 2, 2, 2, 2, 2, 2, 3, 3,
121 2, 2, 2, 2, 2, 3, 3, 3,
122 2, 2, 2, 2, 3, 3, 3, 4,
123 2, 2, 2, 2, 3, 3, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4,
131 4, 4, 4, 4, 4, 4, 4, 4,
132 4, 4, 4, 4, 4, 4, 4, 4,
133 4, 4, 4, 4, 4, 4, 4, 4,
/*
 * Macroblock-count thresholds. When bits_per_mb is not set by the user,
 * encode_init() compares the picture's macroblock count against these to
 * choose which column of a profile's br_tab to use.
 */
137 #define NUM_MB_LIMITS 4
138 static const int prores_mb_limits[NUM_MB_LIMITS] = {
139 1620, // up to 720x576
140 2700, // up to 960x720
141 6075, // up to 1440x1080
142 9216, // up to 2048x1152
/*
 * Per-profile encoding parameters. encode_init() indexes this table with the
 * numeric profile option (prores_profile_info + ctx->profile).
 */
145 static const struct prores_profile {
146 const char *full_name;
/* default bits per macroblock, one entry per prores_mb_limits[] size class */
150 int br_tab[NUM_MB_LIMITS];
153 } prores_profile_info[6] = {
155 .full_name = "proxy",
/* .tag is copied to avctx->codec_tag by encode_init() */
156 .tag = MKTAG('a', 'p', 'c', 'o'),
159 .br_tab = { 300, 242, 220, 194 },
/* .quant / .quant_chroma are row indices into prores_quant_matrices[] */
160 .quant = QUANT_MAT_PROXY,
161 .quant_chroma = QUANT_MAT_PROXY_CHROMA,
165 .tag = MKTAG('a', 'p', 'c', 's'),
168 .br_tab = { 720, 560, 490, 440 },
169 .quant = QUANT_MAT_LT,
170 .quant_chroma = QUANT_MAT_LT,
173 .full_name = "standard",
174 .tag = MKTAG('a', 'p', 'c', 'n'),
177 .br_tab = { 1050, 808, 710, 632 },
178 .quant = QUANT_MAT_STANDARD,
179 .quant_chroma = QUANT_MAT_STANDARD,
182 .full_name = "high quality",
183 .tag = MKTAG('a', 'p', 'c', 'h'),
186 .br_tab = { 1566, 1216, 1070, 950 },
187 .quant = QUANT_MAT_HQ,
188 .quant_chroma = QUANT_MAT_HQ,
192 .tag = MKTAG('a', 'p', '4', 'h'),
195 .br_tab = { 2350, 1828, 1600, 1425 },
196 .quant = QUANT_MAT_HQ,
197 .quant_chroma = QUANT_MAT_HQ,
200 .full_name = "4444XQ",
201 .tag = MKTAG('a', 'p', '4', 'x'),
204 .br_tab = { 3525, 2742, 2400, 2137 },
205 .quant = QUANT_MAT_HQ, /* Fix me : use QUANT_MAT_XQ_LUMA */
206 .quant_chroma = QUANT_MAT_HQ,
/* Node stride between consecutive slices in the trellis; also sizes slice_bits[] / slice_score[] in find_slice_quant(). */
210 #define TRELLIS_WIDTH 16
/*
 * Saturation value for trellis scores (see find_slice_quant()).
 * NOTE(review): the expansion is not parenthesised. The uses visible in this
 * file (comparisons and plain assignment) are unaffected, but an expression
 * such as "a % SCORE_LIMIT" would not mean what it appears to.
 */
211 #define SCORE_LIMIT INT_MAX / 2
/* Number of pre-scaled quantiser matrices stored in ProresContext.quants[] / quants_chroma[]. */
220 #define MAX_STORED_Q 16
/* Per-thread scratch state for the quantiser search (find_quant_thread() / find_slice_quant()). */
222 typedef struct ProresThreadData {
/* per-plane slice data: transform coefficients, or raw alpha samples for the alpha plane */
223 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
/* 16x16 staging buffer used by get_slice_data() for macroblocks crossing the picture edge */
224 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
/* scratch matrices filled on the fly for quantisers that are not pre-stored */
225 int16_t custom_q[64];
226 int16_t custom_chroma_q[64];
/* trellis nodes; allocated in encode_init() and released in encode_close() */
227 struct TrellisNode *nodes;
/* Encoder private context (avctx->priv_data). */
230 typedef struct ProresContext {
/* slice coefficient buffer and edge-emulation buffer used by encode_slice() */
232 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
233 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
/* base matrices multiplied by the quantiser index; filled in encode_init() */
234 int16_t quants[MAX_STORED_Q][64];
235 int16_t quants_chroma[MAX_STORED_Q][64];
/* scratch matrices for quantisers >= MAX_STORED_Q (see encode_slice()) */
236 int16_t custom_q[64];
237 int16_t custom_chroma_q[64];
/* base luma / chroma matrices, pointing into prores_quant_matrices[] */
238 const uint8_t *quant_mat;
239 const uint8_t *quant_chroma_mat;
/* coefficient scan order: interlaced or progressive table, chosen in encode_init() */
240 const uint8_t *scantable;
/* forward transform hook; encode_init() installs prores_fdct() */
242 void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
243 ptrdiff_t linesize, int16_t *block);
/* picture size in macroblocks */
247 int mb_width, mb_height;
249 int num_chroma_blocks, chroma_factor;
/* mb_height * slices_width */
251 int slices_per_picture;
252 int pictures_per_frame; // 1 for progressive, 2 for interlaced
/* worst-case packet size computed in encode_init(); enlarged by encode_frame() when it has to grow a packet */
263 int frame_size_upper_bound;
266 const struct prores_profile *profile_info;
/* array of avctx->thread_count per-thread work areas */
270 ProresThreadData *tdata;
/*
 * Load one slice of a plane and transform it.
 *
 * For each of the mbs_per_slice macroblocks starting at (x, y), ctx->fdct is
 * run on the macroblock's 8x8 blocks and the coefficients are written to
 * blocks. A macroblock that does not fit completely inside the w x h plane is
 * first staged in emu_buf, with its last valid column and row replicated.
 *
 * linesize is in bytes; each macroblock is 4 * blocks_per_mb samples wide.
 */
273 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
274 ptrdiff_t linesize, int x, int y, int w, int h,
275 int16_t *blocks, uint16_t *emu_buf,
276 int mbs_per_slice, int blocks_per_mb, int is_chroma)
278 const uint16_t *esrc;
279 const int mb_width = 4 * blocks_per_mb;
283 for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
/* clear the coefficient storage of the remaining (mbs_per_slice - i) macroblocks */
285 memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
/* macroblock lies fully inside the plane: transform straight from the picture */
289 if (x + mb_width <= w && y + 16 <= h) {
291 elinesize = linesize;
/* emu_buf rows hold 16 samples; elinesize is a byte stride like linesize */
296 elinesize = 16 * sizeof(*emu_buf);
/* size of the part of the macroblock that is inside the plane */
298 bw = FFMIN(w - x, mb_width);
299 bh = FFMIN(h - y, 16);
301 for (j = 0; j < bh; j++) {
302 memcpy(emu_buf + j * 16,
303 (const uint8_t*)src + j * linesize,
/* extend the row to the right with its last valid sample */
305 pix = emu_buf[j * 16 + bw - 1];
306 for (k = bw; k < mb_width; k++)
307 emu_buf[j * 16 + k] = pix;
/* extend downwards by repeating the last valid row */
310 memcpy(emu_buf + j * 16,
311 emu_buf + (bh - 1) * 16,
312 mb_width * sizeof(*emu_buf));
/*
 * esrc is a uint16_t pointer while elinesize counts bytes, so
 * "esrc + elinesize * 4" advances by eight rows.
 * Two block orders follow: left/right before top/bottom, and top/bottom
 * before left/right; presumably selected by is_chroma.
 */
315 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
317 if (blocks_per_mb > 2) {
318 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
321 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
323 if (blocks_per_mb > 2) {
324 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
328 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
330 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
332 if (blocks_per_mb > 2) {
333 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
335 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
/*
 * Copy one slice of the alpha plane into blocks as 16 rows of
 * 16 * mbs_per_slice samples. Samples beyond the w x h plane are filled by
 * repeating the last valid column and then the last valid row.
 * linesize is in bytes.
 */
344 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
345 ptrdiff_t linesize, int x, int y, int w, int h,
346 int16_t *blocks, int mbs_per_slice, int abits)
348 const int slice_width = 16 * mbs_per_slice;
349 int i, j, copy_w, copy_h;
/* portion of the slice that lies inside the plane */
351 copy_w = FFMIN(w - x, slice_width);
352 copy_h = FFMIN(h - y, 16);
353 for (i = 0; i < copy_h; i++) {
354 memcpy(blocks, src, copy_w * sizeof(*src));
/* convert the sample depth according to abits */
356 for (j = 0; j < copy_w; j++)
359 for (j = 0; j < copy_w; j++)
/* widen by 6 bits, refilling the low bits from the top of the sample (presumably 10-bit input - confirm) */
360 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
/* pad the row to the right with its last valid sample */
361 for (j = copy_w; j < slice_width; j++)
362 blocks[j] = blocks[copy_w - 1];
363 blocks += slice_width;
/* byte stride -> uint16_t element stride */
364 src += linesize >> 1;
/* pad the remaining rows by duplicating the previous row */
366 for (; i < 16; i++) {
367 memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
368 blocks += slice_width;
373 * Write an unsigned rice/exp golomb codeword.
/*
 * The codebook descriptor packs three fields: bits 0-1 hold switch_bits - 1,
 * bits 2-4 the exp-Golomb order and bits 5 and up the Rice order. Values
 * below switch_val use the Rice form, all others the exp-Golomb form.
 */
375 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
377 unsigned int rice_order, exp_order, switch_bits, switch_val;
380 /* number of prefix bits to switch between Rice and expGolomb */
381 switch_bits = (codebook & 3) + 1;
382 rice_order = codebook >> 5; /* rice code order */
383 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
385 switch_val = switch_bits << rice_order;
/* val is compared as unsigned here because switch_val is unsigned */
387 if (val >= switch_val) {
/* exp-Golomb form: zero prefix, then the rebased value in exponent + 1 bits */
388 val -= switch_val - (1 << exp_order);
389 exponent = av_log2(val);
391 put_bits(pb, exponent - exp_order + switch_bits, 0);
392 put_bits(pb, exponent + 1, val);
/* Rice form: the quotient is sent as a run of zero bits */
394 exponent = val >> rice_order;
397 put_bits(pb, exponent, 0);
/* remainder: put_sbits() keeps only the low rice_order bits of val */
400 put_sbits(pb, rice_order, val);
/*
 * GET_SIGN yields -1 for negative x and 0 otherwise; this assumes a 32-bit
 * int and an arithmetic right shift of negative values (implementation-defined in C).
 */
404 #define GET_SIGN(x) ((x) >> 31)
/* Fold a signed value into a non-negative code: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ... */
405 #define MAKE_CODE(x) ((((x)) * 2) ^ GET_SIGN(x))
/*
 * Write the DC coefficients of a slice. The first DC is coded directly with
 * FIRST_DC_CB; the following ones are coded as differences from the previous
 * DC, with the codebook adapted to the magnitude of the previous code.
 * Each DC is offset by 0x4000 and divided by scale before coding.
 */
407 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
408 int blocks_per_slice, int scale)
411 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
413 prev_dc = (blocks[0] - 0x4000) / scale;
414 encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
/* one block is 64 coefficients, so the next DC is 64 entries further on */
419 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
420 dc = (blocks[0] - 0x4000) / scale;
421 delta = dc - prev_dc;
422 new_sign = GET_SIGN(delta);
/* negate delta when sign is -1, leave it unchanged when sign is 0 */
423 delta = (delta ^ sign) - sign;
424 code = MAKE_CODE(delta);
425 encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
/* next codebook index: ceil(code / 2), capped at 3 */
426 codebook = (code + (code & 1)) >> 1;
427 codebook = FFMIN(codebook, 3);
/*
 * Write the AC coefficients of a slice as (run, level, sign) codes.
 * Coefficients are visited position by position in scan order, and for each
 * position across all blocks of the slice. The run and level codebooks are
 * chosen from the previously coded run and level.
 */
433 static void encode_acs(PutBitContext *pb, int16_t *blocks,
434 int blocks_per_slice,
435 int plane_size_factor,
436 const uint8_t *scan, const int16_t *qmat)
439 int run, level, run_cb, lev_cb;
440 int max_coeffs, abs_level;
/* total number of coefficients in the slice (64 per block) */
442 max_coeffs = blocks_per_slice << 6;
/* initial codebook selectors */
443 run_cb = ff_prores_run_to_cb_index[4];
444 lev_cb = ff_prores_lev_to_cb_index[2];
/* position 0 is the DC coefficient and is handled by encode_dcs() */
447 for (i = 1; i < 64; i++) {
448 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
449 level = blocks[idx] / qmat[scan[i]];
451 abs_level = FFABS(level);
452 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
453 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
/* one sign bit: GET_SIGN() is -1 for negative levels, which put_sbits() writes as 1 */
455 put_sbits(pb, 1, GET_SIGN(level));
/* adapt the codebooks; the lookup tables are indexed with clamped run / level */
457 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
458 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Write one already-transformed colour plane of a slice: first all DC
 * coefficients (quantised with qmat[0]), then all AC coefficients.
 */
467 static void encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
468 const uint16_t *src, ptrdiff_t linesize,
469 int mbs_per_slice, int16_t *blocks,
470 int blocks_per_mb, int plane_size_factor,
473 int blocks_per_slice = mbs_per_slice * blocks_per_mb;
475 encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
476 encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
477 ctx->scantable, qmat);
/*
 * Write the difference between two alpha samples. Differences that are zero
 * or outside +/- dsize are written as a full abits-wide value; the others
 * use a short form of magnitude - 1 followed by a sign bit.
 */
480 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
/* short-form width: 4 bits for 8-bit alpha, 7 bits otherwise */
482 const int dbits = (abits == 8) ? 4 : 7;
/* parsed as 1 << (dbits - 1), since '-' binds tighter than '<<' */
483 const int dsize = 1 << dbits - 1;
484 int diff = cur - prev;
/* reduce the difference modulo 2^abits */
486 diff = av_mod_uintp2(diff, abits);
487 if (diff >= (1 << abits) - dsize)
489 if (diff < -dsize || diff > dsize || !diff) {
491 put_bits(pb, abits, diff);
494 put_bits(pb, dbits - 1, FFABS(diff) - 1);
495 put_bits(pb, 1, diff < 0);
/* Write an alpha run length using either a 4-bit or a 15-bit field for the value. */
499 static void put_alpha_run(PutBitContext *pb, int run)
504 put_bits(pb, 4, run);
506 put_bits(pb, 15, run);
512 // todo alpha quantisation for high quants
/*
 * Write the alpha samples of one slice (256 per macroblock) as a sequence of
 * sample differences and run lengths, using put_alpha_diff() and
 * put_alpha_run().
 */
513 static void encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
514 int mbs_per_slice, uint16_t *blocks,
517 const int abits = ctx->alpha_bits;
518 const int mask = (1 << abits) - 1;
519 const int num_coeffs = mbs_per_slice * 256;
/* the predictor starts at the maximum value representable in abits bits */
520 int prev = mask, cur;
525 put_alpha_diff(pb, cur, prev, abits);
530 put_alpha_run (pb, run);
531 put_alpha_diff(pb, cur, prev, abits);
537 } while (idx < num_coeffs);
/* flush the final run */
539 put_alpha_run(pb, run);
/*
 * Encode all planes of the slice at macroblock position (x, y) with the
 * given quantiser and store the number of bytes written per plane in sizes[].
 *
 * The quantisation matrices come from ctx->quants[0] when a fixed quantiser
 * is forced, from the pre-stored tables for quant < MAX_STORED_Q, and are
 * otherwise built on the fly in ctx->custom_q / ctx->custom_chroma_q.
 */
542 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
544 int sizes[4], int x, int y, int quant,
547 ProresContext *ctx = avctx->priv_data;
551 int slice_width_factor = av_log2(mbs_per_slice);
552 int num_cblocks, pwidth, line_add;
554 int plane_factor, is_chroma;
/*
 * Every matrix this pointer can refer to is an int16_t array and it is passed
 * on as const int16_t *, so it must be a signed pointer (it used to be
 * uint16_t *, an incompatible pointer type).
 * NOTE(review): give qmat the same type if it is still declared unsigned.
 */
556 int16_t *qmat_chroma;
/* for interlaced frames line_add selects which field's lines are read */
558 if (ctx->pictures_per_frame == 1)
561 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
563 if (ctx->force_quant) {
564 qmat = ctx->quants[0];
565 qmat_chroma = ctx->quants_chroma[0];
566 } else if (quant < MAX_STORED_Q) {
567 qmat = ctx->quants[quant];
568 qmat_chroma = ctx->quants_chroma[quant];
/* quantiser beyond the stored tables: scale the base matrices now */
570 qmat = ctx->custom_q;
571 qmat_chroma = ctx->custom_chroma_q;
572 for (i = 0; i < 64; i++) {
573 qmat[i] = ctx->quant_mat[i] * quant;
574 qmat_chroma[i] = ctx->quant_chroma_mat[i] * quant;
578 for (i = 0; i < ctx->num_planes; i++) {
/* planes 1 and 2 are chroma; plane 0 is luma and plane 3, if present, alpha */
579 is_chroma = (i == 1 || i == 2);
580 plane_factor = slice_width_factor + 2;
582 plane_factor += ctx->chroma_factor - 3;
/* full-width plane: luma, alpha, or chroma of 4:4:4 material */
583 if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
587 pwidth = avctx->width;
/* horizontally subsampled chroma */
592 pwidth = avctx->width >> 1;
/* a field picture uses every second line, hence the multiplied stride */
595 linesize = pic->linesize[i] * ctx->pictures_per_frame;
596 src = (const uint16_t*)(pic->data[i] + yp * linesize +
597 line_add * pic->linesize[i]) + xp;
600 get_slice_data(ctx, src, linesize, xp, yp,
601 pwidth, avctx->height / ctx->pictures_per_frame,
602 ctx->blocks[0], ctx->emu_buf,
603 mbs_per_slice, num_cblocks, is_chroma);
604 if (!is_chroma) {/* luma quant */
605 encode_slice_plane(ctx, pb, src, linesize,
606 mbs_per_slice, ctx->blocks[0],
607 num_cblocks, plane_factor, qmat);
608 } else { /* chroma plane */
609 encode_slice_plane(ctx, pb, src, linesize,
610 mbs_per_slice, ctx->blocks[0],
611 num_cblocks, plane_factor, qmat_chroma);
/* alpha is not transformed: the raw samples are fetched and run-coded */
614 get_alpha_data(ctx, src, linesize, xp, yp,
615 pwidth, avctx->height / ctx->pictures_per_frame,
616 ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
617 encode_alpha_plane(ctx, pb, mbs_per_slice, ctx->blocks[0], quant);
/* bytes produced by this plane = output position now minus position before */
620 sizes[i] = put_bytes_output(pb) - total_size;
621 total_size = put_bytes_output(pb);
/*
 * Return the number of bits encode_vlc_codeword() would spend on val with
 * the given codebook descriptor, without writing anything.
 */
626 static inline int estimate_vlc(unsigned codebook, int val)
628 unsigned int rice_order, exp_order, switch_bits, switch_val;
631 /* number of prefix bits to switch between Rice and expGolomb */
632 switch_bits = (codebook & 3) + 1;
633 rice_order = codebook >> 5; /* rice code order */
634 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
636 switch_val = switch_bits << rice_order;
638 if (val >= switch_val) {
639 val -= switch_val - (1 << exp_order);
640 exponent = av_log2(val);
/* (exponent - exp_order + switch_bits) prefix bits plus (exponent + 1) value bits */
642 return exponent * 2 - exp_order + switch_bits + 1;
/* Rice form: quotient bits, one further bit, and rice_order remainder bits */
644 return (val >> rice_order) + rice_order + 1;
/*
 * Estimate the bit cost of the DC coefficients of a slice, mirroring
 * encode_dcs(), and add the quantisation remainders to *error.
 */
648 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
652 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
655 prev_dc = (blocks[0] - 0x4000) / scale;
656 bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
/* the remainder of the division is what quantisation discards */
660 *error += FFABS(blocks[0] - 0x4000) % scale;
662 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
663 dc = (blocks[0] - 0x4000) / scale;
664 *error += FFABS(blocks[0] - 0x4000) % scale;
665 delta = dc - prev_dc;
666 new_sign = GET_SIGN(delta);
667 delta = (delta ^ sign) - sign;
668 code = MAKE_CODE(delta);
669 bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
/* same codebook adaptation as in encode_dcs() */
670 codebook = (code + (code & 1)) >> 1;
671 codebook = FFMIN(codebook, 3);
/*
 * Estimate the bit cost of the AC coefficients of a slice, mirroring
 * encode_acs(), and add the quantisation remainders to *error.
 */
679 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
680 int plane_size_factor,
681 const uint8_t *scan, const int16_t *qmat)
684 int run, level, run_cb, lev_cb;
685 int max_coeffs, abs_level;
688 max_coeffs = blocks_per_slice << 6;
689 run_cb = ff_prores_run_to_cb_index[4];
690 lev_cb = ff_prores_lev_to_cb_index[2];
693 for (i = 1; i < 64; i++) {
694 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
695 level = blocks[idx] / qmat[scan[i]];
/* accumulate what the integer division drops */
696 *error += FFABS(blocks[idx]) % qmat[scan[i]];
698 abs_level = FFABS(level);
699 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
700 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
703 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
704 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Estimate the size in bits of one colour plane of a slice, using the
 * coefficients already stored in td->blocks[plane]. The result is rounded up
 * to a whole number of bytes; quantisation error is accumulated in *error.
 */
715 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
716 const uint16_t *src, ptrdiff_t linesize,
718 int blocks_per_mb, int plane_size_factor,
719 const int16_t *qmat, ProresThreadData *td)
721 int blocks_per_slice;
724 blocks_per_slice = mbs_per_slice * blocks_per_mb;
726 bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
727 bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
728 plane_size_factor, ctx->scantable, qmat);
730 return FFALIGN(bits, 8);
/* Bit-cost counterpart of put_alpha_diff(): classifies the difference the same way without writing it. */
733 static int est_alpha_diff(int cur, int prev, int abits)
735 const int dbits = (abits == 8) ? 4 : 7;
/* parsed as 1 << (dbits - 1) */
736 const int dsize = 1 << dbits - 1;
737 int diff = cur - prev;
739 diff = av_mod_uintp2(diff, abits);
740 if (diff >= (1 << abits) - dsize)
742 if (diff < -dsize || diff > dsize || !diff)
/*
 * Estimate the size in bits of the alpha plane of one slice, walking the
 * samples in blocks the same way encode_alpha_plane() does.
 */
748 static int estimate_alpha_plane(ProresContext *ctx,
749 const uint16_t *src, ptrdiff_t linesize,
750 int mbs_per_slice, int16_t *blocks)
752 const int abits = ctx->alpha_bits;
753 const int mask = (1 << abits) - 1;
754 const int num_coeffs = mbs_per_slice * 256;
755 int prev = mask, cur;
761 bits = est_alpha_diff(cur, prev, abits);
772 bits += est_alpha_diff(cur, prev, abits);
778 } while (idx < num_coeffs);
/*
 * Rate-control step for one slice.
 *
 * The slice at macroblock position (x, y) is transformed once into
 * td->blocks[]. Its size and quantisation error are then estimated for every
 * quantiser from min_quant to max_quant, plus one extra "overquant" state
 * (index max_quant + 1) used when even max_quant exceeds the per-slice bit
 * budget. The estimates extend the trellis: the nodes of this slice start at
 * trellis_node, those of the previous slice TRELLIS_WIDTH entries earlier.
 */
790 static int find_slice_quant(AVCodecContext *avctx,
791 int trellis_node, int x, int y, int mbs_per_slice,
792 ProresThreadData *td)
794 ProresContext *ctx = avctx->priv_data;
795 int i, q, pq, xp, yp;
797 int slice_width_factor = av_log2(mbs_per_slice);
798 int num_cblocks[MAX_PLANES], pwidth;
799 int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
800 const int min_quant = ctx->profile_info->min_quant;
801 const int max_quant = ctx->profile_info->max_quant;
802 int error, bits, bits_limit;
803 int mbs, prev, cur, new_score;
804 int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
/*
 * Every matrix this pointer can refer to is an int16_t array and the
 * estimators take const int16_t *, so it must be a signed pointer (it used
 * to be uint16_t *, an incompatible pointer type).
 * NOTE(review): give qmat the same type if it is still declared unsigned.
 */
807 int16_t *qmat_chroma;
808 int linesize[4], line_add;
811 if (ctx->pictures_per_frame == 1)
814 line_add = ctx->cur_picture_idx ^ !ctx->pic->top_field_first;
/* macroblocks covered in this row up to and including this slice */
815 mbs = x + mbs_per_slice;
/* fetch (and, for colour planes, transform) the data of every plane once */
817 for (i = 0; i < ctx->num_planes; i++) {
818 is_chroma[i] = (i == 1 || i == 2);
819 plane_factor[i] = slice_width_factor + 2;
821 plane_factor[i] += ctx->chroma_factor - 3;
822 if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
826 pwidth = avctx->width;
831 pwidth = avctx->width >> 1;
834 linesize[i] = ctx->pic->linesize[i] * ctx->pictures_per_frame;
835 src = (const uint16_t *)(ctx->pic->data[i] + yp * linesize[i] +
836 line_add * ctx->pic->linesize[i]) + xp;
839 get_slice_data(ctx, src, linesize[i], xp, yp,
840 pwidth, avctx->height / ctx->pictures_per_frame,
841 td->blocks[i], td->emu_buf,
842 mbs_per_slice, num_cblocks[i], is_chroma[i]);
844 get_alpha_data(ctx, src, linesize[i], xp, yp,
845 pwidth, avctx->height / ctx->pictures_per_frame,
846 td->blocks[i], mbs_per_slice, ctx->alpha_bits);
/* reset this slice's trellis nodes */
850 for (q = min_quant; q < max_quant + 2; q++) {
851 td->nodes[trellis_node + q].prev_node = -1;
852 td->nodes[trellis_node + q].quant = q;
856 alpha_bits = estimate_alpha_plane(ctx, src, linesize[3],
857 mbs_per_slice, td->blocks[3]);
858 // todo: maybe perform coarser quantising to fit into frame size when needed
/* cost and error for each regular quantiser */
859 for (q = min_quant; q <= max_quant; q++) {
862 bits += estimate_slice_plane(ctx, &error, 0,
865 num_cblocks[0], plane_factor[0],
866 ctx->quants[q], td); /* estimate luma plane */
867 for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
868 bits += estimate_slice_plane(ctx, &error, i,
871 num_cblocks[i], plane_factor[i],
872 ctx->quants_chroma[q], td);
/* size cap for a single slice, in bits */
874 if (bits > 65000 * 8)
877 slice_bits[q] = bits;
878 slice_score[q] = error;
/* max_quant already fits the budget: the overquant state mirrors it with a slightly worse score */
880 if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
881 slice_bits[max_quant + 1] = slice_bits[max_quant];
882 slice_score[max_quant + 1] = slice_score[max_quant] + 1;
883 overquant = max_quant;
/* otherwise look for the first larger quantiser that fits the budget */
885 for (q = max_quant + 1; q < 128; q++) {
888 if (q < MAX_STORED_Q) {
889 qmat = ctx->quants[q];
890 qmat_chroma = ctx->quants_chroma[q];
893 qmat_chroma = td->custom_chroma_q;
894 for (i = 0; i < 64; i++) {
895 qmat[i] = ctx->quant_mat[i] * q;
896 qmat_chroma[i] = ctx->quant_chroma_mat[i] * q;
899 bits += estimate_slice_plane(ctx, &error, 0,
902 num_cblocks[0], plane_factor[0],
903 qmat, td);/* estimate luma plane */
904 for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
905 bits += estimate_slice_plane(ctx, &error, i,
908 num_cblocks[i], plane_factor[i],
911 if (bits <= ctx->bits_per_mb * mbs_per_slice)
915 slice_bits[max_quant + 1] = bits;
916 slice_score[max_quant + 1] = error;
919 td->nodes[trellis_node + max_quant + 1].quant = overquant;
/* cumulative bit budget for the row up to the end of this slice */
921 bits_limit = mbs * ctx->bits_per_mb;
/* connect every state of the previous slice to every state of this one */
922 for (pq = min_quant; pq < max_quant + 2; pq++) {
923 prev = trellis_node - TRELLIS_WIDTH + pq;
925 for (q = min_quant; q < max_quant + 2; q++) {
926 cur = trellis_node + q;
928 bits = td->nodes[prev].bits + slice_bits[q];
929 error = slice_score[q];
930 if (bits > bits_limit)
/* saturate instead of adding when either score is already at the limit */
933 if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
934 new_score = td->nodes[prev].score + error;
936 new_score = SCORE_LIMIT;
/* keep the predecessor with the lowest score (ties replace the stored one) */
937 if (td->nodes[cur].prev_node == -1 ||
938 td->nodes[cur].score >= new_score) {
940 td->nodes[cur].bits = bits;
941 td->nodes[cur].score = new_score;
942 td->nodes[cur].prev_node = prev;
/* select the node of this slice with the lowest score; ties go to the larger quantiser */
947 error = td->nodes[trellis_node + min_quant].score;
948 pq = trellis_node + min_quant;
949 for (q = min_quant + 1; q < max_quant + 2; q++) {
950 if (td->nodes[trellis_node + q].score <= error) {
951 error = td->nodes[trellis_node + q].score;
952 pq = trellis_node + q;
/*
 * execute2() job: choose the quantisers for one row of slices.
 * jobnr is the macroblock row and threadnr selects the per-thread scratch
 * area. The trellis is built slice by slice with find_slice_quant() and then
 * walked backwards to fill ctx->slice_q for this row.
 */
959 static int find_quant_thread(AVCodecContext *avctx, void *arg,
960 int jobnr, int threadnr)
962 ProresContext *ctx = avctx->priv_data;
963 ProresThreadData *td = ctx->tdata + threadnr;
964 int mbs_per_slice = ctx->mbs_per_slice;
965 int x, y = jobnr, mb, q = 0;
967 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
/* slices at the end of a row may be narrower than the configured width */
968 while (ctx->mb_width - x < mbs_per_slice)
/* slice number mb uses trellis column mb + 1; column 0 holds the start nodes set up in encode_init() */
970 q = find_slice_quant(avctx,
971 (mb + 1) * TRELLIS_WIDTH, x, y,
/* backtrack from the last node, recording each slice's quantiser */
975 for (x = ctx->slices_width - 1; x >= 0; x--) {
976 ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
977 q = td->nodes[q].prev_node;
/*
 * Encode one frame into pkt.
 *
 * Writes the frame container header and frame header, then for every picture
 * (one per frame, or two for interlaced material) a picture header, a table
 * of slice sizes and the slices themselves. Unless a fixed quantiser is
 * forced, the quantiser of each slice is chosen beforehand by
 * find_quant_thread(). The packet is enlarged on the fly when the remaining
 * space could be too small for the next slices.
 */
983 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
984 const AVFrame *pic, int *got_packet)
986 ProresContext *ctx = avctx->priv_data;
987 uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
988 uint8_t *picture_size_pos;
990 int x, y, i, mb, q = 0;
991 int sizes[4] = { 0 };
/* one byte for the header size, one for the quantiser, two per plane size except for the last plane */
992 int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
993 int frame_size, picture_size, slice_size;
/* initial guess of the largest slice, derived from the frame size bound */
995 int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1);
999 pkt_size = ctx->frame_size_upper_bound;
1001 if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
1004 orig_buf = pkt->data;
1007 orig_buf += 4; // frame size
1008 bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
1013 buf += 2; // frame header size will be stored here
1014 bytestream_put_be16 (&buf, 0); // version 1
1015 bytestream_put_buffer(&buf, ctx->vendor, 4);
1016 bytestream_put_be16 (&buf, avctx->width);
1017 bytestream_put_be16 (&buf, avctx->height);
/* chroma format in the top two bits, field order in bits 2-3 for interlaced input */
1019 frame_flags = ctx->chroma_factor << 6;
1020 if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT)
1021 frame_flags |= pic->top_field_first ? 0x04 : 0x08;
1022 bytestream_put_byte (&buf, frame_flags);
1024 bytestream_put_byte (&buf, 0); // reserved
1025 bytestream_put_byte (&buf, pic->color_primaries);
1026 bytestream_put_byte (&buf, pic->color_trc);
1027 bytestream_put_byte (&buf, pic->colorspace);
1028 bytestream_put_byte (&buf, 0x40 | (ctx->alpha_bits >> 3));
1029 bytestream_put_byte (&buf, 0); // reserved
1030 if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
1031 bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
1032 // luma quantisation matrix
1033 for (i = 0; i < 64; i++)
1034 bytestream_put_byte(&buf, ctx->quant_mat[i]);
1035 // chroma quantisation matrix
/*
 * The header must carry the matrix the chroma planes were quantised with.
 * This used to write quant_mat again, which is wrong whenever the two
 * matrices differ (e.g. the proxy profile).
 */
1036 for (i = 0; i < 64; i++)
1037 bytestream_put_byte(&buf, ctx->quant_chroma_mat[i]);
1039 bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
1041 bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
1043 for (ctx->cur_picture_idx = 0;
1044 ctx->cur_picture_idx < ctx->pictures_per_frame;
1045 ctx->cur_picture_idx++) {
/* the picture size field follows the one-byte header size */
1047 picture_size_pos = buf + 1;
1048 bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
1049 buf += 4; // picture data size will be stored here
1050 bytestream_put_be16 (&buf, ctx->slices_per_picture);
1051 bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
1053 // seek table - will be filled during slice encoding
1055 buf += ctx->slices_per_picture * 2;
/* rate control: choose a quantiser for every slice of this picture */
1058 if (!ctx->force_quant) {
1059 ret = avctx->execute2(avctx, find_quant_thread, (void*)pic, NULL,
1065 for (y = 0; y < ctx->mb_height; y++) {
1066 int mbs_per_slice = ctx->mbs_per_slice;
1067 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1068 q = ctx->force_quant ? ctx->force_quant
1069 : ctx->slice_q[mb + y * ctx->slices_width];
/* slices at the end of a row may be narrower */
1071 while (ctx->mb_width - x < mbs_per_slice)
1072 mbs_per_slice >>= 1;
1074 bytestream_put_byte(&buf, slice_hdr_size << 3);
1076 buf += slice_hdr_size - 1;
/* not enough room left for two slices of the largest size seen so far: grow the packet */
1077 if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1078 uint8_t *start = pkt->data;
1079 // Recompute new size according to max_slice_size
1081 int delta = 200 + (ctx->pictures_per_frame *
1082 ctx->slices_per_picture + 1) *
1083 max_slice_size - pkt_size;
1085 delta = FFMAX(delta, 2 * max_slice_size);
1086 ctx->frame_size_upper_bound += delta;
1089 avpriv_request_sample(avctx,
1090 "Packet too small: is %i,"
1091 " needs %i (slice: %i). "
1092 "Correct allocation",
1093 pkt_size, delta, max_slice_size);
1097 ret = av_grow_packet(pkt, delta);
/* the packet buffer may have moved: rebase every pointer into it */
1103 orig_buf = pkt->data + (orig_buf - start);
1104 buf = pkt->data + (buf - start);
1105 picture_size_pos = pkt->data + (picture_size_pos - start);
1106 slice_sizes = pkt->data + (slice_sizes - start);
1107 slice_hdr = pkt->data + (slice_hdr - start);
1108 tmp = pkt->data + (tmp - start);
1110 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)));
1111 ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
/* fill in the slice header: quantiser, then the size of every plane but the last */
1116 bytestream_put_byte(&slice_hdr, q);
1117 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1118 for (i = 0; i < ctx->num_planes - 1; i++) {
1119 bytestream_put_be16(&slice_hdr, sizes[i]);
1120 slice_size += sizes[i];
1122 bytestream_put_be16(&slice_sizes, slice_size);
1123 buf += slice_size - slice_hdr_size;
/* remember the largest slice for the packet-growth check above */
1124 if (max_slice_size < slice_size)
1125 max_slice_size = slice_size;
/* picture size is counted from the first byte of the picture header */
1129 picture_size = buf - (picture_size_pos - 1);
1130 bytestream_put_be32(&picture_size_pos, picture_size);
1134 frame_size = buf - orig_buf;
1135 bytestream_put_be32(&orig_buf, frame_size);
1137 pkt->size = frame_size;
/* every frame is marked as a key frame */
1138 pkt->flags |= AV_PKT_FLAG_KEY;
/*
 * Release everything allocated by encode_init(): the trellis nodes of every
 * thread, the per-thread data array and the slice quantiser table.
 * encode_init() also calls this on its allocation-failure paths.
 */
1144 static av_cold int encode_close(AVCodecContext *avctx)
1146 ProresContext *ctx = avctx->priv_data;
1150 for (i = 0; i < avctx->thread_count; i++)
1151 av_freep(&ctx->tdata[i].nodes);
1153 av_freep(&ctx->tdata);
1154 av_freep(&ctx->slice_q);
/*
 * Transform hook installed in ctx->fdct: gather an 8x8 block of 16-bit
 * samples from src into the contiguous 64-entry block. linesize is in bytes.
 */
1159 static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
1160 ptrdiff_t linesize, int16_t *block)
1163 const uint16_t *tsrc = src;
1165 for (y = 0; y < 8; y++) {
1166 for (x = 0; x < 8; x++)
1167 block[y * 8 + x] = tsrc[x];
/* byte stride -> uint16_t element stride */
1168 tsrc += linesize >> 1;
/*
 * Encoder initialisation: validate the options, resolve the profile, derive
 * the slice geometry, set up the quantisation tables and the rate-control
 * state, and compute the worst-case frame size.
 *
 * Returns a negative AVERROR code for invalid options or failed allocations.
 */
1173 static av_cold int encode_init(AVCodecContext *avctx)
1175 ProresContext *ctx = avctx->priv_data;
1178 int min_quant, max_quant;
1179 int interlaced = !!(avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT);
1181 avctx->bits_per_raw_sample = 10;
1183 ctx->fdct = prores_fdct;
1184 ctx->scantable = interlaced ? ff_prores_interlaced_scan
1185 : ff_prores_progressive_scan;
1186 ff_fdctdsp_init(&ctx->fdsp, avctx);
/* mps & (mps - 1) is non-zero exactly when mps is not a power of two */
1188 mps = ctx->mbs_per_slice;
1189 if (mps & (mps - 1)) {
1190 av_log(avctx, AV_LOG_ERROR,
1191 "there should be an integer power of two MBs per slice\n");
1192 return AVERROR(EINVAL);
/* automatic profile: 4444 for pixel formats with alpha or without chroma subsampling, HQ otherwise */
1194 if (ctx->profile == PRORES_PROFILE_AUTO) {
1195 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
1196 ctx->profile = (desc->flags & AV_PIX_FMT_FLAG_ALPHA ||
1197 !(desc->log2_chroma_w + desc->log2_chroma_h))
1198 ? PRORES_PROFILE_4444 : PRORES_PROFILE_HQ;
1199 av_log(avctx, AV_LOG_INFO, "Autoselected %s. It can be overridden "
1200 "through -profile option.\n", ctx->profile == PRORES_PROFILE_4444
1201 ? "4:4:4:4 profile because of the used input colorspace"
1202 : "HQ profile to keep best quality");
1204 if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1205 if (ctx->profile != PRORES_PROFILE_4444 &&
1206 ctx->profile != PRORES_PROFILE_4444XQ) {
1207 // the selected profile cannot carry alpha: disable it and warn
1208 av_log(avctx, AV_LOG_WARNING, "Profile selected will not "
1209 "encode alpha. Override with -profile if needed.\n");
1210 ctx->alpha_bits = 0;
/* only whole bytes are accepted for the alpha depth */
1212 if (ctx->alpha_bits & 7) {
1213 av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1214 return AVERROR(EINVAL);
1216 avctx->bits_per_coded_sample = 32;
1218 ctx->alpha_bits = 0;
1221 ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1224 ctx->profile_info = prores_profile_info + ctx->profile;
1225 ctx->num_planes = 3 + !!ctx->alpha_bits;
/* picture size in 16x16 macroblocks, rounded up */
1227 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
/* two alternatives follow: rows per picture when the frame is split in two (height / 32), and for a whole frame (height / 16) */
1230 ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1232 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
/*
 * Slices per row: full-width slices plus one power-of-two sized slice for
 * every set bit of the leftover macroblock count.
 */
1234 ctx->slices_width = ctx->mb_width / mps;
1235 ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1236 ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1237 ctx->pictures_per_frame = 1 + interlaced;
/* quant_sel == -1 means "use the profile's matrices"; otherwise one matrix serves both luma and chroma */
1239 if (ctx->quant_sel == -1) {
1240 ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1241 ctx->quant_chroma_mat = prores_quant_matrices[ctx->profile_info->quant_chroma];
1243 ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
1244 ctx->quant_chroma_mat = prores_quant_matrices[ctx->quant_sel];
/* encode_frame() copies exactly four bytes of the vendor string into the header */
1247 if (strlen(ctx->vendor) != 4) {
1248 av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1249 return AVERROR_INVALIDDATA;
/* a non-zero global_quality selects a fixed quantiser and bypasses rate control */
1252 ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1253 if (!ctx->force_quant) {
1254 if (!ctx->bits_per_mb) {
/* pick the bitrate column matching the picture's macroblock count */
1255 for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1256 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1257 ctx->pictures_per_frame)
1259 ctx->bits_per_mb = ctx->profile_info->br_tab[i];
1260 if (ctx->alpha_bits)
1261 ctx->bits_per_mb *= 20;
1262 } else if (ctx->bits_per_mb < 128) {
1263 av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1264 return AVERROR_INVALIDDATA;
1267 min_quant = ctx->profile_info->min_quant;
1268 max_quant = ctx->profile_info->max_quant;
/* pre-scale the base matrices for every stored quantiser index */
1269 for (i = min_quant; i < MAX_STORED_Q; i++) {
1270 for (j = 0; j < 64; j++) {
1271 ctx->quants[i][j] = ctx->quant_mat[j] * i;
1272 ctx->quants_chroma[i][j] = ctx->quant_chroma_mat[j] * i;
/* one quantiser per slice of a picture */
1276 ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1277 if (!ctx->slice_q) {
1278 encode_close(avctx);
1279 return AVERROR(ENOMEM);
1282 ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1284 encode_close(avctx);
1285 return AVERROR(ENOMEM);
1288 for (j = 0; j < avctx->thread_count; j++) {
1289 ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1291 * sizeof(*ctx->tdata->nodes));
1292 if (!ctx->tdata[j].nodes) {
1293 encode_close(avctx);
1294 return AVERROR(ENOMEM);
/* trellis start column: zero bits and zero score for every quantiser state */
1296 for (i = min_quant; i < max_quant + 2; i++) {
1297 ctx->tdata[j].nodes[i].prev_node = -1;
1298 ctx->tdata[j].nodes[i].bits = 0;
1299 ctx->tdata[j].nodes[i].score = 0;
1306 if (ctx->force_quant > 64) {
1307 av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1308 return AVERROR_INVALIDDATA;
/* fixed quantiser: store the scaled matrices in slot 0 and derive a per-macroblock bit estimate from them */
1311 for (j = 0; j < 64; j++) {
1312 ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1313 ctx->quants_chroma[0][j] = ctx->quant_chroma_mat[j] * ctx->force_quant;
1314 ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
1315 ls_chroma += av_log2((1 << 11) / ctx->quants_chroma[0][j]) * 2 + 1;
1318 ctx->bits_per_mb = ls * 4 + ls_chroma * 4;
1319 if (ctx->chroma_factor == CFACTOR_Y444)
1320 ctx->bits_per_mb += ls_chroma * 4;
/* worst-case packet size: per-slice header bytes plus the slice bit budget, for every slice and one spare */
1323 ctx->frame_size_upper_bound = (ctx->pictures_per_frame *
1324 ctx->slices_per_picture + 1) *
1325 (2 + 2 * ctx->num_planes +
1326 (mps * ctx->bits_per_mb) / 8)
1329 if (ctx->alpha_bits) {
1330 // The alpha plane is run-coded and might exceed the bit budget.
1331 ctx->frame_size_upper_bound += (ctx->pictures_per_frame *
1332 ctx->slices_per_picture + 1) *
1333 /* num pixels per slice */ (ctx->mbs_per_slice * 256 *
1334 /* bits per pixel */ (1 + ctx->alpha_bits + 1) + 7 >> 3);
1337 avctx->codec_tag = ctx->profile_info->tag;
1339 av_log(avctx, AV_LOG_DEBUG,
1340 "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1341 ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1342 interlaced ? "yes" : "no", ctx->bits_per_mb);
1343 av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1344 ctx->frame_size_upper_bound);
/* Helpers for the option table: field offset inside ProresContext, and the flag set shared by all options. */
1349 #define OFFSET(x) offsetof(ProresContext, x)
1350 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/*
 * Private encoder options. Each entry stores its value in the ProresContext
 * field given by OFFSET(); entries of type AV_OPT_TYPE_CONST are named values
 * for the option whose unit string they share ("profile" or "quant_mat").
 */
1352 static const AVOption options[] = {
/* encode_init() additionally requires this to be a power of two */
1353 { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1354 AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1355 { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1356 { .i64 = PRORES_PROFILE_AUTO },
1357 PRORES_PROFILE_AUTO, PRORES_PROFILE_4444XQ, VE, "profile" },
1358 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_AUTO },
1359 0, 0, VE, "profile" },
1360 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1361 0, 0, VE, "profile" },
1362 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1363 0, 0, VE, "profile" },
1364 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1365 0, 0, VE, "profile" },
1366 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1367 0, 0, VE, "profile" },
1368 { "4444", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1369 0, 0, VE, "profile" },
1370 { "4444xq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444XQ },
1371 0, 0, VE, "profile" },
/* encode_init() rejects vendor strings that are not exactly four characters long */
1372 { "vendor", "vendor ID", OFFSET(vendor),
1373 AV_OPT_TYPE_STRING, { .str = "Lavc" }, 0, 0, VE },
/* 0 lets encode_init() derive the value from the profile and picture size */
1374 { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1375 AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
/* -1 ("auto") takes the matrices from the selected profile */
1376 { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1377 { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1378 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1379 0, 0, VE, "quant_mat" },
1380 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1381 0, 0, VE, "quant_mat" },
1382 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1383 0, 0, VE, "quant_mat" },
1384 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1385 0, 0, VE, "quant_mat" },
1386 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1387 0, 0, VE, "quant_mat" },
1388 { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1389 0, 0, VE, "quant_mat" },
/* encode_init() accepts only multiples of 8 here, and forces 0 when alpha is not encoded */
1390 { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1391 { .i64 = 16 }, 0, 16, VE },
/* AVClass describing the encoder's private context; referenced by the codec definition's priv_class. */
1395 static const AVClass proresenc_class = {
1396 .class_name = "ProRes encoder",
1397 .item_name = av_default_item_name,
1399 .version = LIBAVUTIL_VERSION_INT,
/* Codec registration entry for the "prores_ks" encoder. */
1402 const AVCodec ff_prores_ks_encoder = {
1403 .name = "prores_ks",
1404 .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1405 .type = AVMEDIA_TYPE_VIDEO,
1406 .id = AV_CODEC_ID_PRORES,
1407 .priv_data_size = sizeof(ProresContext),
1408 .init = encode_init,
1409 .close = encode_close,
1410 .encode2 = encode_frame,
1411 .capabilities = AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
/* accepted input formats: 10-bit 4:2:2, 4:4:4 and 4:4:4 with alpha */
1412 .pix_fmts = (const enum AVPixelFormat[]) {
1413 AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1414 AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1416 .priv_class = &proresenc_class,
1417 .profiles = NULL_IF_CONFIG_SMALL(ff_prores_profiles),