4 * Copyright (c) 2012 Konstantin Shishkov
6 * This encoder appears to be based on Anatoliy Wasserman's encoder, considering
7 * the similarities in the bugs.
9 * This file is part of FFmpeg.
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/opt.h"
27 #include "libavutil/pixdesc.h"
31 #include "bytestream.h"
33 #include "proresdata.h"
/* Chroma-format codes. ctx->chroma_factor is compared against CFACTOR_Y444 in
 * the per-plane setup, and encode_frame() stores ctx->chroma_factor << 6 in
 * the frame flags byte. */
35 #define CFACTOR_Y422 2
36 #define CFACTOR_Y444 3
/* Upper bound of the "mbs_per_slice" option; it also sizes the per-plane
 * coefficient buffers (64 * 4 * MAX_MBS_PER_SLICE entries). */
38 #define MAX_MBS_PER_SLICE 8
/* Profile identifiers: they give the range and default of the "profile"
 * option, and encode_init() uses the option value as an index into
 * prores_profile_info[].
 * NOTE(review): the enum header and the remaining enumerators are not visible
 * in this excerpt. */
43 PRORES_PROFILE_PROXY = 0,
45 PRORES_PROFILE_STANDARD,
/* Quantisation matrices, 64 coefficients (8x8) per entry. encode_init()
 * selects one entry through ctx->profile_info->quant (when quant_sel is -1)
 * or through ctx->quant_sel.
 * NOTE(review): the per-entry braces/designators are not visible in this
 * excerpt, so which table belongs to which QUANT_MAT_* index cannot be
 * confirmed from here. */
58 static const uint8_t prores_quant_matrices[][64] = {
60 4, 7, 9, 11, 13, 14, 15, 63,
61 7, 7, 11, 12, 14, 15, 63, 63,
62 9, 11, 13, 14, 15, 63, 63, 63,
63 11, 11, 13, 14, 63, 63, 63, 63,
64 11, 13, 14, 63, 63, 63, 63, 63,
65 13, 14, 63, 63, 63, 63, 63, 63,
66 13, 63, 63, 63, 63, 63, 63, 63,
67 63, 63, 63, 63, 63, 63, 63, 63,
70 4, 5, 6, 7, 9, 11, 13, 15,
71 5, 5, 7, 8, 11, 13, 15, 17,
72 6, 7, 9, 11, 13, 15, 15, 17,
73 7, 7, 9, 11, 13, 15, 17, 19,
74 7, 9, 11, 13, 14, 16, 19, 23,
75 9, 11, 13, 14, 16, 19, 23, 29,
76 9, 11, 13, 15, 17, 21, 28, 35,
77 11, 13, 16, 17, 21, 28, 35, 41,
80 4, 4, 5, 5, 6, 7, 7, 9,
81 4, 4, 5, 6, 7, 7, 9, 9,
82 5, 5, 6, 7, 7, 9, 9, 10,
83 5, 5, 6, 7, 7, 9, 9, 10,
84 5, 6, 7, 7, 8, 9, 10, 12,
85 6, 7, 7, 8, 9, 10, 12, 15,
86 6, 7, 7, 9, 10, 11, 14, 17,
87 7, 7, 9, 10, 11, 14, 17, 21,
90 4, 4, 4, 4, 4, 4, 4, 4,
91 4, 4, 4, 4, 4, 4, 4, 4,
92 4, 4, 4, 4, 4, 4, 4, 4,
93 4, 4, 4, 4, 4, 4, 4, 5,
94 4, 4, 4, 4, 4, 4, 5, 5,
95 4, 4, 4, 4, 4, 5, 5, 6,
96 4, 4, 4, 4, 5, 5, 6, 7,
97 4, 4, 4, 4, 5, 6, 7, 7,
100 4, 4, 4, 4, 4, 4, 4, 4,
101 4, 4, 4, 4, 4, 4, 4, 4,
102 4, 4, 4, 4, 4, 4, 4, 4,
103 4, 4, 4, 4, 4, 4, 4, 4,
104 4, 4, 4, 4, 4, 4, 4, 4,
105 4, 4, 4, 4, 4, 4, 4, 4,
106 4, 4, 4, 4, 4, 4, 4, 4,
107 4, 4, 4, 4, 4, 4, 4, 4,
/* Macroblock-count thresholds. When no explicit bits_per_mb is set,
 * encode_init() walks this table with the total macroblock count of the frame
 * and uses the resulting index to pick an entry of prores_profile::br_tab.
 * The trailing comments give the frame size each limit corresponds to. */
111 #define NUM_MB_LIMITS 4
112 static const int prores_mb_limits[NUM_MB_LIMITS] = {
113 1620, // up to 720x576
114 2700, // up to 960x720
115 6075, // up to 1440x1080
116 9216, // up to 2048x1152
/* Per-profile parameters. encode_init() points ctx->profile_info at the entry
 * selected by the "profile" option and reads from it the codec tag, the
 * bits-per-macroblock table (one value per prores_mb_limits[] slot) and the
 * default quantisation matrix index.
 * NOTE(review): several members and initialisers are not visible in this
 * excerpt, e.g. min_quant/max_quant, which other code in this file reads. */
119 static const struct prores_profile {
120 const char *full_name;
124 int br_tab[NUM_MB_LIMITS];
126 } prores_profile_info[5] = {
128 .full_name = "proxy",
129 .tag = MKTAG('a', 'p', 'c', 'o'),
132 .br_tab = { 300, 242, 220, 194 },
133 .quant = QUANT_MAT_PROXY,
137 .tag = MKTAG('a', 'p', 'c', 's'),
140 .br_tab = { 720, 560, 490, 440 },
141 .quant = QUANT_MAT_LT,
144 .full_name = "standard",
145 .tag = MKTAG('a', 'p', 'c', 'n'),
148 .br_tab = { 1050, 808, 710, 632 },
149 .quant = QUANT_MAT_STANDARD,
152 .full_name = "high quality",
153 .tag = MKTAG('a', 'p', 'c', 'h'),
156 .br_tab = { 1566, 1216, 1070, 950 },
157 .quant = QUANT_MAT_HQ,
161 .tag = MKTAG('a', 'p', '4', 'h'),
164 .br_tab = { 2350, 1828, 1600, 1425 },
165 .quant = QUANT_MAT_HQ,
/* Number of trellis nodes reserved per slice column; find_slice_quant()
 * addresses the previous column as trellis_node - TRELLIS_WIDTH. */
#define TRELLIS_WIDTH 16
/* Saturation value for accumulated trellis scores. Parenthesised so that the
 * macro expands as a single operand next to any other operator. */
#define SCORE_LIMIT   (INT_MAX / 2)
/* Number of pre-scaled quantisation matrices kept in ProresContext::quants;
 * larger quantisers get a matrix computed on the fly. */
#define MAX_STORED_Q  16
/* Scratch state used by one worker thread; find_quant_thread() selects it as
 * ctx->tdata + threadnr.
 * NOTE(review): the closing line of the typedef is not visible in this excerpt. */
181 typedef struct ProresThreadData {
/* per-plane slice data filled by get_slice_data()/get_alpha_data() */
182 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
/* 16x16 staging buffer passed to get_slice_data() for edge macroblocks */
183 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
184 int16_t custom_q[64];
/* trellis nodes, allocated per thread in encode_init() */
185 struct TrellisNode *nodes;
/* Private encoder context (avctx->priv_data).
 * NOTE(review): a number of members referenced elsewhere in this file
 * (e.g. mbs_per_slice, alpha_bits, slice_q) are not visible in this excerpt. */
188 typedef struct ProresContext {
/* slice data and edge staging buffer used by encode_slice() */
190 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
191 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
/* quant_mat scaled by the quantiser index, filled in encode_init() */
192 int16_t quants[MAX_STORED_Q][64];
/* matrix built on demand by encode_slice() for quantisers >= MAX_STORED_Q */
193 int16_t custom_q[64];
194 const uint8_t *quant_mat;
/* interlaced or progressive scan order, chosen in encode_init() */
195 const uint8_t *scantable;
/* forward transform of one 8x8 block; set to prores_fdct in encode_init() */
197 void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
198 int linesize, int16_t *block);
/* picture size in 16x16 macroblocks */
201 int mb_width, mb_height;
203 int num_chroma_blocks, chroma_factor;
205 int slices_per_picture;
206 int pictures_per_frame; // 1 for progressive, 2 for interlaced
/* packet size requested in encode_frame(); raised there when a packet has to grow */
217 int frame_size_upper_bound;
220 const struct prores_profile *profile_info;
/* one ProresThreadData per avctx->thread_count, allocated in encode_init() */
224 ProresThreadData *tdata;
/*
 * Fetch the samples of one slice of a single plane and transform them into
 * 8x8 coefficient blocks through ctx->fdct.
 *
 * src points at the first sample of the slice and linesize is its stride in
 * bytes (the edge path adds it to a uint8_t pointer). x, y give the slice
 * position and w, h the plane size used for the edge test. A macroblock is
 * 4 * blocks_per_mb samples wide and 16 rows high. When it does not fit
 * entirely inside w x h, the available bw x bh samples are copied into
 * emu_buf and the last column and last row are replicated to fill it.
 *
 * Two different block orderings are visible at the end of the loop;
 * is_chroma presumably selects between them - TODO confirm, the selecting
 * condition is not visible in this excerpt.
 */
227 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
228 int linesize, int x, int y, int w, int h,
229 int16_t *blocks, uint16_t *emu_buf,
230 int mbs_per_slice, int blocks_per_mb, int is_chroma)
232 const uint16_t *esrc;
233 const int mb_width = 4 * blocks_per_mb;
237 for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
239 memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
/* macroblock lies completely inside the plane */
243 if (x + mb_width <= w && y + 16 <= h) {
245 elinesize = linesize;
/* edge case: emu_buf rows are 16 samples wide, stride expressed in bytes */
250 elinesize = 16 * sizeof(*emu_buf);
252 bw = FFMIN(w - x, mb_width);
253 bh = FFMIN(h - y, 16);
255 for (j = 0; j < bh; j++) {
256 memcpy(emu_buf + j * 16,
257 (const uint8_t*)src + j * linesize,
/* repeat the last valid sample of the row */
259 pix = emu_buf[j * 16 + bw - 1];
260 for (k = bw; k < mb_width; k++)
261 emu_buf[j * 16 + k] = pix;
/* repeat the last valid row */
264 memcpy(emu_buf + j * 16,
265 emu_buf + (bh - 1) * 16,
266 mb_width * sizeof(*emu_buf));
/* esrc is a uint16_t pointer and elinesize is in bytes, so
 * "elinesize * 4" elements moves eight rows down */
269 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
271 if (blocks_per_mb > 2) {
272 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
275 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
277 if (blocks_per_mb > 2) {
278 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
282 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
284 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
286 if (blocks_per_mb > 2) {
287 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
289 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
/*
 * Copy the alpha samples of one slice into blocks as 16 rows of
 * 16 * mbs_per_slice samples.
 *
 * At most copy_w x copy_h samples are read from src (clipped against w and
 * h). The last copied sample of each row is repeated up to the slice width,
 * and the last copied row is repeated up to 16 rows. linesize is in bytes:
 * src, a uint16_t pointer, advances by linesize >> 1 per row.
 *
 * NOTE(review): the abits-dependent sample conversion is only partly visible
 * in this excerpt (one of the two per-sample loops has no visible body).
 */
298 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
299 int linesize, int x, int y, int w, int h,
300 int16_t *blocks, int mbs_per_slice, int abits)
302 const int slice_width = 16 * mbs_per_slice;
303 int i, j, copy_w, copy_h;
305 copy_w = FFMIN(w - x, slice_width);
306 copy_h = FFMIN(h - y, 16);
307 for (i = 0; i < copy_h; i++) {
308 memcpy(blocks, src, copy_w * sizeof(*src));
310 for (j = 0; j < copy_w; j++)
313 for (j = 0; j < copy_w; j++)
314 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
/* horizontal padding with the last copied sample */
315 for (j = copy_w; j < slice_width; j++)
316 blocks[j] = blocks[copy_w - 1];
317 blocks += slice_width;
318 src += linesize >> 1;
/* vertical padding: duplicate the previous row */
320 for (; i < 16; i++) {
321 memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
322 blocks += slice_width;
/* The codebook byte is decoded as: bits 0-1 = switch_bits - 1, bits 2-4 =
 * exp-Golomb order, bits 5 and up = Rice order. Values of at least
 * switch_bits << rice_order take the exp-Golomb branch, smaller ones the
 * Rice branch.
 * NOTE(review): parts of the Rice branch are not visible in this excerpt. */
327 * Write an unsigned rice/exp golomb codeword.
329 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
331 unsigned int rice_order, exp_order, switch_bits, switch_val;
334 /* number of prefix bits to switch between Rice and expGolomb */
335 switch_bits = (codebook & 3) + 1;
336 rice_order = codebook >> 5; /* rice code order */
337 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
339 switch_val = switch_bits << rice_order;
341 if (val >= switch_val) {
342 val -= switch_val - (1 << exp_order);
343 exponent = av_log2(val);
/* zero prefix, then val itself in exponent + 1 bits */
345 put_bits(pb, exponent - exp_order + switch_bits, 0);
346 put_bits(pb, exponent + 1, val);
348 exponent = val >> rice_order;
351 put_bits(pb, exponent, 0);
354 put_sbits(pb, rice_order, val);
/* Sign mask of an int: -1 for negative x, 0 otherwise. This relies on an
 * arithmetic right shift of a 32-bit int, as the original definition did. */
#define GET_SIGN(x)  ((x) >> 31)
/* Map a signed value onto a non-negative code: 0, -1, 1, -2, 2, ... become
 * 0, 1, 2, 3, 4, ... The doubling is written as a multiplication because
 * left-shifting a negative int is undefined behaviour in C. */
#define MAKE_CODE(x) ((((x)) * 2) ^ GET_SIGN(x))
/*
 * Write the DC coefficients of the blocks of one slice.
 *
 * Each DC is taken as (blocks[0] - 0x4000) / scale. The first one is coded
 * with FIRST_DC_CB; the following ones are coded as the difference
 * dc - prev_dc, sign-adjusted with "sign", using a codebook index derived
 * from the previous code and capped at 3.
 * NOTE(review): the updates of sign/prev_dc are not visible in this excerpt.
 */
361 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
362 int blocks_per_slice, int scale)
365 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
367 prev_dc = (blocks[0] - 0x4000) / scale;
368 encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
373 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
374 dc = (blocks[0] - 0x4000) / scale;
375 delta = dc - prev_dc;
376 new_sign = GET_SIGN(delta);
377 delta = (delta ^ sign) - sign;
378 code = MAKE_CODE(delta);
379 encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
/* next codebook index: ceil(code / 2), limited to 3 */
380 codebook = (code + (code & 1)) >> 1;
381 codebook = FFMIN(codebook, 3);
/*
 * Write the AC coefficients of one slice.
 *
 * Coefficients are visited by scan position first (i = 1..63) and by block
 * second (idx steps by 64), so the same frequency of every block is coded
 * together. Each coefficient is divided by qmat[scan[i]]. A run, the level
 * and one sign bit are emitted, and the run/level codebooks for the next
 * symbol are chosen from the current run (capped at 15) and absolute level
 * (capped at 9).
 * NOTE(review): presumably only non-zero levels are emitted and zero levels
 * extend the run - the guard and the run bookkeeping are not visible in this
 * excerpt.
 */
387 static void encode_acs(PutBitContext *pb, int16_t *blocks,
388 int blocks_per_slice,
389 int plane_size_factor,
390 const uint8_t *scan, const int16_t *qmat)
393 int run, level, run_cb, lev_cb;
394 int max_coeffs, abs_level;
396 max_coeffs = blocks_per_slice << 6;
/* initial codebooks */
397 run_cb = ff_prores_run_to_cb_index[4];
398 lev_cb = ff_prores_lev_to_cb_index[2];
401 for (i = 1; i < 64; i++) {
402 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
403 level = blocks[idx] / qmat[scan[i]];
405 abs_level = FFABS(level);
406 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
407 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
409 put_sbits(pb, 1, GET_SIGN(level));
411 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
412 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Code one colour plane of a slice: DC coefficients (quantised by qmat[0])
 * followed by AC coefficients in ctx->scantable order.
 *
 * Returns the number of bytes added to pb, computed from the bit position
 * before and after.
 * NOTE(review): the end of the parameter list and any flush between the
 * coding calls and the return are not visible in this excerpt.
 */
421 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
422 const uint16_t *src, int linesize,
423 int mbs_per_slice, int16_t *blocks,
424 int blocks_per_mb, int plane_size_factor,
427 int blocks_per_slice, saved_pos;
429 saved_pos = put_bits_count(pb);
430 blocks_per_slice = mbs_per_slice * blocks_per_mb;
432 encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
433 encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
434 ctx->scantable, qmat);
437 return (put_bits_count(pb) - saved_pos) >> 3;
/*
 * Write the difference between the current and previous alpha sample.
 *
 * dbits is 4 for 8-bit alpha and 7 otherwise. Note that "1 << dbits - 1"
 * parses as 1 << (dbits - 1). A zero difference, or one outside
 * [-dsize, dsize], is written as abits raw bits; any other difference is
 * written as |diff| - 1 in dbits - 1 bits followed by a sign bit.
 * NOTE(review): the wrap-around adjustments of diff and the flag bit that
 * distinguishes the two forms are not visible in this excerpt.
 */
440 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
442 const int mask = (1 << abits) - 1;
443 const int dbits = (abits == 8) ? 4 : 7;
444 const int dsize = 1 << dbits - 1;
445 int diff = cur - prev;
448 if (diff >= (1 << abits) - dsize)
450 if (diff < -dsize || diff > dsize || !diff) {
452 put_bits(pb, abits, diff);
455 put_bits(pb, dbits - 1, FFABS(diff) - 1);
456 put_bits(pb, 1, diff < 0);
/*
 * Write the length of a run of repeated alpha samples, either as a 4-bit or
 * as a 15-bit field.
 * NOTE(review): the conditions choosing between the two fields, and any
 * prefix bits, are not visible in this excerpt.
 */
460 static void put_alpha_run(PutBitContext *pb, int run)
465 put_bits(pb, 4, run);
467 put_bits(pb, 15, run);
/*
 * Code the alpha plane of one slice (mbs_per_slice * 256 samples) as sample
 * differences plus run lengths.
 *
 * The predictor starts at the all-ones value for the configured alpha depth.
 * Returns the number of bytes added to pb.
 * NOTE(review): the run-detection logic inside the loop is not visible in
 * this excerpt.
 */
473 // todo alpha quantisation for high quants
474 static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
475 int mbs_per_slice, uint16_t *blocks,
478 const int abits = ctx->alpha_bits;
479 const int mask = (1 << abits) - 1;
480 const int num_coeffs = mbs_per_slice * 256;
481 int saved_pos = put_bits_count(pb);
482 int prev = mask, cur;
487 put_alpha_diff(pb, cur, prev, abits);
492 put_alpha_run (pb, run);
493 put_alpha_diff(pb, cur, prev, abits);
499 } while (idx < num_coeffs);
501 put_alpha_run(pb, run);
503 return (put_bits_count(pb) - saved_pos) >> 3;
/*
 * Encode every plane of one slice into pb and record the size of each plane
 * in sizes[].
 *
 * Quantisation matrix choice: ctx->quants[0] when a fixed quantiser is
 * forced, the stored ctx->quants[quant] when quant < MAX_STORED_Q, otherwise
 * ctx->custom_q filled with quant_mat[i] * quant.
 *
 * With two pictures per frame the line stride is doubled and line_add selects
 * the starting line of the current field. Colour planes go through
 * get_slice_data() + encode_slice_plane(), the alpha plane through
 * get_alpha_data() + encode_alpha_plane(). An error is logged when the bit
 * writer has run past its buffer.
 *
 * NOTE(review): part of the parameter list, the per-plane position setup and
 * the return statements are not visible in this excerpt.
 */
506 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
508 int sizes[4], int x, int y, int quant,
511 ProresContext *ctx = avctx->priv_data;
515 int slice_width_factor = av_log2(mbs_per_slice);
516 int num_cblocks, pwidth, linesize, line_add;
517 int plane_factor, is_chroma;
520 if (ctx->pictures_per_frame == 1)
523 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
525 if (ctx->force_quant) {
526 qmat = ctx->quants[0];
527 } else if (quant < MAX_STORED_Q) {
528 qmat = ctx->quants[quant];
530 qmat = ctx->custom_q;
531 for (i = 0; i < 64; i++)
532 qmat[i] = ctx->quant_mat[i] * quant;
535 for (i = 0; i < ctx->num_planes; i++) {
/* planes 1 and 2 are the chroma planes */
536 is_chroma = (i == 1 || i == 2);
537 plane_factor = slice_width_factor + 2;
539 plane_factor += ctx->chroma_factor - 3;
540 if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
544 pwidth = avctx->width;
549 pwidth = avctx->width >> 1;
552 linesize = pic->linesize[i] * ctx->pictures_per_frame;
553 src = (const uint16_t*)(pic->data[i] + yp * linesize +
554 line_add * pic->linesize[i]) + xp;
557 get_slice_data(ctx, src, linesize, xp, yp,
558 pwidth, avctx->height / ctx->pictures_per_frame,
559 ctx->blocks[0], ctx->emu_buf,
560 mbs_per_slice, num_cblocks, is_chroma);
561 sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
562 mbs_per_slice, ctx->blocks[0],
563 num_cblocks, plane_factor,
566 get_alpha_data(ctx, src, linesize, xp, yp,
567 pwidth, avctx->height / ctx->pictures_per_frame,
568 ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
569 sizes[i] = encode_alpha_plane(ctx, pb, mbs_per_slice,
570 ctx->blocks[0], quant);
572 total_size += sizes[i];
573 if (put_bits_left(pb) < 0) {
574 av_log(avctx, AV_LOG_ERROR,
575 "Underestimated required buffer size.\n");
/*
 * Return the number of bits needed to code val with the given codebook,
 * without writing anything. The codebook byte is split exactly as in
 * encode_vlc_codeword(). The exp-Golomb result equals the sum of the two
 * put_bits() lengths used there; the Rice result is the quotient, plus the
 * Rice order, plus one.
 */
582 static inline int estimate_vlc(unsigned codebook, int val)
584 unsigned int rice_order, exp_order, switch_bits, switch_val;
587 /* number of prefix bits to switch between Rice and expGolomb */
588 switch_bits = (codebook & 3) + 1;
589 rice_order = codebook >> 5; /* rice code order */
590 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
592 switch_val = switch_bits << rice_order;
594 if (val >= switch_val) {
595 val -= switch_val - (1 << exp_order);
596 exponent = av_log2(val);
598 return exponent * 2 - exp_order + switch_bits + 1;
600 return (val >> rice_order) + rice_order + 1;
/*
 * Estimate the bits needed for the DC coefficients of a slice, following the
 * same steps as encode_dcs() but calling estimate_vlc() instead of writing.
 * The remainder of each DC division by scale is added to *error as the
 * quantisation error.
 * NOTE(review): the end of the parameter list, the sign/prev_dc updates and
 * the return statement are not visible in this excerpt.
 */
604 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
608 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
611 prev_dc = (blocks[0] - 0x4000) / scale;
612 bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
616 *error += FFABS(blocks[0] - 0x4000) % scale;
618 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
619 dc = (blocks[0] - 0x4000) / scale;
620 *error += FFABS(blocks[0] - 0x4000) % scale;
621 delta = dc - prev_dc;
622 new_sign = GET_SIGN(delta);
623 delta = (delta ^ sign) - sign;
624 code = MAKE_CODE(delta);
625 bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
626 codebook = (code + (code & 1)) >> 1;
627 codebook = FFMIN(codebook, 3);
/*
 * Estimate the bits needed for the AC coefficients of a slice, visiting them
 * in the same order as encode_acs() and using estimate_vlc() for the run and
 * level codes. The remainder of each division by qmat[scan[i]] is added to
 * *error.
 * NOTE(review): the zero-level guard, the run bookkeeping and the return
 * statement are not visible in this excerpt.
 */
635 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
636 int plane_size_factor,
637 const uint8_t *scan, const int16_t *qmat)
640 int run, level, run_cb, lev_cb;
641 int max_coeffs, abs_level;
644 max_coeffs = blocks_per_slice << 6;
645 run_cb = ff_prores_run_to_cb_index[4];
646 lev_cb = ff_prores_lev_to_cb_index[2];
649 for (i = 1; i < 64; i++) {
650 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
651 level = blocks[idx] / qmat[scan[i]];
652 *error += FFABS(blocks[idx]) % qmat[scan[i]];
654 abs_level = FFABS(level);
655 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
656 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
659 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
660 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Estimate the coded size in bits of one colour plane of a slice, using the
 * coefficients already stored in td->blocks[plane]. The DC and AC estimates
 * are summed and the total is passed through FFALIGN(bits, 8), i.e. aligned
 * to a whole number of bytes. Quantisation error is accumulated into *error.
 */
671 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
672 const uint16_t *src, int linesize,
674 int blocks_per_mb, int plane_size_factor,
675 const int16_t *qmat, ProresThreadData *td)
677 int blocks_per_slice;
680 blocks_per_slice = mbs_per_slice * blocks_per_mb;
682 bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
683 bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
684 plane_size_factor, ctx->scantable, qmat);
686 return FFALIGN(bits, 8);
/*
 * Bit-count counterpart of put_alpha_diff(): classifies the difference
 * between two alpha samples with the same dbits/dsize thresholds
 * ("1 << dbits - 1" parses as 1 << (dbits - 1)).
 * NOTE(review): the returned bit counts are not visible in this excerpt.
 */
689 static int est_alpha_diff(int cur, int prev, int abits)
691 const int mask = (1 << abits) - 1;
692 const int dbits = (abits == 8) ? 4 : 7;
693 const int dsize = 1 << dbits - 1;
694 int diff = cur - prev;
697 if (diff >= (1 << abits) - dsize)
699 if (diff < -dsize || diff > dsize || !diff)
/*
 * Estimate the coded size of the alpha plane of one slice
 * (mbs_per_slice * 256 samples) by summing est_alpha_diff() results; the
 * predictor starts at the all-ones value for the configured alpha depth.
 * NOTE(review): the run accounting and the return statement are not visible
 * in this excerpt.
 */
705 static int estimate_alpha_plane(ProresContext *ctx, int *error,
706 const uint16_t *src, int linesize,
707 int mbs_per_slice, int quant,
710 const int abits = ctx->alpha_bits;
711 const int mask = (1 << abits) - 1;
712 const int num_coeffs = mbs_per_slice * 256;
713 int prev = mask, cur;
720 bits = est_alpha_diff(cur, prev, abits);
731 bits += est_alpha_diff(cur, prev, abits);
737 } while (idx < num_coeffs);
/*
 * Rate-control step for one slice: evaluate every candidate quantiser and
 * extend the trellis by one column.
 *
 * 1. Load the slice data of every plane into td->blocks[].
 * 2. For each q in [min_quant, max_quant], estimate the slice size in bits
 *    and its quantisation error.
 * 3. Fill the extra slot max_quant + 1 ("overquant"): it reuses max_quant if
 *    that already fits ctx->bits_per_mb * mbs_per_slice, otherwise it is the
 *    result of searching quantisers above max_quant (below 128).
 * 4. For every (previous node, current q) pair, accumulate bits and score
 *    from the previous column (trellis_node - TRELLIS_WIDTH) and keep the
 *    best predecessor per node.
 * 5. Select the node of this column with the lowest score.
 *
 * find_quant_thread() uses the return value as a node index.
 * NOTE(review): the return statement and several statements inside the
 * loops are not visible in this excerpt.
 */
749 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
750 int trellis_node, int x, int y, int mbs_per_slice,
751 ProresThreadData *td)
753 ProresContext *ctx = avctx->priv_data;
754 int i, q, pq, xp, yp;
756 int slice_width_factor = av_log2(mbs_per_slice);
757 int num_cblocks[MAX_PLANES], pwidth;
758 int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
759 const int min_quant = ctx->profile_info->min_quant;
760 const int max_quant = ctx->profile_info->max_quant;
761 int error, bits, bits_limit;
762 int mbs, prev, cur, new_score;
763 int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
766 int linesize[4], line_add;
768 if (ctx->pictures_per_frame == 1)
771 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
/* macroblocks consumed in this row up to and including this slice */
772 mbs = x + mbs_per_slice;
774 for (i = 0; i < ctx->num_planes; i++) {
775 is_chroma[i] = (i == 1 || i == 2);
776 plane_factor[i] = slice_width_factor + 2;
778 plane_factor[i] += ctx->chroma_factor - 3;
779 if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
783 pwidth = avctx->width;
788 pwidth = avctx->width >> 1;
791 linesize[i] = pic->linesize[i] * ctx->pictures_per_frame;
792 src = (const uint16_t*)(pic->data[i] + yp * linesize[i] +
793 line_add * pic->linesize[i]) + xp;
796 get_slice_data(ctx, src, linesize[i], xp, yp,
797 pwidth, avctx->height / ctx->pictures_per_frame,
798 td->blocks[i], td->emu_buf,
799 mbs_per_slice, num_cblocks[i], is_chroma[i]);
801 get_alpha_data(ctx, src, linesize[i], xp, yp,
802 pwidth, avctx->height / ctx->pictures_per_frame,
803 td->blocks[i], mbs_per_slice, ctx->alpha_bits);
/* reset this column's nodes, including the overquant slot */
807 for (q = min_quant; q < max_quant + 2; q++) {
808 td->nodes[trellis_node + q].prev_node = -1;
809 td->nodes[trellis_node + q].quant = q;
812 // todo: maybe perform coarser quantising to fit into frame size when needed
813 for (q = min_quant; q <= max_quant; q++) {
816 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
817 bits += estimate_slice_plane(ctx, &error, i,
820 num_cblocks[i], plane_factor[i],
824 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
825 mbs_per_slice, q, td->blocks[3]);
826 if (bits > 65000 * 8) {
830 slice_bits[q] = bits;
831 slice_score[q] = error;
/* overquant slot: reuse max_quant if it fits the budget, else search upward */
833 if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
834 slice_bits[max_quant + 1] = slice_bits[max_quant];
835 slice_score[max_quant + 1] = slice_score[max_quant] + 1;
836 overquant = max_quant;
838 for (q = max_quant + 1; q < 128; q++) {
841 if (q < MAX_STORED_Q) {
842 qmat = ctx->quants[q];
845 for (i = 0; i < 64; i++)
846 qmat[i] = ctx->quant_mat[i] * q;
848 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
849 bits += estimate_slice_plane(ctx, &error, i,
852 num_cblocks[i], plane_factor[i],
856 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
857 mbs_per_slice, q, td->blocks[3]);
858 if (bits <= ctx->bits_per_mb * mbs_per_slice)
862 slice_bits[max_quant + 1] = bits;
863 slice_score[max_quant + 1] = error;
866 td->nodes[trellis_node + max_quant + 1].quant = overquant;
/* trellis update: try every predecessor for every node of this column */
868 bits_limit = mbs * ctx->bits_per_mb;
869 for (pq = min_quant; pq < max_quant + 2; pq++) {
870 prev = trellis_node - TRELLIS_WIDTH + pq;
872 for (q = min_quant; q < max_quant + 2; q++) {
873 cur = trellis_node + q;
875 bits = td->nodes[prev].bits + slice_bits[q];
876 error = slice_score[q];
877 if (bits > bits_limit)
/* saturate instead of adding once either term has reached the limit */
880 if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
881 new_score = td->nodes[prev].score + error;
883 new_score = SCORE_LIMIT;
884 if (td->nodes[cur].prev_node == -1 ||
885 td->nodes[cur].score >= new_score) {
887 td->nodes[cur].bits = bits;
888 td->nodes[cur].score = new_score;
889 td->nodes[cur].prev_node = prev;
/* choose the lowest score; "<=" lets a later (larger) q win ties */
894 error = td->nodes[trellis_node + min_quant].score;
895 pq = trellis_node + min_quant;
896 for (q = min_quant + 1; q < max_quant + 2; q++) {
897 if (td->nodes[trellis_node + q].score <= error) {
898 error = td->nodes[trellis_node + q].score;
899 pq = trellis_node + q;
/*
 * Worker run once per macroblock row (jobnr is the row index y).
 *
 * It calls find_slice_quant() for each slice of the row from left to right,
 * giving slice number mb the trellis column (mb + 1) * TRELLIS_WIDTH. It then
 * walks the prev_node links backwards from the last returned node and stores
 * the chosen quantiser of every slice in ctx->slice_q.
 * NOTE(review): the body of the slice-width adjustment loop and the return
 * statement are not visible in this excerpt.
 */
906 static int find_quant_thread(AVCodecContext *avctx, void *arg,
907 int jobnr, int threadnr)
909 ProresContext *ctx = avctx->priv_data;
910 ProresThreadData *td = ctx->tdata + threadnr;
911 int mbs_per_slice = ctx->mbs_per_slice;
912 int x, y = jobnr, mb, q = 0;
914 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
915 while (ctx->mb_width - x < mbs_per_slice)
917 q = find_slice_quant(avctx, avctx->coded_frame,
918 (mb + 1) * TRELLIS_WIDTH, x, y,
/* backtrack from the last slice to the first */
922 for (x = ctx->slices_width - 1; x >= 0; x--) {
923 ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
924 q = td->nodes[q].prev_node;
/*
 * Encode one frame into pkt.
 *
 * Visible steps:
 *  - allocate a packet of ctx->frame_size_upper_bound + FF_MIN_BUFFER_SIZE
 *    bytes;
 *  - write the frame container ID and the frame header (version, vendor,
 *    dimensions, flags, colour properties, alpha information and, unless the
 *    default matrix is selected, the quantisation matrix twice, for luma and
 *    chroma);
 *  - for each picture (pictures_per_frame of them) write the picture header,
 *    reserve two bytes per slice for the seek table, run find_quant_thread()
 *    through avctx->execute2() unless a quantiser is forced, then encode
 *    every slice;
 *  - back-patch the fields that are only known afterwards (frame header
 *    size, per-slice sizes, picture size, frame size) through saved pointers.
 *
 * Before each slice, if the space left is not larger than twice the current
 * max_slice_size, the packet is enlarged with av_grow_packet() and every
 * saved pointer is rebased onto the new pkt->data.
 *
 * NOTE(review): the return statements and most error paths are not visible
 * in this excerpt.
 */
930 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
931 const AVFrame *pic, int *got_packet)
933 ProresContext *ctx = avctx->priv_data;
934 uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
935 uint8_t *picture_size_pos;
937 int x, y, i, mb, q = 0;
938 int sizes[4] = { 0 };
939 int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
940 int frame_size, picture_size, slice_size;
942 int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1);
945 *avctx->coded_frame = *pic;
946 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
947 avctx->coded_frame->key_frame = 1;
949 pkt_size = ctx->frame_size_upper_bound;
951 if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size + FF_MIN_BUFFER_SIZE)) < 0)
954 orig_buf = pkt->data;
957 orig_buf += 4; // frame size
958 bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
963 buf += 2; // frame header size will be stored here
964 bytestream_put_be16 (&buf, 0); // version 1
965 bytestream_put_buffer(&buf, ctx->vendor, 4);
966 bytestream_put_be16 (&buf, avctx->width);
967 bytestream_put_be16 (&buf, avctx->height);
969 frame_flags = ctx->chroma_factor << 6;
970 if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
971 frame_flags |= pic->top_field_first ? 0x04 : 0x08;
972 bytestream_put_byte (&buf, frame_flags);
974 bytestream_put_byte (&buf, 0); // reserved
975 bytestream_put_byte (&buf, avctx->color_primaries);
976 bytestream_put_byte (&buf, avctx->color_trc);
977 bytestream_put_byte (&buf, avctx->colorspace);
978 bytestream_put_byte (&buf, 0x40 | (ctx->alpha_bits >> 3));
979 bytestream_put_byte (&buf, 0); // reserved
980 if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
981 bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
982 // luma quantisation matrix
983 for (i = 0; i < 64; i++)
984 bytestream_put_byte(&buf, ctx->quant_mat[i]);
985 // chroma quantisation matrix
986 for (i = 0; i < 64; i++)
987 bytestream_put_byte(&buf, ctx->quant_mat[i]);
989 bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
991 bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
993 for (ctx->cur_picture_idx = 0;
994 ctx->cur_picture_idx < ctx->pictures_per_frame;
995 ctx->cur_picture_idx++) {
/* skip the header-size byte: the 32-bit picture size is patched in here later */
997 picture_size_pos = buf + 1;
998 bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
999 buf += 4; // picture data size will be stored here
1000 bytestream_put_be16 (&buf, ctx->slices_per_picture);
1001 bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
1003 // seek table - will be filled during slice encoding
1005 buf += ctx->slices_per_picture * 2;
1008 if (!ctx->force_quant) {
1009 ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
1015 for (y = 0; y < ctx->mb_height; y++) {
1016 int mbs_per_slice = ctx->mbs_per_slice;
1017 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1018 q = ctx->force_quant ? ctx->force_quant
1019 : ctx->slice_q[mb + y * ctx->slices_width];
/* halve the slice width until it fits the macroblocks left in the row */
1021 while (ctx->mb_width - x < mbs_per_slice)
1022 mbs_per_slice >>= 1;
1024 bytestream_put_byte(&buf, slice_hdr_size << 3);
1026 buf += slice_hdr_size - 1;
/* not enough headroom for another slice: enlarge the packet */
1027 if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1028 uint8_t *start = pkt->data;
1029 // Recompute new size according to max_slice_size
1031 int delta = 200 + (ctx->pictures_per_frame *
1032 ctx->slices_per_picture + 1) *
1033 max_slice_size - pkt_size;
1035 delta = FFMAX(delta, 2 * max_slice_size);
1036 ctx->frame_size_upper_bound += delta;
1039 avpriv_request_sample(avctx,
1040 "Packet too small: is %i,"
1041 " needs %i (slice: %i). "
1042 "Correct allocation",
1043 pkt_size, delta, max_slice_size);
1047 ret = av_grow_packet(pkt, delta);
/* pkt->data may have moved: rebase every pointer into the packet */
1053 orig_buf = pkt->data + (orig_buf - start);
1054 buf = pkt->data + (buf - start);
1055 picture_size_pos = pkt->data + (picture_size_pos - start);
1056 slice_sizes = pkt->data + (slice_sizes - start);
1057 slice_hdr = pkt->data + (slice_hdr - start);
1058 tmp = pkt->data + (tmp - start);
1060 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
1061 ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
/* slice header: quantiser, then the size of every plane but the last */
1066 bytestream_put_byte(&slice_hdr, q);
1067 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1068 for (i = 0; i < ctx->num_planes - 1; i++) {
1069 bytestream_put_be16(&slice_hdr, sizes[i]);
1070 slice_size += sizes[i];
1072 bytestream_put_be16(&slice_sizes, slice_size);
1073 buf += slice_size - slice_hdr_size;
1074 if (max_slice_size < slice_size)
1075 max_slice_size = slice_size;
1079 picture_size = buf - (picture_size_pos - 1);
1080 bytestream_put_be32(&picture_size_pos, picture_size);
1084 frame_size = buf - orig_buf;
1085 bytestream_put_be32(&orig_buf, frame_size);
1087 pkt->size = frame_size;
1088 pkt->flags |= AV_PKT_FLAG_KEY;
/*
 * Release everything allocated by encode_init(): the coded frame, the
 * trellis node array of every thread, the thread data array and the
 * per-slice quantiser table. encode_init() also calls this on its
 * allocation-failure paths.
 * NOTE(review): any guard around the per-thread loop (ctx->tdata may still be
 * NULL on those paths) and the return statement are not visible in this
 * excerpt.
 */
1094 static av_cold int encode_close(AVCodecContext *avctx)
1096 ProresContext *ctx = avctx->priv_data;
1099 av_freep(&avctx->coded_frame);
1102 for (i = 0; i < avctx->thread_count; i++)
1103 av_free(ctx->tdata[i].nodes);
1105 av_freep(&ctx->tdata);
1106 av_freep(&ctx->slice_q);
/*
 * Load one 8x8 block of 16-bit samples from src into block (row-major) for
 * the forward DCT. linesize is in bytes, so the uint16_t source pointer
 * advances by linesize >> 1 per row.
 * NOTE(review): the transform call that presumably follows the copy is not
 * visible in this excerpt.
 */
1111 static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
1112 int linesize, int16_t *block)
1115 const uint16_t *tsrc = src;
1117 for (y = 0; y < 8; y++) {
1118 for (x = 0; x < 8; x++)
1119 block[y * 8 + x] = tsrc[x];
1120 tsrc += linesize >> 1;
/*
 * Encoder initialisation.
 *
 * Validates the options (mbs_per_slice must be a power of two, alpha_bits a
 * multiple of 8, the vendor ID exactly 4 characters, bits_per_mb at least 128
 * when given, a forced quantiser at most 64), derives the picture geometry
 * and slice layout, and selects the quantisation matrix.
 *
 * Without a forced quantiser it picks bits_per_mb from the profile table,
 * precomputes the scaled matrices and allocates the per-slice quantiser table
 * and the per-thread trellis nodes. With a forced quantiser it fills
 * quants[0] and derives bits_per_mb from the matrix.
 *
 * Finally it computes frame_size_upper_bound, with an extra allowance for the
 * alpha plane, and sets the codec tag from the profile.
 *
 * NOTE(review): the early "return AVERROR(...)" paths after av_frame_alloc()
 * do not visibly release avctx->coded_frame; only the allocation-failure
 * paths call encode_close(). Confirm whether the caller cleans up on init
 * failure.
 * NOTE(review): several lines, including the final return, are not visible
 * in this excerpt.
 */
1125 static av_cold int encode_init(AVCodecContext *avctx)
1127 ProresContext *ctx = avctx->priv_data;
1130 int min_quant, max_quant;
1131 int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
1133 avctx->bits_per_raw_sample = 10;
1134 avctx->coded_frame = av_frame_alloc();
1135 if (!avctx->coded_frame)
1136 return AVERROR(ENOMEM);
1138 ctx->fdct = prores_fdct;
1139 ctx->scantable = interlaced ? ff_prores_interlaced_scan
1140 : ff_prores_progressive_scan;
1141 ff_fdctdsp_init(&ctx->fdsp, avctx);
1143 mps = ctx->mbs_per_slice;
/* a power of two has a single bit set, so mps & (mps - 1) is zero */
1144 if (mps & (mps - 1)) {
1145 av_log(avctx, AV_LOG_ERROR,
1146 "there should be an integer power of two MBs per slice\n");
1147 return AVERROR(EINVAL);
1149 if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1150 if (ctx->alpha_bits & 7) {
1151 av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1152 return AVERROR(EINVAL);
1155 ctx->alpha_bits = 0;
1158 ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1161 ctx->profile_info = prores_profile_info + ctx->profile;
1162 ctx->num_planes = 3 + !!ctx->alpha_bits;
1164 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
1167 ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1169 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
/* slices per row: the full-width slices, plus one slice for each set bit of
 * the leftover macroblock count (the encoder halves the slice width until the
 * remainder fits) */
1171 ctx->slices_width = ctx->mb_width / mps;
1172 ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1173 ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1174 ctx->pictures_per_frame = 1 + interlaced;
1176 if (ctx->quant_sel == -1)
1177 ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1179 ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
1181 if (strlen(ctx->vendor) != 4) {
1182 av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1183 return AVERROR_INVALIDDATA;
1186 ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1187 if (!ctx->force_quant) {
1188 if (!ctx->bits_per_mb) {
/* find the first size class that holds the frame; falls through to the last one */
1189 for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1190 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1191 ctx->pictures_per_frame)
1193 ctx->bits_per_mb = ctx->profile_info->br_tab[i];
1194 } else if (ctx->bits_per_mb < 128) {
1195 av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1196 return AVERROR_INVALIDDATA;
1199 min_quant = ctx->profile_info->min_quant;
1200 max_quant = ctx->profile_info->max_quant;
1201 for (i = min_quant; i < MAX_STORED_Q; i++) {
1202 for (j = 0; j < 64; j++)
1203 ctx->quants[i][j] = ctx->quant_mat[j] * i;
1206 ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1207 if (!ctx->slice_q) {
1208 encode_close(avctx);
1209 return AVERROR(ENOMEM);
1212 ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1214 encode_close(avctx);
1215 return AVERROR(ENOMEM);
1218 for (j = 0; j < avctx->thread_count; j++) {
1219 ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1221 * sizeof(*ctx->tdata->nodes));
1222 if (!ctx->tdata[j].nodes) {
1223 encode_close(avctx);
1224 return AVERROR(ENOMEM);
/* initialise the nodes at indices min_quant .. max_quant + 1, which
 * find_slice_quant() reads as the predecessors of the first slice */
1226 for (i = min_quant; i < max_quant + 2; i++) {
1227 ctx->tdata[j].nodes[i].prev_node = -1;
1228 ctx->tdata[j].nodes[i].bits = 0;
1229 ctx->tdata[j].nodes[i].score = 0;
1235 if (ctx->force_quant > 64) {
1236 av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1237 return AVERROR_INVALIDDATA;
1240 for (j = 0; j < 64; j++) {
1241 ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1242 ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
1245 ctx->bits_per_mb = ls * 8;
1246 if (ctx->chroma_factor == CFACTOR_Y444)
1247 ctx->bits_per_mb += ls * 4;
1250 ctx->frame_size_upper_bound = (ctx->pictures_per_frame *
1251 ctx->slices_per_picture + 1) *
1252 (2 + 2 * ctx->num_planes +
1253 (mps * ctx->bits_per_mb) / 8)
1256 if (ctx->alpha_bits) {
1257 // The alpha plane is run-coded and might exceed the bit budget.
1258 ctx->frame_size_upper_bound += (ctx->pictures_per_frame *
1259 ctx->slices_per_picture + 1) *
1260 /* num pixels per slice */ (ctx->mbs_per_slice * 256 *
1261 /* bits per pixel */ (1 + ctx->alpha_bits + 1) + 7 >> 3);
1264 avctx->codec_tag = ctx->profile_info->tag;
1266 av_log(avctx, AV_LOG_DEBUG,
1267 "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1268 ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1269 interlaced ? "yes" : "no", ctx->bits_per_mb);
1270 av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1271 ctx->frame_size_upper_bound);
/* Byte offset of an option's backing field inside ProresContext. */
#define OFFSET(x) offsetof(ProresContext, x)
/* Flags shared by every option of this encoder. Parenthesised so that the
 * macro expands as a single operand next to any other operator. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
/* Private options of the encoder. Each entry binds an option name to a
 * ProresContext field through OFFSET(); the AV_OPT_TYPE_CONST entries are
 * named values for the "profile" and "quant_mat" options.
 * NOTE(review): the terminating entry of the table is not visible in this
 * excerpt. */
1279 static const AVOption options[] = {
1280 { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1281 AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1282 { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1283 { .i64 = PRORES_PROFILE_STANDARD },
1284 PRORES_PROFILE_PROXY, PRORES_PROFILE_4444, VE, "profile" },
1285 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1286 0, 0, VE, "profile" },
1287 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1288 0, 0, VE, "profile" },
1289 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1290 0, 0, VE, "profile" },
1291 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1292 0, 0, VE, "profile" },
1293 { "4444", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1294 0, 0, VE, "profile" },
/* encode_init() rejects any vendor string that is not exactly 4 characters */
1295 { "vendor", "vendor ID", OFFSET(vendor),
1296 AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
/* 0 lets encode_init() derive the value from the profile table */
1297 { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1298 AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
/* -1 ("auto") takes the matrix from the selected profile */
1299 { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1300 { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1301 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1302 0, 0, VE, "quant_mat" },
1303 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1304 0, 0, VE, "quant_mat" },
1305 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1306 0, 0, VE, "quant_mat" },
1307 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1308 0, 0, VE, "quant_mat" },
1309 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1310 0, 0, VE, "quant_mat" },
1311 { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1312 0, 0, VE, "quant_mat" },
/* encode_init() requires a multiple of 8 and forces 0 for pixel formats without alpha */
1313 { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1314 { .i64 = 16 }, 0, 16, VE },
/* AVClass for the encoder's private context; ff_prores_ks_encoder refers to
 * it through .priv_class.
 * NOTE(review): the member that attaches the option table is not visible in
 * this excerpt. */
1318 static const AVClass proresenc_class = {
1319 .class_name = "ProRes encoder",
1320 .item_name = av_default_item_name,
1322 .version = LIBAVUTIL_VERSION_INT,
/* Codec registration entry for the "prores_ks" encoder. It wires up the
 * init/close/encode callbacks defined in this file, declares slice-threading
 * capability and lists the accepted 10-bit pixel formats (4:2:2, 4:4:4 and
 * 4:4:4 with alpha). */
1325 AVCodec ff_prores_ks_encoder = {
1326 .name = "prores_ks",
1327 .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1328 .type = AVMEDIA_TYPE_VIDEO,
1329 .id = AV_CODEC_ID_PRORES,
1330 .priv_data_size = sizeof(ProresContext),
1331 .init = encode_init,
1332 .close = encode_close,
1333 .encode2 = encode_frame,
1334 .capabilities = CODEC_CAP_SLICE_THREADS,
1335 .pix_fmts = (const enum AVPixelFormat[]) {
1336 AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1337 AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1339 .priv_class = &proresenc_class,