4 * Copyright (c) 2012 Konstantin Shishkov
6 * This encoder appears to be based on Anatoliy Wasserman's encoder, considering
7 * the similarities in the bugs.
9 * This file is part of FFmpeg.
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/opt.h"
27 #include "libavutil/pixdesc.h"
32 #include "bytestream.h"
34 #include "proresdata.h"
/* Chroma layout codes for ProresContext.chroma_factor; encode_frame() also
 * shifts that value into the frame flags byte. */
36 #define CFACTOR_Y422 2
37 #define CFACTOR_Y444 3
/* Upper limit of the "mbs_per_slice" option; also sizes the per-slice
 * coefficient buffers in ProresContext and ProresThreadData. */
39 #define MAX_MBS_PER_SLICE 8
/* Profile identifiers: they bound and name the values of the "profile" option
 * and index prores_profile_info[] in encode_init().
 * NOTE(review): only part of this enumeration is visible in this listing. */
44 PRORES_PROFILE_PROXY = 0,
46 PRORES_PROFILE_STANDARD,
/*
 * Quantisation matrices, 64 entries each. encode_init() selects one of them
 * either through the profile's .quant field or through the "quant_mat" option,
 * and multiplies it by the quantiser to build the effective matrices.
 * NOTE(review): the matrices are presumably ordered to match the QUANT_MAT_*
 * constants (proxy, LT, standard, HQ, default); that enum and the brace lines
 * separating the matrices are not visible in this listing - confirm.
 */
59 static const uint8_t prores_quant_matrices[][64] = {
/* matrix 0 (coarsest: most high-frequency entries are 63) */
61 4, 7, 9, 11, 13, 14, 15, 63,
62 7, 7, 11, 12, 14, 15, 63, 63,
63 9, 11, 13, 14, 15, 63, 63, 63,
64 11, 11, 13, 14, 63, 63, 63, 63,
65 11, 13, 14, 63, 63, 63, 63, 63,
66 13, 14, 63, 63, 63, 63, 63, 63,
67 13, 63, 63, 63, 63, 63, 63, 63,
68 63, 63, 63, 63, 63, 63, 63, 63,
/* matrix 1 */
71 4, 5, 6, 7, 9, 11, 13, 15,
72 5, 5, 7, 8, 11, 13, 15, 17,
73 6, 7, 9, 11, 13, 15, 15, 17,
74 7, 7, 9, 11, 13, 15, 17, 19,
75 7, 9, 11, 13, 14, 16, 19, 23,
76 9, 11, 13, 14, 16, 19, 23, 29,
77 9, 11, 13, 15, 17, 21, 28, 35,
78 11, 13, 16, 17, 21, 28, 35, 41,
/* matrix 2 */
81 4, 4, 5, 5, 6, 7, 7, 9,
82 4, 4, 5, 6, 7, 7, 9, 9,
83 5, 5, 6, 7, 7, 9, 9, 10,
84 5, 5, 6, 7, 7, 9, 9, 10,
85 5, 6, 7, 7, 8, 9, 10, 12,
86 6, 7, 7, 8, 9, 10, 12, 15,
87 6, 7, 7, 9, 10, 11, 14, 17,
88 7, 7, 9, 10, 11, 14, 17, 21,
/* matrix 3 */
91 4, 4, 4, 4, 4, 4, 4, 4,
92 4, 4, 4, 4, 4, 4, 4, 4,
93 4, 4, 4, 4, 4, 4, 4, 4,
94 4, 4, 4, 4, 4, 4, 4, 5,
95 4, 4, 4, 4, 4, 4, 5, 5,
96 4, 4, 4, 4, 4, 5, 5, 6,
97 4, 4, 4, 4, 5, 5, 6, 7,
98 4, 4, 4, 4, 5, 6, 7, 7,
/* matrix 4 (flat: every entry is 4) */
101 4, 4, 4, 4, 4, 4, 4, 4,
102 4, 4, 4, 4, 4, 4, 4, 4,
103 4, 4, 4, 4, 4, 4, 4, 4,
104 4, 4, 4, 4, 4, 4, 4, 4,
105 4, 4, 4, 4, 4, 4, 4, 4,
106 4, 4, 4, 4, 4, 4, 4, 4,
107 4, 4, 4, 4, 4, 4, 4, 4,
108 4, 4, 4, 4, 4, 4, 4, 4,
/* Number of frame-size classes; also the length of prores_profile.br_tab. */
112 #define NUM_MB_LIMITS 4
/*
 * Upper bound, in macroblocks per frame, of each frame-size class.
 * encode_init() compares these against
 * mb_width * mb_height * pictures_per_frame to choose which br_tab entry
 * becomes the default bits-per-macroblock value.
 */
113 static const int prores_mb_limits[NUM_MB_LIMITS] = {
114 1620, // up to 720x576
115 2700, // up to 960x720
116 6075, // up to 1440x1080
117 9216, // up to 2048x1152
/*
 * Per-profile encoder parameters. encode_init() picks an entry with
 * prores_profile_info + ctx->profile, so the table order must match the
 * PRORES_PROFILE_* values.
 * NOTE(review): the struct also has tag, min_quant, max_quant and quant members
 * (they are used elsewhere in this file), but their declarations are not
 * visible in this listing.
 */
120 static const struct prores_profile {
121 const char *full_name;
/* default bits per macroblock, one value per prores_mb_limits[] class */
125 int br_tab[NUM_MB_LIMITS];
127 } prores_profile_info[5] = {
/* .tag is copied to avctx->codec_tag; .quant indexes prores_quant_matrices[]
 * when the "quant_mat" option is left at -1. */
129 .full_name = "proxy",
130 .tag = MKTAG('a', 'p', 'c', 'o'),
133 .br_tab = { 300, 242, 220, 194 },
134 .quant = QUANT_MAT_PROXY,
/* 'apcs' entry (its .full_name line is not visible in this listing) */
138 .tag = MKTAG('a', 'p', 'c', 's'),
141 .br_tab = { 720, 560, 490, 440 },
142 .quant = QUANT_MAT_LT,
145 .full_name = "standard",
146 .tag = MKTAG('a', 'p', 'c', 'n'),
149 .br_tab = { 1050, 808, 710, 632 },
150 .quant = QUANT_MAT_STANDARD,
153 .full_name = "high quality",
154 .tag = MKTAG('a', 'p', 'c', 'h'),
157 .br_tab = { 1566, 1216, 1070, 950 },
158 .quant = QUANT_MAT_HQ,
/* 'ap4h' entry (its .full_name line is not visible in this listing) */
162 .tag = MKTAG('a', 'p', '4', 'h'),
165 .br_tab = { 2350, 1828, 1600, 1425 },
166 .quant = QUANT_MAT_HQ,
/* Number of trellis nodes reserved per slice: find_quant_thread() places slice
 * number mb at node offset (mb + 1) * TRELLIS_WIDTH. It is also the size of
 * the per-quantiser bit/score arrays in find_slice_quant(). */
170 #define TRELLIS_WIDTH 16
/* Saturation value for trellis scores: find_slice_quant() refuses to add two
 * scores unless both are below this limit, which keeps the sum inside int.
 * The replacement list is parenthesised so the macro expands as one operand
 * in any expression. */
#define SCORE_LIMIT (INT_MAX / 2)
/* Number of rows in ProresContext.quants; quantisers below this value use a
 * matrix precomputed by encode_init(), larger ones are built on demand. */
180 #define MAX_STORED_Q 16
/* Scratch state owned by one worker thread of the quantiser search;
 * find_quant_thread() selects its instance with ctx->tdata + threadnr. */
182 typedef struct ProresThreadData {
/* per-plane coefficient buffers, filled by get_slice_data()/get_alpha_data() */
183 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
/* 16x16 sample scratch used by get_slice_data() for macroblocks that cross the picture edge */
184 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
/* NOTE(review): presumably holds the matrix for quantisers >= MAX_STORED_Q in
 * find_slice_quant(); the assignment is not visible in this listing */
185 int16_t custom_q[64];
/* trellis nodes, allocated in encode_init() and released in encode_close() */
186 struct TrellisNode *nodes;
/* Encoder private context (AVCodec.priv_data_size is sizeof(ProresContext)).
 * NOTE(review): several members used elsewhere in this file (dsp, profile,
 * alpha_bits, vendor, ...) are declared on lines not visible in this listing. */
189 typedef struct ProresContext {
/* coefficient and edge-emulation scratch used by encode_slice() */
191 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
192 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
/* quants[q][j] = quant_mat[j] * q, precomputed by encode_init(); row 0 holds
 * the matrix for a forced quantiser */
193 int16_t quants[MAX_STORED_Q][64];
/* matrix built on the fly by encode_slice() when quant >= MAX_STORED_Q */
194 int16_t custom_q[64];
/* base matrix taken from prores_quant_matrices[] */
195 const uint8_t *quant_mat;
/* coefficient scan order: interlaced or progressive table */
196 const uint8_t *scantable;
/* forward transform of one 8x8 block; encode_init() installs prores_fdct() */
198 void (* fdct)(DSPContext *dsp, const uint16_t *src,
199 int linesize, int16_t *block);
/* picture size in macroblocks (see encode_init() for the rounding) */
202 int mb_width, mb_height;
204 int num_chroma_blocks, chroma_factor;
/* mb_height * slices_width */
206 int slices_per_picture;
207 int pictures_per_frame; // 1 for progressive, 2 for interlaced
/* worst-case coded frame size in bytes; encode_frame() sizes the packet from it */
217 int frame_size_upper_bound;
/* entry of prores_profile_info[] for the selected profile */
220 const struct prores_profile *profile_info;
/* one ProresThreadData per thread (avctx->thread_count entries) */
224 ProresThreadData *tdata;
/*
 * Transform the samples of one slice of one plane into 8x8 coefficient blocks.
 *
 * src           first sample of the slice (16-bit samples)
 * linesize      distance between sample rows in bytes
 * x, y          position of the slice inside the plane, in samples
 * w, h          plane dimensions in samples
 * blocks        output coefficients, 64 per block
 * emu_buf       16x16 scratch used when a macroblock crosses the plane edge
 * mbs_per_slice number of macroblocks to process
 * blocks_per_mb blocks per macroblock; a macroblock is 4 * blocks_per_mb
 *               samples wide and 16 rows high
 *
 * NOTE(review): declarations, brace lines and some statements of this function
 * (e.g. the `blocks` advance after each transform and the test on is_chroma)
 * are not visible in this listing.
 */
227 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
228 int linesize, int x, int y, int w, int h,
229 int16_t *blocks, uint16_t *emu_buf,
230 int mbs_per_slice, int blocks_per_mb, int is_chroma)
232 const uint16_t *esrc;
233 const int mb_width = 4 * blocks_per_mb;
237 for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
/* zero every remaining block of the slice; the guarding condition is on a
 * line not visible here */
239 memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
/* macroblock lies completely inside the plane: use the source stride */
243 if (x + mb_width <= w && y + 16 <= h) {
245 elinesize = linesize;
/* otherwise work on a padded copy in emu_buf, whose rows are 16 samples wide */
250 elinesize = 16 * sizeof(*emu_buf);
/* size of the region that really exists in the source */
252 bw = FFMIN(w - x, mb_width);
253 bh = FFMIN(h - y, 16);
255 for (j = 0; j < bh; j++) {
256 memcpy(emu_buf + j * 16,
257 (const uint8_t*)src + j * linesize,
/* replicate the last valid sample to the right */
259 pix = emu_buf[j * 16 + bw - 1];
260 for (k = bw; k < mb_width; k++)
261 emu_buf[j * 16 + k] = pix;
/* replicate the last valid row downwards */
264 memcpy(emu_buf + j * 16,
265 emu_buf + (bh - 1) * 16,
266 mb_width * sizeof(*emu_buf));
/* First ordering: top-left, top-right, bottom-left, bottom-right.
 * elinesize is in bytes and esrc points to 16-bit samples, so
 * esrc + elinesize * 4 is eight rows further down. */
269 ctx->fdct(&ctx->dsp, esrc, elinesize, blocks);
271 if (blocks_per_mb > 2) {
272 ctx->fdct(&ctx->dsp, esrc + 8, elinesize, blocks);
275 ctx->fdct(&ctx->dsp, esrc + elinesize * 4, elinesize, blocks);
277 if (blocks_per_mb > 2) {
278 ctx->fdct(&ctx->dsp, esrc + elinesize * 4 + 8, elinesize, blocks);
/* Second ordering: left column first, then the right column.
 * NOTE(review): presumably the chroma case - confirm against is_chroma. */
282 ctx->fdct(&ctx->dsp, esrc, elinesize, blocks);
284 ctx->fdct(&ctx->dsp, esrc + elinesize * 4, elinesize, blocks);
286 if (blocks_per_mb > 2) {
287 ctx->fdct(&ctx->dsp, esrc + 8, elinesize, blocks);
289 ctx->fdct(&ctx->dsp, esrc + elinesize * 4 + 8, elinesize, blocks);
/*
 * Copy the alpha samples of one slice into `blocks` as 16 rows of
 * 16 * mbs_per_slice values, padding the right and bottom edges by
 * replication where the slice extends past the plane.
 *
 * linesize is in bytes (src advances by linesize >> 1 16-bit samples per row);
 * x, y, w, h are in samples; abits is the target alpha depth.
 *
 * NOTE(review): the test on abits and the body of the first conversion loop
 * are on lines not visible in this listing.
 */
298 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
299 int linesize, int x, int y, int w, int h,
300 int16_t *blocks, int mbs_per_slice, int abits)
302 const int slice_width = 16 * mbs_per_slice;
303 int i, j, copy_w, copy_h;
/* clip the copied region to what exists in the source plane */
305 copy_w = FFMIN(w - x, slice_width);
306 copy_h = FFMIN(h - y, 16);
307 for (i = 0; i < copy_h; i++) {
308 memcpy(blocks, src, copy_w * sizeof(*src));
310 for (j = 0; j < copy_w; j++)
/* widen by repeating the top bits in the low bits; presumably 10-bit input
 * expanded to 16 bits (the encoder sets bits_per_raw_sample to 10) */
313 for (j = 0; j < copy_w; j++)
314 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
/* pad to the right with the last copied value */
315 for (j = copy_w; j < slice_width; j++)
316 blocks[j] = blocks[copy_w - 1];
317 blocks += slice_width;
318 src += linesize >> 1;
/* pad downwards by repeating the previous row */
320 for (; i < 16; i++) {
321 memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
322 blocks += slice_width;
/*
 * Codebook descriptor layout, as decoded below:
 *   bits 0-1  switch_bits - 1
 *   bits 2-4  exp-Golomb order
 *   bits 5+   Rice order
 * Values below switch_bits << rice_order take the Rice branch, all others the
 * exp-Golomb branch. estimate_vlc() computes the matching code lengths.
 * NOTE(review): in the Rice branch the guard around the zero-prefix write and
 * the terminating bit are on lines not visible in this listing.
 */
327 * Write an unsigned rice/exp golomb codeword.
329 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
331 unsigned int rice_order, exp_order, switch_bits, switch_val;
334 /* number of prefix bits to switch between Rice and expGolomb */
335 switch_bits = (codebook & 3) + 1;
336 rice_order = codebook >> 5; /* rice code order */
337 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
339 switch_val = switch_bits << rice_order;
341 if (val >= switch_val) {
/* exp-Golomb: rebase the value, then write a zero prefix followed by the
 * value itself in exponent + 1 bits */
342 val -= switch_val - (1 << exp_order);
343 exponent = av_log2(val);
345 put_bits(pb, exponent - exp_order + switch_bits, 0);
346 put_bits(pb, exponent + 1, val);
/* Rice: the quotient is written as a run of zero bits, the remainder as the
 * rice_order low bits of val */
348 exponent = val >> rice_order;
351 put_bits(pb, exponent, 0);
354 put_sbits(pb, rice_order, val);
/* GET_SIGN(x): 0 for x >= 0, -1 (all bits set) for x < 0. Written as a
 * comparison so it does not depend on right-shifting a negative value or on
 * the width of int. */
#define GET_SIGN(x) (-((x) < 0))
/* MAKE_CODE(x): map a signed value to an unsigned code, interleaving
 * non-negative and negative values: 0, -1, 1, -2, 2 ... -> 0, 1, 2, 3, 4 ...
 * Multiplication is used instead of `<< 1` because left-shifting a negative
 * value is undefined behaviour. The argument is evaluated twice. */
#define MAKE_CODE(x) ((((x)) * 2) ^ GET_SIGN(x))
/*
 * Write the DC coefficient of every block of a slice.
 * The first DC is coded directly with FIRST_DC_CB; each later one is coded as
 * a difference from the previous DC with a codebook adapted to the previous
 * code. DC values are offset by 0x4000 and divided by `scale` before coding.
 * NOTE(review): the initialisation and update of `sign` and `prev_dc`, and the
 * initial advance of `blocks`, are on lines not visible in this listing.
 */
361 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
362 int blocks_per_slice, int scale)
365 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
367 prev_dc = (blocks[0] - 0x4000) / scale;
368 encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
373 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
374 dc = (blocks[0] - 0x4000) / scale;
375 delta = dc - prev_dc;
376 new_sign = GET_SIGN(delta);
/* negate delta when `sign` is -1, leave it unchanged when `sign` is 0 */
377 delta = (delta ^ sign) - sign;
378 code = MAKE_CODE(delta);
379 encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
/* next codebook index is ceil(code / 2), capped at 3 */
380 codebook = (code + (code & 1)) >> 1;
381 codebook = FFMIN(codebook, 3);
/*
 * Write the AC coefficients of a slice as (run, level, sign) triples.
 * Coefficients are visited frequency by frequency in scan order (starting at
 * scan position 1), and for each frequency across all blocks of the slice.
 * Each coefficient is divided by the matching qmat entry; the run and level
 * codebooks adapt to the previous run and level.
 * NOTE(review): the zero-level test, the run counter updates and the level
 * argument of the second codeword are on lines not visible in this listing.
 */
387 static void encode_acs(PutBitContext *pb, int16_t *blocks,
388 int blocks_per_slice,
389 int plane_size_factor,
390 const uint8_t *scan, const int16_t *qmat)
393 int run, level, run_cb, lev_cb;
394 int max_coeffs, abs_level;
/* total number of coefficients in the slice (64 per block) */
396 max_coeffs = blocks_per_slice << 6;
/* initial codebook selectors */
397 run_cb = ff_prores_run_to_cb_index[4];
398 lev_cb = ff_prores_lev_to_cb_index[2];
401 for (i = 1; i < 64; i++) {
/* same frequency in every block: blocks are 64 coefficients apart */
402 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
403 level = blocks[idx] / qmat[scan[i]];
405 abs_level = FFABS(level);
406 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
407 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
/* one sign bit: 1 for negative levels */
409 put_sbits(pb, 1, GET_SIGN(level));
411 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
412 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Encode the DC and AC coefficients of one plane of a slice from `blocks`.
 * Returns the number of whole bytes written, taken from the bit position
 * before and after.
 * NOTE(review): the end of the parameter list (including qmat) and, presumably,
 * a flush of the bit writer before the size is taken are on lines not visible
 * in this listing.
 */
421 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
422 const uint16_t *src, int linesize,
423 int mbs_per_slice, int16_t *blocks,
424 int blocks_per_mb, int plane_size_factor,
427 int blocks_per_slice, saved_pos;
429 saved_pos = put_bits_count(pb);
430 blocks_per_slice = mbs_per_slice * blocks_per_mb;
/* qmat[0] is the DC quantiser */
432 encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
433 encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
434 ctx->scantable, qmat);
437 return (put_bits_count(pb) - saved_pos) >> 3;
/*
 * Write one alpha value as a difference from the previous one.
 * A difference that is zero or outside [-dsize, dsize] is written with the
 * full `abits` bits; otherwise a short form of dbits - 1 magnitude bits
 * (|diff| - 1) and a sign bit is used.
 * NOTE(review): the wrap-around adjustment of diff and the flag bit that
 * distinguishes the two forms are on lines not visible in this listing.
 */
440 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
442 const int mask = (1 << abits) - 1;
443 const int dbits = (abits == 8) ? 4 : 7;
/* parses as 1 << (dbits - 1): binary minus binds tighter than << */
444 const int dsize = 1 << dbits - 1;
445 int diff = cur - prev;
448 if (diff >= (1 << abits) - dsize)
450 if (diff < -dsize || diff > dsize || !diff) {
452 put_bits(pb, abits, diff);
455 put_bits(pb, dbits - 1, FFABS(diff) - 1);
456 put_bits(pb, 1, diff < 0);
/*
 * Write the length of a run of repeated alpha values, using either a 4-bit or
 * a 15-bit field.
 * NOTE(review): the conditions choosing between the two forms, and any flag
 * bits, are on lines not visible in this listing.
 */
460 static void put_alpha_run(PutBitContext *pb, int run)
465 put_bits(pb, 4, run);
467 put_bits(pb, 15, run);
/*
 * Encode the alpha samples of one slice (mbs_per_slice * 256 values) as
 * differences and run lengths; the first value is predicted from an all-ones
 * value (`mask`). Returns the number of whole bytes written.
 * NOTE(review): `blocks` is uint16_t * here, while get_alpha_data() fills an
 * int16_t * buffer and encode_slice() passes ctx->blocks[0] (int16_t) - confirm
 * the intended type. The run detection logic is on lines not visible in this
 * listing.
 */
473 // todo alpha quantisation for high quants
474 static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
475 const uint16_t *src, int linesize,
476 int mbs_per_slice, uint16_t *blocks,
479 const int abits = ctx->alpha_bits;
480 const int mask = (1 << abits) - 1;
/* 16x16 alpha samples per macroblock */
481 const int num_coeffs = mbs_per_slice * 256;
482 int saved_pos = put_bits_count(pb);
483 int prev = mask, cur;
488 put_alpha_diff(pb, cur, prev, abits);
/* a value change ends the current run: write the run, then the new value */
493 put_alpha_run (pb, run);
494 put_alpha_diff(pb, cur, prev, abits);
500 } while (idx < num_coeffs);
/* write the run still pending at the end of the slice */
502 put_alpha_run(pb, run);
504 return (put_bits_count(pb) - saved_pos) >> 3;
/*
 * Encode all planes of one slice into `pb`.
 * sizes[i] receives the coded size in bytes of plane i; x and y give the slice
 * position in macroblocks; quant is the slice quantiser.
 * NOTE(review): part of the parameter list, the computation of xp/yp/
 * num_cblocks, the branch separating colour planes from the alpha plane and
 * the return statement are on lines not visible in this listing.
 */
507 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
509 int sizes[4], int x, int y, int quant,
512 ProresContext *ctx = avctx->priv_data;
516 int slice_width_factor = av_log2(mbs_per_slice);
517 int num_cblocks, pwidth, linesize, line_add;
518 int plane_factor, is_chroma;
/* interlaced: select which field line the current picture starts on */
521 if (ctx->pictures_per_frame == 1)
524 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
/* choose the quantisation matrix: forced, precomputed, or built on the fly */
526 if (ctx->force_quant) {
527 qmat = ctx->quants[0];
528 } else if (quant < MAX_STORED_Q) {
529 qmat = ctx->quants[quant];
531 qmat = ctx->custom_q;
532 for (i = 0; i < 64; i++)
533 qmat[i] = ctx->quant_mat[i] * quant;
536 for (i = 0; i < ctx->num_planes; i++) {
/* planes 1 and 2 are chroma */
537 is_chroma = (i == 1 || i == 2);
538 plane_factor = slice_width_factor + 2;
540 plane_factor += ctx->chroma_factor - 3;
/* full-width plane (luma, alpha, or 4:4:4 chroma) versus half-width chroma */
541 if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
545 pwidth = avctx->width;
550 pwidth = avctx->width >> 1;
/* with two pictures per frame each picture uses every second line */
553 linesize = pic->linesize[i] * ctx->pictures_per_frame;
554 src = (const uint16_t*)(pic->data[i] + yp * linesize +
555 line_add * pic->linesize[i]) + xp;
/* every plane reuses ctx->blocks[0] as its coefficient buffer */
558 get_slice_data(ctx, src, linesize, xp, yp,
559 pwidth, avctx->height / ctx->pictures_per_frame,
560 ctx->blocks[0], ctx->emu_buf,
561 mbs_per_slice, num_cblocks, is_chroma);
562 sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
563 mbs_per_slice, ctx->blocks[0],
564 num_cblocks, plane_factor,
567 get_alpha_data(ctx, src, linesize, xp, yp,
568 pwidth, avctx->height / ctx->pictures_per_frame,
569 ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
570 sizes[i] = encode_alpha_plane(ctx, pb, src, linesize,
571 mbs_per_slice, ctx->blocks[0],
574 total_size += sizes[i];
/*
 * Return the length in bits of the codeword for `val` in `codebook`, using the
 * same codebook decoding and Rice/exp-Golomb split as encode_vlc_codeword().
 */
579 static inline int estimate_vlc(unsigned codebook, int val)
581 unsigned int rice_order, exp_order, switch_bits, switch_val;
584 /* number of prefix bits to switch between Rice and expGolomb */
585 switch_bits = (codebook & 3) + 1;
586 rice_order = codebook >> 5; /* rice code order */
587 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
589 switch_val = switch_bits << rice_order;
591 if (val >= switch_val) {
592 val -= switch_val - (1 << exp_order);
593 exponent = av_log2(val);
/* zero prefix (exponent - exp_order + switch_bits) plus exponent + 1 value bits */
595 return exponent * 2 - exp_order + switch_bits + 1;
/* quotient bits plus rice_order remainder bits plus one more bit */
597 return (val >> rice_order) + rice_order + 1;
/*
 * Estimate the bits needed to code the DC coefficients of a slice, following
 * the same steps as encode_dcs(), and add the quantisation remainders of the
 * DC values to *error.
 * NOTE(review): the `scale` parameter declaration, the updates of `sign` and
 * `prev_dc` and the return statement are on lines not visible in this listing.
 */
601 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
605 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
608 prev_dc = (blocks[0] - 0x4000) / scale;
609 bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
/* the part of the DC value lost by the division counts as error */
613 *error += FFABS(blocks[0] - 0x4000) % scale;
615 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
616 dc = (blocks[0] - 0x4000) / scale;
617 *error += FFABS(blocks[0] - 0x4000) % scale;
618 delta = dc - prev_dc;
619 new_sign = GET_SIGN(delta);
/* negate delta when `sign` is -1, leave it unchanged when `sign` is 0 */
620 delta = (delta ^ sign) - sign;
621 code = MAKE_CODE(delta);
622 bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
623 codebook = (code + (code & 1)) >> 1;
624 codebook = FFMIN(codebook, 3);
/*
 * Estimate the bits needed to code the AC coefficients of a slice, visiting
 * them in the same order as encode_acs(), and add each coefficient's
 * quantisation remainder to *error.
 * NOTE(review): the zero-level test, the run counter updates, the level
 * argument of the second estimate and the return statement are on lines not
 * visible in this listing.
 */
632 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
633 int plane_size_factor,
634 const uint8_t *scan, const int16_t *qmat)
637 int run, level, run_cb, lev_cb;
638 int max_coeffs, abs_level;
641 max_coeffs = blocks_per_slice << 6;
642 run_cb = ff_prores_run_to_cb_index[4];
643 lev_cb = ff_prores_lev_to_cb_index[2];
646 for (i = 1; i < 64; i++) {
647 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
648 level = blocks[idx] / qmat[scan[i]];
/* remainder of the division above, accumulated for every coefficient */
649 *error += FFABS(blocks[idx]) % qmat[scan[i]];
651 abs_level = FFABS(level);
652 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
653 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
656 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
657 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Estimate the coded size in bits of one plane of a slice for the matrix
 * `qmat`, reading coefficients from td->blocks[plane]. The quantisation error
 * is accumulated in *error; the result is rounded up to a multiple of 8 bits.
 */
668 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
669 const uint16_t *src, int linesize,
671 int blocks_per_mb, int plane_size_factor,
672 const int16_t *qmat, ProresThreadData *td)
674 int blocks_per_slice;
677 blocks_per_slice = mbs_per_slice * blocks_per_mb;
/* qmat[0] is the DC quantiser */
679 bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
680 bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
681 plane_size_factor, ctx->scantable, qmat);
683 return FFALIGN(bits, 8);
/*
 * Estimate the bits put_alpha_diff() would write for the same arguments; it
 * uses the same classification of the difference.
 * NOTE(review): the wrap-around adjustment and both return statements are on
 * lines not visible in this listing.
 */
686 static int est_alpha_diff(int cur, int prev, int abits)
688 const int mask = (1 << abits) - 1;
689 const int dbits = (abits == 8) ? 4 : 7;
/* parses as 1 << (dbits - 1): binary minus binds tighter than << */
690 const int dsize = 1 << dbits - 1;
691 int diff = cur - prev;
694 if (diff >= (1 << abits) - dsize)
696 if (diff < -dsize || diff > dsize || !diff)
/*
 * Estimate the coded size of the alpha samples of one slice
 * (mbs_per_slice * 256 values), starting from an all-ones predictor as
 * encode_alpha_plane() does.
 * NOTE(review): the last parameter, the run handling and the return statement
 * are on lines not visible in this listing.
 */
702 static int estimate_alpha_plane(ProresContext *ctx, int *error,
703 const uint16_t *src, int linesize,
704 int mbs_per_slice, int quant,
707 const int abits = ctx->alpha_bits;
708 const int mask = (1 << abits) - 1;
709 const int num_coeffs = mbs_per_slice * 256;
710 int prev = mask, cur;
717 bits = est_alpha_diff(cur, prev, abits);
728 bits += est_alpha_diff(cur, prev, abits);
734 } while (idx < num_coeffs);
/*
 * Evaluate the candidate quantisers for one slice and fill the trellis column
 * that starts at node index `trellis_node`.
 *
 * Steps visible below:
 *   1. transform the slice's planes into td->blocks[];
 *   2. estimate bits and error for every quantiser in [min_quant, max_quant];
 *   3. fill one extra candidate (index max_quant + 1) with either a copy of
 *      max_quant or the first larger quantiser that fits the per-slice budget;
 *   4. link every node of this column to its best predecessor in the previous
 *      column (TRELLIS_WIDTH nodes earlier);
 *   5. pick the node with the lowest score in this column.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * find_quant_thread() uses the result as an index into td->nodes.
 * NOTE(review): slice_bits[] and slice_score[] have TRELLIS_WIDTH entries and
 * are indexed up to max_quant + 1, so this relies on the profile's max_quant
 * being at most TRELLIS_WIDTH - 2 - confirm against the profile table.
 */
746 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
747 int trellis_node, int x, int y, int mbs_per_slice,
748 ProresThreadData *td)
750 ProresContext *ctx = avctx->priv_data;
751 int i, q, pq, xp, yp;
753 int slice_width_factor = av_log2(mbs_per_slice);
754 int num_cblocks[MAX_PLANES], pwidth;
755 int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
756 const int min_quant = ctx->profile_info->min_quant;
757 const int max_quant = ctx->profile_info->max_quant;
758 int error, bits, bits_limit;
759 int mbs, prev, cur, new_score;
760 int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
763 int linesize[4], line_add;
/* interlaced: select which field line the current picture starts on */
765 if (ctx->pictures_per_frame == 1)
768 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
/* macroblocks from the start of the row up to the end of this slice */
769 mbs = x + mbs_per_slice;
/* step 1: gather the coefficients (or alpha samples) of every plane */
771 for (i = 0; i < ctx->num_planes; i++) {
772 is_chroma[i] = (i == 1 || i == 2);
773 plane_factor[i] = slice_width_factor + 2;
775 plane_factor[i] += ctx->chroma_factor - 3;
776 if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
780 pwidth = avctx->width;
785 pwidth = avctx->width >> 1;
788 linesize[i] = pic->linesize[i] * ctx->pictures_per_frame;
789 src = (const uint16_t*)(pic->data[i] + yp * linesize[i] +
790 line_add * pic->linesize[i]) + xp;
793 get_slice_data(ctx, src, linesize[i], xp, yp,
794 pwidth, avctx->height / ctx->pictures_per_frame,
795 td->blocks[i], td->emu_buf,
796 mbs_per_slice, num_cblocks[i], is_chroma[i]);
798 get_alpha_data(ctx, src, linesize[i], xp, yp,
799 pwidth, avctx->height / ctx->pictures_per_frame,
800 td->blocks[i], mbs_per_slice, ctx->alpha_bits);
/* reset this column's nodes, including the extra one at max_quant + 1 */
804 for (q = min_quant; q < max_quant + 2; q++) {
805 td->nodes[trellis_node + q].prev_node = -1;
806 td->nodes[trellis_node + q].quant = q;
/* step 2: cost of each regular quantiser */
809 // todo: maybe perform coarser quantising to fit into frame size when needed
810 for (q = min_quant; q <= max_quant; q++) {
/* colour planes first; the alpha plane, if present, is estimated separately */
813 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
814 bits += estimate_slice_plane(ctx, &error, i,
817 num_cblocks[i], plane_factor[i],
821 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
822 mbs_per_slice, q, td->blocks[3]);
/* slice larger than 65000 bytes; the handling is on a line not visible here */
823 if (bits > 65000 * 8) {
827 slice_bits[q] = bits;
828 slice_score[q] = error;
/* step 3: the extra candidate. If max_quant already fits the per-slice budget
 * it is duplicated with a score one higher ... */
830 if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
831 slice_bits[max_quant + 1] = slice_bits[max_quant];
832 slice_score[max_quant + 1] = slice_score[max_quant] + 1;
833 overquant = max_quant;
/* ... otherwise try larger quantisers, below 128, until the slice fits */
835 for (q = max_quant + 1; q < 128; q++) {
838 if (q < MAX_STORED_Q) {
839 qmat = ctx->quants[q];
842 for (i = 0; i < 64; i++)
843 qmat[i] = ctx->quant_mat[i] * q;
845 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
846 bits += estimate_slice_plane(ctx, &error, i,
849 num_cblocks[i], plane_factor[i],
853 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
854 mbs_per_slice, q, td->blocks[3]);
855 if (bits <= ctx->bits_per_mb * mbs_per_slice)
859 slice_bits[max_quant + 1] = bits;
860 slice_score[max_quant + 1] = error;
863 td->nodes[trellis_node + max_quant + 1].quant = overquant;
/* step 4: the budget is cumulative over the row up to this slice */
865 bits_limit = mbs * ctx->bits_per_mb;
866 for (pq = min_quant; pq < max_quant + 2; pq++) {
867 prev = trellis_node - TRELLIS_WIDTH + pq;
869 for (q = min_quant; q < max_quant + 2; q++) {
870 cur = trellis_node + q;
872 bits = td->nodes[prev].bits + slice_bits[q];
873 error = slice_score[q];
874 if (bits > bits_limit)
/* add the scores only when both are below SCORE_LIMIT, otherwise saturate */
877 if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
878 new_score = td->nodes[prev].score + error;
880 new_score = SCORE_LIMIT;
/* keep this predecessor if the node is unset or the score is not worse */
881 if (td->nodes[cur].prev_node == -1 ||
882 td->nodes[cur].score >= new_score) {
884 td->nodes[cur].bits = bits;
885 td->nodes[cur].score = new_score;
886 td->nodes[cur].prev_node = prev;
/* step 5: lowest score in this column; on ties the later (larger q) node wins */
891 error = td->nodes[trellis_node + min_quant].score;
892 pq = trellis_node + min_quant;
893 for (q = min_quant + 1; q < max_quant + 2; q++) {
894 if (td->nodes[trellis_node + q].score <= error) {
895 error = td->nodes[trellis_node + q].score;
896 pq = trellis_node + q;
/*
 * Worker run through avctx->execute2(): chooses the quantiser of every slice
 * in macroblock row `jobnr` and stores the results in ctx->slice_q.
 * Slices are processed left to right, each adding one trellis column; the
 * chosen path is then read back from the last node to the first.
 * NOTE(review): the body of the width-adjusting loop, the remaining arguments
 * of find_slice_quant() and the return statement are on lines not visible in
 * this listing.
 */
903 static int find_quant_thread(AVCodecContext *avctx, void *arg,
904 int jobnr, int threadnr)
906 ProresContext *ctx = avctx->priv_data;
907 ProresThreadData *td = ctx->tdata + threadnr;
908 int mbs_per_slice = ctx->mbs_per_slice;
909 int x, y = jobnr, mb, q = 0;
911 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
/* slices at the right edge may be narrower (encode_frame() halves the width
 * in the matching loop) */
912 while (ctx->mb_width - x < mbs_per_slice)
/* column 0 of the trellis is the start state, so slice mb uses column mb + 1 */
914 q = find_slice_quant(avctx, avctx->coded_frame,
915 (mb + 1) * TRELLIS_WIDTH, x, y,
/* backtrack from the best final node through the prev_node links */
919 for (x = ctx->slices_width - 1; x >= 0; x--) {
920 ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
921 q = td->nodes[q].prev_node;
/*
 * Encode one frame into `pkt`: frame header, then for each picture (one, or
 * two when interlaced) a picture header, a slice size table and the slices.
 * Size fields are reserved first and written back once the sizes are known.
 * NOTE(review): several declarations, the initialisation of buf, tmp,
 * slice_hdr and slice_sizes, the error returns and the final return are on
 * lines not visible in this listing.
 */
927 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
928 const AVFrame *pic, int *got_packet)
930 ProresContext *ctx = avctx->priv_data;
931 uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
932 uint8_t *picture_size_pos;
934 int x, y, i, mb, q = 0;
935 int sizes[4] = { 0 };
/* quantiser byte + header size byte, plus a 16-bit size for every plane but the last */
936 int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
937 int frame_size, picture_size, slice_size;
/* plain struct copy of the input frame; find_quant_thread() reads it back
 * through avctx->coded_frame */
941 *avctx->coded_frame = *pic;
942 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
943 avctx->coded_frame->key_frame = 1;
945 pkt_size = ctx->frame_size_upper_bound + FF_MIN_BUFFER_SIZE;
947 if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
950 orig_buf = pkt->data;
/* frame container header; the size is written back at the end */
953 orig_buf += 4; // frame size
954 bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
/* frame header */
959 buf += 2; // frame header size will be stored here
960 bytestream_put_be16 (&buf, 0); // version 1
961 bytestream_put_buffer(&buf, ctx->vendor, 4);
962 bytestream_put_be16 (&buf, avctx->width);
963 bytestream_put_be16 (&buf, avctx->height);
/* chroma format in the top two bits, field order flags when interlaced */
965 frame_flags = ctx->chroma_factor << 6;
966 if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
967 frame_flags |= pic->top_field_first ? 0x04 : 0x08;
968 bytestream_put_byte (&buf, frame_flags);
970 bytestream_put_byte (&buf, 0); // reserved
971 bytestream_put_byte (&buf, avctx->color_primaries);
972 bytestream_put_byte (&buf, avctx->color_trc);
973 bytestream_put_byte (&buf, avctx->colorspace);
/* low bits carry alpha_bits / 8: 0, 1 or 2 */
974 bytestream_put_byte (&buf, 0x40 | (ctx->alpha_bits >> 3));
975 bytestream_put_byte (&buf, 0); // reserved
976 if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
977 bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
978 // luma quantisation matrix
979 for (i = 0; i < 64; i++)
980 bytestream_put_byte(&buf, ctx->quant_mat[i]);
/* the same matrix is written for chroma */
981 // chroma quantisation matrix
982 for (i = 0; i < 64; i++)
983 bytestream_put_byte(&buf, ctx->quant_mat[i]);
985 bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
987 bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
989 for (ctx->cur_picture_idx = 0;
990 ctx->cur_picture_idx < ctx->pictures_per_frame;
991 ctx->cur_picture_idx++) {
/* picture header: the 4-byte size field follows the header-size byte */
993 picture_size_pos = buf + 1;
994 bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
995 buf += 4; // picture data size will be stored here
996 bytestream_put_be16 (&buf, ctx->slices_per_picture);
997 bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
/* two bytes per slice */
999 // seek table - will be filled during slice encoding
1001 buf += ctx->slices_per_picture * 2;
/* without a forced quantiser, run the per-row quantiser search first */
1004 if (!ctx->force_quant) {
1005 ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
1011 for (y = 0; y < ctx->mb_height; y++) {
/* restart from the full slice width on every macroblock row */
1012 int mbs_per_slice = ctx->mbs_per_slice;
1013 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1014 q = ctx->force_quant ? ctx->force_quant
1015 : ctx->slice_q[mb + y * ctx->slices_width];
/* halve the slice width until it fits in what is left of the row */
1017 while (ctx->mb_width - x < mbs_per_slice)
1018 mbs_per_slice >>= 1;
/* slice header size is stored in bits */
1020 bytestream_put_byte(&buf, slice_hdr_size << 3);
/* skip the rest of the slice header; it is filled in below via slice_hdr */
1022 buf += slice_hdr_size - 1;
1023 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
1024 encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
1026 bytestream_put_byte(&slice_hdr, q);
/* the last plane's size is not stored in the header but counts in the total */
1027 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1028 for (i = 0; i < ctx->num_planes - 1; i++) {
1029 bytestream_put_be16(&slice_hdr, sizes[i]);
1030 slice_size += sizes[i];
1032 bytestream_put_be16(&slice_sizes, slice_size);
1033 buf += slice_size - slice_hdr_size;
/* picture size is counted from the picture header's first byte */
1037 picture_size = buf - (picture_size_pos - 1);
1038 bytestream_put_be32(&picture_size_pos, picture_size);
/* write the total size into the 4 bytes reserved at the start of the packet */
1042 frame_size = buf - orig_buf;
1043 bytestream_put_be32(&orig_buf, frame_size);
1045 pkt->size = frame_size;
1046 pkt->flags |= AV_PKT_FLAG_KEY;
/*
 * Release everything encode_init() allocated: the coded frame, the trellis
 * nodes of every thread, the thread data array and the slice quantiser table.
 * encode_init() also calls this on allocation failure, so it has to cope with
 * partially initialised state.
 * NOTE(review): the loop below dereferences ctx->tdata; presumably a NULL
 * check sits on a line not visible in this listing - confirm.
 */
1052 static av_cold int encode_close(AVCodecContext *avctx)
1054 ProresContext *ctx = avctx->priv_data;
1057 av_freep(&avctx->coded_frame);
1060 for (i = 0; i < avctx->thread_count; i++)
1061 av_free(ctx->tdata[i].nodes);
1063 av_freep(&ctx->tdata);
1064 av_freep(&ctx->slice_q);
/*
 * Default ctx->fdct implementation: copy an 8x8 block of 16-bit samples from
 * `src` into `block` in row-major order. `linesize` is in bytes, so the source
 * pointer advances by linesize >> 1 samples per row.
 * NOTE(review): the transform call itself is presumably on a line not visible
 * in this listing (the `dsp` argument is unused in the visible lines).
 */
1069 static void prores_fdct(DSPContext *dsp, const uint16_t *src,
1070 int linesize, int16_t *block)
1073 const uint16_t *tsrc = src;
1075 for (y = 0; y < 8; y++) {
1076 for (x = 0; x < 8; x++)
1077 block[y * 8 + x] = tsrc[x];
1078 tsrc += linesize >> 1;
/*
 * Validate the options and set up the encoder: geometry in macroblocks and
 * slices, quantisation matrices, rate control state (trellis nodes, or a size
 * estimate for a forced quantiser) and the frame size upper bound.
 * Returns a negative AVERROR code on failure.
 *
 * NOTE(review): the option-validation failures below return without calling
 * encode_close(), so avctx->coded_frame allocated at the top is only released
 * if the caller runs the close callback after a failed init - confirm.
 * NOTE(review): several lines (some conditions, `break`, `else`, and the final
 * return) are not visible in this listing.
 */
1083 static av_cold int encode_init(AVCodecContext *avctx)
1085 ProresContext *ctx = avctx->priv_data;
1088 int min_quant, max_quant;
1089 int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
1091 avctx->bits_per_raw_sample = 10;
1092 avctx->coded_frame = av_frame_alloc();
1093 if (!avctx->coded_frame)
1094 return AVERROR(ENOMEM);
1096 ctx->fdct = prores_fdct;
1097 ctx->scantable = interlaced ? ff_prores_interlaced_scan
1098 : ff_prores_progressive_scan;
1099 ff_dsputil_init(&ctx->dsp, avctx);
/* mps & (mps - 1) is non-zero unless mps is a power of two */
1101 mps = ctx->mbs_per_slice;
1102 if (mps & (mps - 1)) {
1103 av_log(avctx, AV_LOG_ERROR,
1104 "there should be an integer power of two MBs per slice\n");
1105 return AVERROR(EINVAL);
/* alpha is only coded when the pixel format has an alpha plane */
1107 if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1108 if (ctx->alpha_bits & 7) {
1109 av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1110 return AVERROR(EINVAL);
1113 ctx->alpha_bits = 0;
1116 ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1119 ctx->profile_info = prores_profile_info + ctx->profile;
1120 ctx->num_planes = 3 + !!ctx->alpha_bits;
/* width in 16-pixel macroblocks, rounded up */
1122 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
/* height in macroblocks: per 32 lines in one case, per 16 in the other
 * (presumably interlaced vs. progressive; the condition is not visible) */
1125 ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1127 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
/* full-width slices plus one narrower slice per set bit of the remainder,
 * matching the width halving done while encoding */
1129 ctx->slices_width = ctx->mb_width / mps;
1130 ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1131 ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1132 ctx->pictures_per_frame = 1 + interlaced;
/* -1 means "use the matrix of the selected profile" */
1134 if (ctx->quant_sel == -1)
1135 ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1137 ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
1139 if (strlen(ctx->vendor) != 4) {
1140 av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1141 return AVERROR_INVALIDDATA;
/* a non-zero global_quality selects a fixed quantiser instead of rate control */
1144 ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1145 if (!ctx->force_quant) {
1146 if (!ctx->bits_per_mb) {
/* default bit budget: find the frame-size class, then read the profile's table */
1147 for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1148 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1149 ctx->pictures_per_frame)
1151 ctx->bits_per_mb = ctx->profile_info->br_tab[i];
1152 } else if (ctx->bits_per_mb < 128) {
1153 av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1154 return AVERROR_INVALIDDATA;
/* precompute the matrices for quantisers below MAX_STORED_Q */
1157 min_quant = ctx->profile_info->min_quant;
1158 max_quant = ctx->profile_info->max_quant;
1159 for (i = min_quant; i < MAX_STORED_Q; i++) {
1160 for (j = 0; j < 64; j++)
1161 ctx->quants[i][j] = ctx->quant_mat[j] * i;
/* one chosen quantiser per slice of a picture */
1164 ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1165 if (!ctx->slice_q) {
1166 encode_close(avctx);
1167 return AVERROR(ENOMEM);
1170 ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1172 encode_close(avctx);
1173 return AVERROR(ENOMEM);
/* per-thread trellis: one column per slice of a row plus the start column */
1176 for (j = 0; j < avctx->thread_count; j++) {
1177 ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1179 * sizeof(*ctx->tdata->nodes));
1180 if (!ctx->tdata[j].nodes) {
1181 encode_close(avctx);
1182 return AVERROR(ENOMEM);
/* start column: zero cost, no predecessor */
1184 for (i = min_quant; i < max_quant + 2; i++) {
1185 ctx->tdata[j].nodes[i].prev_node = -1;
1186 ctx->tdata[j].nodes[i].bits = 0;
1187 ctx->tdata[j].nodes[i].score = 0;
1193 if (ctx->force_quant > 64) {
1194 av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1195 return AVERROR_INVALIDDATA;
/* fixed quantiser: build its matrix in quants[0] and derive a per-block bit
 * estimate `ls` from the size of each quantised coefficient */
1198 for (j = 0; j < 64; j++) {
1199 ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1200 ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
/* 8 blocks per macroblock, 4 more for 4:4:4 chroma, 4 more with alpha */
1203 ctx->bits_per_mb = ls * 8;
1204 if (ctx->chroma_factor == CFACTOR_Y444)
1205 ctx->bits_per_mb += ls * 4;
1206 if (ctx->num_planes == 4)
1207 ctx->bits_per_mb += ls * 4;
/* worst case: every slice uses its header plus the full bit budget */
1210 ctx->frame_size_upper_bound = ctx->pictures_per_frame *
1211 ctx->slices_per_picture *
1212 (2 + 2 * ctx->num_planes +
1213 (mps * ctx->bits_per_mb) / 8)
1216 avctx->codec_tag = ctx->profile_info->tag;
1218 av_log(avctx, AV_LOG_DEBUG,
1219 "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1220 ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1221 interlaced ? "yes" : "no", ctx->bits_per_mb);
1222 av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1223 ctx->frame_size_upper_bound);
/* Byte offset of a ProresContext member, used as the AVOption field offset. */
#define OFFSET(x) offsetof(ProresContext, x)
/* Flags shared by every option of this encoder. Parenthesised so the macro
 * expands as a single operand wherever it is used. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
/*
 * Private options of the encoder. Each entry with a non-zero OFFSET() stores
 * its value in the named ProresContext member; AV_OPT_TYPE_CONST entries are
 * named values for the option that shares their unit string ("profile" or
 * "quant_mat").
 */
1231 static const AVOption options[] = {
/* must be a power of two as well; encode_init() checks that */
1232 { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1233 AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1234 { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1235 { .i64 = PRORES_PROFILE_STANDARD },
1236 PRORES_PROFILE_PROXY, PRORES_PROFILE_4444, VE, "profile" },
1237 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1238 0, 0, VE, "profile" },
1239 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1240 0, 0, VE, "profile" },
1241 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1242 0, 0, VE, "profile" },
1243 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1244 0, 0, VE, "profile" },
1245 { "4444", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1246 0, 0, VE, "profile" },
/* encode_init() rejects any string whose length is not exactly 4 */
1247 { "vendor", "vendor ID", OFFSET(vendor),
1248 AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
/* 0 lets encode_init() pick a default from the profile; values 1..127 are rejected there */
1249 { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1250 AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
/* -1 ("auto") uses the matrix of the selected profile */
1251 { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1252 { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1253 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1254 0, 0, VE, "quant_mat" },
1255 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1256 0, 0, VE, "quant_mat" },
1257 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1258 0, 0, VE, "quant_mat" },
1259 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1260 0, 0, VE, "quant_mat" },
1261 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1262 0, 0, VE, "quant_mat" },
1263 { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1264 0, 0, VE, "quant_mat" },
/* encode_init() requires a multiple of 8, and forces 0 when the pixel format has no alpha */
1265 { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1266 { .i64 = 16 }, 0, 16, VE },
/* AVClass of the encoder's private context; referenced by
 * ff_prores_ks_encoder.priv_class.
 * NOTE(review): the member that attaches the options table is presumably on a
 * line not visible in this listing. */
1270 static const AVClass proresenc_class = {
1271 .class_name = "ProRes encoder",
1272 .item_name = av_default_item_name,
1274 .version = LIBAVUTIL_VERSION_INT,
/* Codec registration entry for the "prores_ks" encoder: wires up the
 * init/encode/close callbacks above, advertises slice threading and lists the
 * accepted pixel formats. */
1277 AVCodec ff_prores_ks_encoder = {
1278 .name = "prores_ks",
1279 .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1280 .type = AVMEDIA_TYPE_VIDEO,
1281 .id = AV_CODEC_ID_PRORES,
1282 .priv_data_size = sizeof(ProresContext),
1283 .init = encode_init,
1284 .close = encode_close,
1285 .encode2 = encode_frame,
1286 .capabilities = CODEC_CAP_SLICE_THREADS,
/* 10-bit 4:2:2, 4:4:4 and 4:4:4 with alpha; the list ends with AV_PIX_FMT_NONE */
1287 .pix_fmts = (const enum AVPixelFormat[]) {
1288 AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1289 AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1291 .priv_class = &proresenc_class,