4 * Copyright (c) 2012 Konstantin Shishkov
6 * This encoder appears to be based on Anatoliy Wasserman's encoder, considering
7 * similarities in the bugs.
9 * This file is part of FFmpeg.
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/opt.h"
27 #include "libavutil/pixdesc.h"
31 #include "bytestream.h"
33 #include "proresdsp.h"
34 #include "proresdata.h"
/* Chroma format codes. ctx->chroma_factor holds one of these; it is compared
 * against CFACTOR_Y444 when sizing chroma planes and is written into the
 * frame-flags byte (shifted left by 6) by encode_frame(). */
36 #define CFACTOR_Y422 2
37 #define CFACTOR_Y444 3
/* Largest allowed slice width in macroblocks: sizes the coefficient buffers
 * and is the upper bound of the "mbs_per_slice" option. */
39 #define MAX_MBS_PER_SLICE 8
44 PRORES_PROFILE_PROXY = 0,
46 PRORES_PROFILE_STANDARD,
/* Quantisation matrices, 64 entries (8x8) per table.
 * encode_init() picks one table through ctx->profile_info->quant when
 * quant_sel is -1, otherwise through ctx->quant_sel; the chosen table is
 * multiplied by the quantiser to build the working matrices.
 * NOTE(review): the per-table labels are not visible in this view; the table
 * order presumably follows the QUANT_MAT_* constants -- confirm. */
59 static const uint8_t prores_quant_matrices[][64] = {
61 4, 7, 9, 11, 13, 14, 15, 63,
62 7, 7, 11, 12, 14, 15, 63, 63,
63 9, 11, 13, 14, 15, 63, 63, 63,
64 11, 11, 13, 14, 63, 63, 63, 63,
65 11, 13, 14, 63, 63, 63, 63, 63,
66 13, 14, 63, 63, 63, 63, 63, 63,
67 13, 63, 63, 63, 63, 63, 63, 63,
68 63, 63, 63, 63, 63, 63, 63, 63,
71 4, 5, 6, 7, 9, 11, 13, 15,
72 5, 5, 7, 8, 11, 13, 15, 17,
73 6, 7, 9, 11, 13, 15, 15, 17,
74 7, 7, 9, 11, 13, 15, 17, 19,
75 7, 9, 11, 13, 14, 16, 19, 23,
76 9, 11, 13, 14, 16, 19, 23, 29,
77 9, 11, 13, 15, 17, 21, 28, 35,
78 11, 13, 16, 17, 21, 28, 35, 41,
81 4, 4, 5, 5, 6, 7, 7, 9,
82 4, 4, 5, 6, 7, 7, 9, 9,
83 5, 5, 6, 7, 7, 9, 9, 10,
84 5, 5, 6, 7, 7, 9, 9, 10,
85 5, 6, 7, 7, 8, 9, 10, 12,
86 6, 7, 7, 8, 9, 10, 12, 15,
87 6, 7, 7, 9, 10, 11, 14, 17,
88 7, 7, 9, 10, 11, 14, 17, 21,
91 4, 4, 4, 4, 4, 4, 4, 4,
92 4, 4, 4, 4, 4, 4, 4, 4,
93 4, 4, 4, 4, 4, 4, 4, 4,
94 4, 4, 4, 4, 4, 4, 4, 5,
95 4, 4, 4, 4, 4, 4, 5, 5,
96 4, 4, 4, 4, 4, 5, 5, 6,
97 4, 4, 4, 4, 5, 5, 6, 7,
98 4, 4, 4, 4, 5, 6, 7, 7,
101 4, 4, 4, 4, 4, 4, 4, 4,
102 4, 4, 4, 4, 4, 4, 4, 4,
103 4, 4, 4, 4, 4, 4, 4, 4,
104 4, 4, 4, 4, 4, 4, 4, 4,
105 4, 4, 4, 4, 4, 4, 4, 4,
106 4, 4, 4, 4, 4, 4, 4, 4,
107 4, 4, 4, 4, 4, 4, 4, 4,
108 4, 4, 4, 4, 4, 4, 4, 4,
/* Macroblock-count thresholds. encode_init() compares each entry against
 * mb_width * mb_height * pictures_per_frame to choose the index into a
 * profile's br_tab when the user did not set bits_per_mb. */
112 #define NUM_MB_LIMITS 4
113 static const int prores_mb_limits[NUM_MB_LIMITS] = {
114 1620, // up to 720x576
115 2700, // up to 960x720
116 6075, // up to 1440x1080
117 9216, // up to 2048x1152
/* Per-profile parameters, indexed by ctx->profile in encode_init().
 *   full_name - profile name string
 *   tag       - FourCC copied to avctx->codec_tag
 *   br_tab    - default bits per macroblock, one entry per prores_mb_limits slot
 *   quant     - index into prores_quant_matrices used when quant_sel is -1
 * The min_quant / max_quant members read elsewhere in this file are declared
 * on lines that are not visible in this view. */
120 static const struct prores_profile {
121 const char *full_name;
125 int br_tab[NUM_MB_LIMITS];
127 } prores_profile_info[5] = {
129 .full_name = "proxy",
130 .tag = MKTAG('a', 'p', 'c', 'o'),
133 .br_tab = { 300, 242, 220, 194 },
134 .quant = QUANT_MAT_PROXY,
138 .tag = MKTAG('a', 'p', 'c', 's'),
141 .br_tab = { 720, 560, 490, 440 },
142 .quant = QUANT_MAT_LT,
145 .full_name = "standard",
146 .tag = MKTAG('a', 'p', 'c', 'n'),
149 .br_tab = { 1050, 808, 710, 632 },
150 .quant = QUANT_MAT_STANDARD,
153 .full_name = "high quality",
154 .tag = MKTAG('a', 'p', 'c', 'h'),
157 .br_tab = { 1566, 1216, 1070, 950 },
158 .quant = QUANT_MAT_HQ,
162 .tag = MKTAG('a', 'p', '4', 'h'),
165 .br_tab = { 2350, 1828, 1600, 1425 },
166 .quant = QUANT_MAT_HQ,
/* Number of trellis node slots reserved per slice: node indices are formed as
 * (slice + 1) * TRELLIS_WIDTH + quantiser, and the per-quantiser
 * slice_bits / slice_score arrays in find_slice_quant() have this many entries. */
170 #define TRELLIS_WIDTH 16
/* Score value used to mark a trellis path as unusable.
 * NOTE(review): the expansion is not parenthesized; the visible uses
 * (comparisons and plain assignment) are unaffected, but wrapping it in
 * parentheses would be safer. */
171 #define SCORE_LIMIT INT_MAX / 2
/* Number of pre-scaled quantisation matrices kept in ProresContext.quants;
 * larger quantisers are scaled on demand into a custom_q buffer. */
180 #define MAX_STORED_Q 16
/* Scratch data owned by one worker thread; find_quant_thread() selects the
 * instance with ctx->tdata + threadnr. */
182 typedef struct ProresThreadData {
/* coefficient storage, one buffer per plane, sized for a full-width slice */
183 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
/* 16x16 sample buffer used by get_slice_data() to pad edge macroblocks */
184 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
/* holds an on-demand scaled quantisation matrix (64 entries) */
185 int16_t custom_q[64];
/* trellis nodes for one macroblock row; allocated in encode_init() */
186 struct TrellisNode *nodes;
/* Encoder private context (avctx->priv_data). Only part of the structure is
 * visible in this view; several members used by the code (e.g. mbs_per_slice,
 * alpha_bits, slice_q, vendor) are declared on lines not shown here. */
189 typedef struct ProresContext {
/* scratch coefficient and padding buffers used by encode_slice() */
191 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
192 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
/* quant_mat scaled by each stored quantiser; filled in encode_init() */
193 int16_t quants[MAX_STORED_Q][64];
/* matrix built in encode_slice() for quantisers >= MAX_STORED_Q */
194 int16_t custom_q[64];
/* selected entry of prores_quant_matrices */
195 const uint8_t *quant_mat;
197 ProresDSPContext dsp;
/* picture size in 16x16 macroblocks */
200 int mb_width, mb_height;
202 int num_chroma_blocks, chroma_factor;
204 int slices_per_picture;
205 int pictures_per_frame; // 1 for progressive, 2 for interlaced
/* byte estimate used (plus FF_MIN_BUFFER_SIZE) to size the output packet */
215 int frame_size_upper_bound;
218 const struct prores_profile *profile_info;
/* per-thread scratch, one entry per avctx->thread_count */
222 ProresThreadData *tdata;
/*
 * Run the forward DCT over the macroblocks of one slice of one plane.
 *
 * src/linesize describe the plane (linesize is in bytes), x/y give the sample
 * position of the slice and w/h the plane size used for edge checks. For each
 * of the mbs_per_slice macroblocks the samples are taken directly from src
 * when the macroblock lies fully inside w x h; otherwise a padded copy is
 * assembled in emu_buf. Coefficients are produced through ctx->dsp.fdct into
 * blocks.
 * NOTE(review): the lines that advance `blocks` between fdct calls and the
 * condition selecting between the two block orderings are not visible here.
 */
225 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
226 int linesize, int x, int y, int w, int h,
227 int16_t *blocks, uint16_t *emu_buf,
228 int mbs_per_slice, int blocks_per_mb, int is_chroma)
230 const uint16_t *esrc;
/* macroblock width in samples: 16 with four blocks per MB, 8 with two */
231 const int mb_width = 4 * blocks_per_mb;
235 for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
/* zero the coefficients of all remaining macroblocks of the slice
 * (the guarding condition is not visible in this view) */
237 memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
/* macroblock entirely inside the plane: use the source stride as is */
241 if (x + mb_width <= w && y + 16 <= h) {
243 elinesize = linesize;
/* otherwise work from emu_buf, whose rows are 16 samples wide */
248 elinesize = 16 * sizeof(*emu_buf);
/* bw x bh is the part of the macroblock that really exists in the plane */
250 bw = FFMIN(w - x, mb_width);
251 bh = FFMIN(h - y, 16);
253 for (j = 0; j < bh; j++) {
254 memcpy(emu_buf + j * 16,
255 (const uint8_t*)src + j * linesize,
/* extend the row to the right with its last valid sample */
257 pix = emu_buf[j * 16 + bw - 1];
258 for (k = bw; k < mb_width; k++)
259 emu_buf[j * 16 + k] = pix;
/* fill missing rows with a copy of the last valid row */
262 memcpy(emu_buf + j * 16,
263 emu_buf + (bh - 1) * 16,
264 mb_width * sizeof(*emu_buf));
/* First ordering of the 8x8 blocks: top-left, top-right, bottom-left,
 * bottom-right. elinesize is a byte stride while esrc points to 16-bit
 * samples, so esrc + elinesize * 4 is eight rows further down. */
267 ctx->dsp.fdct(esrc, elinesize, blocks);
269 if (blocks_per_mb > 2) {
270 ctx->dsp.fdct(esrc + 8, elinesize, blocks);
273 ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
275 if (blocks_per_mb > 2) {
276 ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
/* Second ordering: left column first (top, bottom), then right column. */
280 ctx->dsp.fdct(esrc, elinesize, blocks);
282 ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
284 if (blocks_per_mb > 2) {
285 ctx->dsp.fdct(esrc + 8, elinesize, blocks);
287 ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
/*
 * Copy the alpha samples of one slice into blocks as 16 rows of
 * 16 * mbs_per_slice samples each.
 *
 * Only the part inside the w x h plane is read from src; each row is extended
 * to the right with its last copied sample and missing rows are filled by
 * repeating the previous output row.
 * NOTE(review): the statements selecting between the two per-sample
 * conversions (depending on abits, presumably) are not visible in this view.
 */
296 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
297 int linesize, int x, int y, int w, int h,
298 int16_t *blocks, int mbs_per_slice, int abits)
300 const int slice_width = 16 * mbs_per_slice;
301 int i, j, copy_w, copy_h;
/* size of the region that actually exists in the source plane */
303 copy_w = FFMIN(w - x, slice_width);
304 copy_h = FFMIN(h - y, 16);
305 for (i = 0; i < copy_h; i++) {
306 memcpy(blocks, src, copy_w * sizeof(*src));
308 for (j = 0; j < copy_w; j++)
/* (v << 6) | (v >> 4): presumably widens 10-bit samples to 16 bits by
 * repeating the top bits -- confirm against the input pixel format */
311 for (j = 0; j < copy_w; j++)
312 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
/* pad the row to the full slice width */
313 for (j = copy_w; j < slice_width; j++)
314 blocks[j] = blocks[copy_w - 1];
315 blocks += slice_width;
/* linesize is in bytes, src points to 16-bit samples */
316 src += linesize >> 1;
/* pad to 16 rows by duplicating the row written just before */
318 for (; i < 16; i++) {
319 memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
320 blocks += slice_width;
325 * Write an unsigned rice/exp golomb codeword.
/* Emit val to pb using the code described by the codebook byte:
 * bits 0-1 hold switch_bits - 1, bits 2-4 the exp-Golomb order and the bits
 * from 5 upwards the Rice order. Values >= switch_val take the exp-Golomb
 * path, smaller values the Rice path. */
327 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
329 unsigned int rice_order, exp_order, switch_bits, switch_val;
332 /* number of prefix bits to switch between Rice and expGolomb */
333 switch_bits = (codebook & 3) + 1;
334 rice_order = codebook >> 5; /* rice code order */
335 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
337 switch_val = switch_bits << rice_order;
339 if (val >= switch_val) {
/* rebase the value so that the smallest exp-Golomb input is 1 << exp_order */
340 val -= switch_val - (1 << exp_order);
341 exponent = av_log2(val);
/* zero prefix, then the value itself in exponent + 1 bits */
343 put_bits(pb, exponent - exp_order + switch_bits, 0);
344 put_bits(pb, exponent + 1, val);
/* Rice path: the quotient val >> rice_order is sent as a run of zero bits */
346 exponent = val >> rice_order;
349 put_bits(pb, exponent, 0);
/* remainder: put_sbits is given rice_order as the bit count */
352 put_sbits(pb, rice_order, val);
/* GET_SIGN: x shifted right by 31; for a 32-bit int with arithmetic shift this
 * is 0 for non-negative and -1 for negative values (right-shifting a negative
 * value is implementation-defined in C).
 * MAKE_CODE: (x << 1) XOR that sign, i.e. 2x for x >= 0 and -2x - 1 for x < 0
 * under the same assumption, giving a non-negative code for the VLC writer. */
356 #define GET_SIGN(x) ((x) >> 31)
357 #define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
/*
 * Write the DC coefficients of all blocks of a slice plane.
 *
 * Each DC is the first coefficient of a 64-entry block, has 0x4000 subtracted
 * and is divided by scale. The first one is coded directly with FIRST_DC_CB;
 * the following ones are coded as differences to the previous DC with a
 * codebook chosen from the previous code.
 * NOTE(review): the lines that maintain `sign` and `prev_dc` inside the loop
 * are not visible in this view.
 */
359 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
360 int blocks_per_slice, int scale)
363 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
365 prev_dc = (blocks[0] - 0x4000) / scale;
366 encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
371 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
372 dc = (blocks[0] - 0x4000) / scale;
373 delta = dc - prev_dc;
374 new_sign = GET_SIGN(delta);
/* negates delta when sign is -1, leaves it unchanged when sign is 0 */
375 delta = (delta ^ sign) - sign;
376 code = MAKE_CODE(delta);
377 encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
/* next codebook index: code / 2 rounded up, limited to 3 */
378 codebook = (code + (code & 1)) >> 1;
379 codebook = FFMIN(codebook, 3);
/*
 * Write the AC coefficients of a slice plane as run/level/sign codes.
 *
 * Coefficients are visited one scan position at a time (1..63) across every
 * block of the slice (index step 64), and each is divided by the matching
 * qmat entry. Codebooks for the next run and level are picked from the run
 * and absolute level just coded.
 * NOTE(review): the zero-level test and the run counter updates are on lines
 * not visible in this view.
 */
385 static void encode_acs(PutBitContext *pb, int16_t *blocks,
386 int blocks_per_slice,
387 int plane_size_factor,
388 const uint8_t *scan, const int16_t *qmat)
391 int run, level, run_cb, lev_cb;
392 int max_coeffs, abs_level;
/* total number of coefficients in the slice plane (64 per block) */
394 max_coeffs = blocks_per_slice << 6;
/* initial codebook selectors */
395 run_cb = ff_prores_run_to_cb_index[4];
396 lev_cb = ff_prores_lev_to_cb_index[2];
399 for (i = 1; i < 64; i++) {
400 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
401 level = blocks[idx] / qmat[scan[i]];
403 abs_level = FFABS(level);
404 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
405 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
/* one sign bit, taken from GET_SIGN(level) */
407 put_sbits(pb, 1, GET_SIGN(level));
/* table lookups are clamped to indices 15 (run) and 9 (level) */
409 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
410 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Encode one DCT-coded plane of a slice: DC coefficients first (quantised
 * with qmat[0]), then AC coefficients in ctx->scantable.permutated order.
 *
 * Returns the number of bits written to pb by this call, shifted right by 3
 * (i.e. in bytes).
 */
419 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
420 const uint16_t *src, int linesize,
421 int mbs_per_slice, int16_t *blocks,
422 int blocks_per_mb, int plane_size_factor,
425 int blocks_per_slice, saved_pos;
/* remember the bit position so the size can be derived afterwards */
427 saved_pos = put_bits_count(pb);
428 blocks_per_slice = mbs_per_slice * blocks_per_mb;
430 encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
431 encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
432 ctx->scantable.permutated, qmat);
435 return (put_bits_count(pb) - saved_pos) >> 3;
/*
 * Write the difference between alpha sample cur and its predecessor prev.
 *
 * Differences that are zero or outside [-dsize, dsize] are written with abits
 * bits; the others are written as magnitude minus one in dbits - 1 bits
 * followed by one sign bit.
 * NOTE(review): the wrap-around adjustment of diff and the flag bit that
 * distinguishes the two forms are on lines not visible in this view.
 */
438 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
440 const int mask = (1 << abits) - 1;
/* short-form width: 4 bits for 8-bit alpha, 7 bits otherwise */
441 const int dbits = (abits == 8) ? 4 : 7;
/* '-' binds tighter than '<<', so this is 1 << (dbits - 1) */
442 const int dsize = 1 << dbits - 1;
443 int diff = cur - prev;
446 if (diff >= (1 << abits) - dsize)
448 if (diff < -dsize || diff > dsize || !diff) {
450 put_bits(pb, abits, diff);
453 put_bits(pb, dbits - 1, FFABS(diff) - 1);
454 put_bits(pb, 1, diff < 0);
/* Write a run length of repeated alpha samples to pb, using either a 4-bit or
 * a 15-bit field.
 * NOTE(review): the conditions choosing between the two forms (and any
 * prefix bits) are on lines not visible in this view. */
458 static void put_alpha_run(PutBitContext *pb, int run)
463 put_bits(pb, 4, run);
465 put_bits(pb, 15, run);
471 // todo alpha quantisation for high quants
/*
 * Encode the alpha samples of one slice (mbs_per_slice * 256 values) as
 * differences to the previous sample plus run lengths.
 *
 * The predictor starts at the all-ones value for ctx->alpha_bits. Returns the
 * number of bits written to pb by this call, shifted right by 3.
 * NOTE(review): the loop body that reads samples and counts runs is only
 * partly visible in this view.
 */
472 static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
473 const uint16_t *src, int linesize,
474 int mbs_per_slice, uint16_t *blocks,
477 const int abits = ctx->alpha_bits;
478 const int mask = (1 << abits) - 1;
/* 16x16 alpha samples per macroblock */
479 const int num_coeffs = mbs_per_slice * 256;
480 int saved_pos = put_bits_count(pb);
481 int prev = mask, cur;
/* first difference, taken against the initial predictor */
486 put_alpha_diff(pb, cur, prev, abits);
/* a run is written, followed by the difference to the new value */
491 put_alpha_run (pb, run);
492 put_alpha_diff(pb, cur, prev, abits);
498 } while (idx < num_coeffs);
/* write the run that is still pending after the loop */
500 put_alpha_run(pb, run);
502 return (put_bits_count(pb) - saved_pos) >> 3;
/*
 * Encode every plane of one slice into pb and store the per-plane byte sizes
 * in sizes[].
 *
 * x and y are given in macroblocks, quant is the slice quantiser. DCT planes
 * go through get_slice_data() + encode_slice_plane(); the other path uses
 * get_alpha_data() + encode_alpha_plane().
 * NOTE(review): the condition separating those two paths, the computation of
 * xp/yp/num_cblocks and the return statement are on lines not visible here;
 * total_size accumulates the sizes and is presumably the return value.
 */
505 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
507 int sizes[4], int x, int y, int quant,
510 ProresContext *ctx = avctx->priv_data;
514 int slice_width_factor = av_log2(mbs_per_slice);
515 int num_cblocks, pwidth, linesize, line_add;
516 int plane_factor, is_chroma;
/* line_add is the starting line offset of the current picture; with two
 * pictures per frame it depends on the picture index and field order */
519 if (ctx->pictures_per_frame == 1)
522 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
/* choose the quantisation matrix: forced, pre-scaled, or scaled on demand */
524 if (ctx->force_quant) {
525 qmat = ctx->quants[0];
526 } else if (quant < MAX_STORED_Q) {
527 qmat = ctx->quants[quant];
529 qmat = ctx->custom_q;
530 for (i = 0; i < 64; i++)
531 qmat[i] = ctx->quant_mat[i] * quant;
534 for (i = 0; i < ctx->num_planes; i++) {
/* planes 1 and 2 are the chroma planes */
535 is_chroma = (i == 1 || i == 2);
536 plane_factor = slice_width_factor + 2;
538 plane_factor += ctx->chroma_factor - 3;
/* full-width planes: luma/alpha, or chroma when the format is 4:4:4 */
539 if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
543 pwidth = avctx->width;
/* otherwise the plane is half as wide */
548 pwidth = avctx->width >> 1;
/* with two pictures per frame every other line belongs to this picture */
551 linesize = pic->linesize[i] * ctx->pictures_per_frame;
552 src = (const uint16_t*)(pic->data[i] + yp * linesize +
553 line_add * pic->linesize[i]) + xp;
/* ctx->blocks[0] serves as scratch for whichever plane is being coded */
556 get_slice_data(ctx, src, linesize, xp, yp,
557 pwidth, avctx->height / ctx->pictures_per_frame,
558 ctx->blocks[0], ctx->emu_buf,
559 mbs_per_slice, num_cblocks, is_chroma);
560 sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
561 mbs_per_slice, ctx->blocks[0],
562 num_cblocks, plane_factor,
565 get_alpha_data(ctx, src, linesize, xp, yp,
566 pwidth, avctx->height / ctx->pictures_per_frame,
567 ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
568 sizes[i] = encode_alpha_plane(ctx, pb, src, linesize,
569 mbs_per_slice, ctx->blocks[0],
572 total_size += sizes[i];
/* Return the bit length that val would take with the given codebook, using
 * the same codebook decoding and Rice/exp-Golomb switch as
 * encode_vlc_codeword() but without writing anything. */
577 static inline int estimate_vlc(unsigned codebook, int val)
579 unsigned int rice_order, exp_order, switch_bits, switch_val;
582 /* number of prefix bits to switch between Rice and expGolomb */
583 switch_bits = (codebook & 3) + 1;
584 rice_order = codebook >> 5; /* rice code order */
585 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
587 switch_val = switch_bits << rice_order;
589 if (val >= switch_val) {
590 val -= switch_val - (1 << exp_order);
591 exponent = av_log2(val);
/* prefix of (exponent - exp_order + switch_bits) bits plus exponent + 1 value bits */
593 return exponent * 2 - exp_order + switch_bits + 1;
/* Rice code length: quotient plus rice_order plus one */
595 return (val >> rice_order) + rice_order + 1;
/*
 * Estimate the bit cost of the DC coefficients of a slice plane, mirroring
 * the visible steps of encode_dcs(), and add the quantisation remainder of
 * each DC (|dc - 0x4000| % scale) to *error.
 * NOTE(review): the sign/prev_dc updates and the return statement are on
 * lines not visible in this view; `bits` is presumably what is returned.
 */
599 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
603 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
606 prev_dc = (blocks[0] - 0x4000) / scale;
607 bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
611 *error += FFABS(blocks[0] - 0x4000) % scale;
613 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
614 dc = (blocks[0] - 0x4000) / scale;
615 *error += FFABS(blocks[0] - 0x4000) % scale;
616 delta = dc - prev_dc;
617 new_sign = GET_SIGN(delta);
/* negates delta when sign is -1, leaves it unchanged when sign is 0 */
618 delta = (delta ^ sign) - sign;
619 code = MAKE_CODE(delta);
620 bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
/* next codebook index: code / 2 rounded up, limited to 3 */
621 codebook = (code + (code & 1)) >> 1;
622 codebook = FFMIN(codebook, 3);
/*
 * Estimate the bit cost of the AC coefficients of a slice plane, visiting
 * the coefficients in the same order as encode_acs(), and add each
 * coefficient's quantisation remainder (|coef| % qmat entry) to *error.
 * NOTE(review): the zero-level test, the run counter updates and the return
 * statement are on lines not visible in this view.
 */
630 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
631 int plane_size_factor,
632 const uint8_t *scan, const int16_t *qmat)
635 int run, level, run_cb, lev_cb;
636 int max_coeffs, abs_level;
/* total number of coefficients in the slice plane (64 per block) */
639 max_coeffs = blocks_per_slice << 6;
640 run_cb = ff_prores_run_to_cb_index[4];
641 lev_cb = ff_prores_lev_to_cb_index[2];
644 for (i = 1; i < 64; i++) {
645 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
646 level = blocks[idx] / qmat[scan[i]];
647 *error += FFABS(blocks[idx]) % qmat[scan[i]];
649 abs_level = FFABS(level);
650 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
651 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
/* table lookups are clamped to indices 15 (run) and 9 (level) */
654 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
655 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Estimate the coded size of one DCT plane of a slice from the coefficients
 * already stored in td->blocks[plane], accumulating the quantisation error
 * into *error.
 *
 * Returns the DC + AC bit estimate rounded up to a multiple of 8.
 */
666 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
667 const uint16_t *src, int linesize,
669 int blocks_per_mb, int plane_size_factor,
670 const int16_t *qmat, ProresThreadData *td)
672 int blocks_per_slice;
675 blocks_per_slice = mbs_per_slice * blocks_per_mb;
677 bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
678 bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
679 plane_size_factor, ctx->scantable.permutated, qmat);
/* round up to whole bytes, expressed in bits */
681 return FFALIGN(bits, 8);
/* Estimate the bit cost of coding alpha sample cur against prev, using the
 * same constants and range test as put_alpha_diff().
 * NOTE(review): the diff adjustment and the returned bit counts are on lines
 * not visible in this view. */
684 static int est_alpha_diff(int cur, int prev, int abits)
686 const int mask = (1 << abits) - 1;
/* short-form width: 4 bits for 8-bit alpha, 7 bits otherwise */
687 const int dbits = (abits == 8) ? 4 : 7;
/* '-' binds tighter than '<<', so this is 1 << (dbits - 1) */
688 const int dsize = 1 << dbits - 1;
689 int diff = cur - prev;
692 if (diff >= (1 << abits) - dsize)
694 if (diff < -dsize || diff > dsize || !diff)
/*
 * Estimate the coded size of the alpha samples of one slice
 * (mbs_per_slice * 256 values), following the structure of
 * encode_alpha_plane(): a first difference against the all-ones predictor,
 * then further differences inside the loop.
 * NOTE(review): run accounting and the return statement are on lines not
 * visible in this view.
 */
700 static int estimate_alpha_plane(ProresContext *ctx, int *error,
701 const uint16_t *src, int linesize,
702 int mbs_per_slice, int quant,
705 const int abits = ctx->alpha_bits;
706 const int mask = (1 << abits) - 1;
707 const int num_coeffs = mbs_per_slice * 256;
708 int prev = mask, cur;
715 bits = est_alpha_diff(cur, prev, abits);
726 bits += est_alpha_diff(cur, prev, abits);
732 } while (idx < num_coeffs);
/*
 * Evaluate the quantisers for one slice and extend the trellis of the current
 * macroblock row.
 *
 * trellis_node is the index of this slice's node group in td->nodes; the
 * previous slice's group starts TRELLIS_WIDTH entries earlier. x and y are
 * given in macroblocks.
 * Steps visible here: load the slice's coefficient data for all planes,
 * estimate bits and error for every quantiser in the profile range, find a
 * larger "overquant" when even max_quant does not fit the bit budget, update
 * the trellis nodes, and select the node with the lowest score.
 * NOTE(review): the return statement is not visible; the caller indexes
 * td->nodes with the result, so the selected node index (pq) is presumably
 * returned.
 */
744 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
745 int trellis_node, int x, int y, int mbs_per_slice,
746 ProresThreadData *td)
748 ProresContext *ctx = avctx->priv_data;
749 int i, q, pq, xp, yp;
751 int slice_width_factor = av_log2(mbs_per_slice);
752 int num_cblocks[MAX_PLANES], pwidth;
753 int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
754 const int min_quant = ctx->profile_info->min_quant;
755 const int max_quant = ctx->profile_info->max_quant;
756 int error, bits, bits_limit;
757 int mbs, prev, cur, new_score;
758 int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
761 int linesize[4], line_add;
763 if (ctx->pictures_per_frame == 1)
766 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
/* macroblocks of this row up to and including this slice */
767 mbs = x + mbs_per_slice;
/* fetch the coefficient / alpha data of every plane into td->blocks */
769 for (i = 0; i < ctx->num_planes; i++) {
770 is_chroma[i] = (i == 1 || i == 2);
771 plane_factor[i] = slice_width_factor + 2;
773 plane_factor[i] += ctx->chroma_factor - 3;
774 if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
778 pwidth = avctx->width;
783 pwidth = avctx->width >> 1;
786 linesize[i] = pic->linesize[i] * ctx->pictures_per_frame;
787 src = (const uint16_t*)(pic->data[i] + yp * linesize[i] +
788 line_add * pic->linesize[i]) + xp;
791 get_slice_data(ctx, src, linesize[i], xp, yp,
792 pwidth, avctx->height / ctx->pictures_per_frame,
793 td->blocks[i], td->emu_buf,
794 mbs_per_slice, num_cblocks[i], is_chroma[i]);
796 get_alpha_data(ctx, src, linesize[i], xp, yp,
797 pwidth, avctx->height / ctx->pictures_per_frame,
798 td->blocks[i], mbs_per_slice, ctx->alpha_bits);
/* reset this slice's nodes; slot max_quant + 1 is the overquant entry */
802 for (q = min_quant; q < max_quant + 2; q++) {
803 td->nodes[trellis_node + q].prev_node = -1;
804 td->nodes[trellis_node + q].quant = q;
807 // todo: maybe perform coarser quantising to fit into frame size when needed
/* cost (bits) and distortion (error) for each regular quantiser */
808 for (q = min_quant; q <= max_quant; q++) {
/* the alpha plane, when present, is estimated separately below */
811 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
812 bits += estimate_slice_plane(ctx, &error, i,
815 num_cblocks[i], plane_factor[i],
819 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
820 mbs_per_slice, q, td->blocks[3]);
821 if (bits > 65000 * 8) {
825 slice_bits[q] = bits;
826 slice_score[q] = error;
/* max_quant already fits the per-slice budget: reuse its figures for the
 * overquant slot, with the score raised by one */
828 if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
829 slice_bits[max_quant + 1] = slice_bits[max_quant];
830 slice_score[max_quant + 1] = slice_score[max_quant] + 1;
831 overquant = max_quant;
/* otherwise try larger quantisers (up to 127) until the slice fits */
833 for (q = max_quant + 1; q < 128; q++) {
836 if (q < MAX_STORED_Q) {
837 qmat = ctx->quants[q];
840 for (i = 0; i < 64; i++)
841 qmat[i] = ctx->quant_mat[i] * q;
843 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
844 bits += estimate_slice_plane(ctx, &error, i,
847 num_cblocks[i], plane_factor[i],
851 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
852 mbs_per_slice, q, td->blocks[3]);
853 if (bits <= ctx->bits_per_mb * mbs_per_slice)
857 slice_bits[max_quant + 1] = bits;
858 slice_score[max_quant + 1] = error;
861 td->nodes[trellis_node + max_quant + 1].quant = overquant;
/* cumulative bit budget for the row up to this slice */
863 bits_limit = mbs * ctx->bits_per_mb;
/* combine every node of the previous slice (pq) with every choice here (q) */
864 for (pq = min_quant; pq < max_quant + 2; pq++) {
865 prev = trellis_node - TRELLIS_WIDTH + pq;
867 for (q = min_quant; q < max_quant + 2; q++) {
868 cur = trellis_node + q;
870 bits = td->nodes[prev].bits + slice_bits[q];
871 error = slice_score[q];
872 if (bits > bits_limit)
/* scores at or above SCORE_LIMIT are not summed; the path gets SCORE_LIMIT */
875 if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
876 new_score = td->nodes[prev].score + error;
878 new_score = SCORE_LIMIT;
/* keep the path when the node is still unset or the new score is not worse */
879 if (td->nodes[cur].prev_node == -1 ||
880 td->nodes[cur].score >= new_score) {
882 td->nodes[cur].bits = bits;
883 td->nodes[cur].score = new_score;
884 td->nodes[cur].prev_node = prev;
/* pick the node of this slice with the lowest score (later one on ties) */
889 error = td->nodes[trellis_node + min_quant].score;
890 pq = trellis_node + min_quant;
891 for (q = min_quant + 1; q < max_quant + 2; q++) {
892 if (td->nodes[trellis_node + q].score <= error) {
893 error = td->nodes[trellis_node + q].score;
894 pq = trellis_node + q;
/*
 * Worker for one macroblock row (y = jobnr): runs find_slice_quant() on each
 * slice of the row using the calling thread's scratch data, then walks the
 * trellis backwards through prev_node and stores the chosen quantiser of
 * every slice in ctx->slice_q.
 * NOTE(review): the body of the while loop and the return statement are on
 * lines not visible in this view.
 */
901 static int find_quant_thread(AVCodecContext *avctx, void *arg,
902 int jobnr, int threadnr)
904 ProresContext *ctx = avctx->priv_data;
905 ProresThreadData *td = ctx->tdata + threadnr;
906 int mbs_per_slice = ctx->mbs_per_slice;
907 int x, y = jobnr, mb, q = 0;
909 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
/* runs while fewer macroblocks remain in the row than the slice width */
910 while (ctx->mb_width - x < mbs_per_slice)
/* slice number mb uses node group mb + 1; group 0 is the start state */
912 q = find_slice_quant(avctx, avctx->coded_frame,
913 (mb + 1) * TRELLIS_WIDTH, x, y,
/* backtrack from the last slice's best node to the first slice */
917 for (x = ctx->slices_width - 1; x >= 0; x--) {
918 ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
919 q = td->nodes[q].prev_node;
/*
 * Encode one frame into pkt.
 *
 * Writes the frame container header, the frame header (version, vendor,
 * dimensions, flags, colour description, optional quantisation matrices) and
 * then, for each picture of the frame, a picture header, a table of slice
 * sizes and the slices themselves. Size fields are reserved first and filled
 * in once the data has been written. The packet is marked as a key frame.
 * NOTE(review): several assignments (buf, tmp, slice_hdr, slice_sizes), the
 * error paths and the final return are on lines not visible in this view.
 */
925 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
926 const AVFrame *pic, int *got_packet)
928 ProresContext *ctx = avctx->priv_data;
929 uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
930 uint8_t *picture_size_pos;
932 int x, y, i, mb, q = 0;
933 int sizes[4] = { 0 };
/* one byte for the header size, one for the quantiser and two bytes for
 * every plane size except the last */
934 int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
935 int frame_size, picture_size, slice_size;
/* every frame is coded as an intra key frame */
939 *avctx->coded_frame = *pic;
940 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
941 avctx->coded_frame->key_frame = 1;
943 pkt_size = ctx->frame_size_upper_bound + FF_MIN_BUFFER_SIZE;
945 if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
948 orig_buf = pkt->data;
951 orig_buf += 4; // frame size
952 bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
957 buf += 2; // frame header size will be stored here
958 bytestream_put_be16 (&buf, 0); // version 1
959 bytestream_put_buffer(&buf, ctx->vendor, 4);
960 bytestream_put_be16 (&buf, avctx->width);
961 bytestream_put_be16 (&buf, avctx->height);
/* chroma format in the top bits, field order bits when interlaced */
963 frame_flags = ctx->chroma_factor << 6;
964 if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
965 frame_flags |= pic->top_field_first ? 0x04 : 0x08;
966 bytestream_put_byte (&buf, frame_flags);
968 bytestream_put_byte (&buf, 0); // reserved
969 bytestream_put_byte (&buf, avctx->color_primaries);
970 bytestream_put_byte (&buf, avctx->color_trc);
971 bytestream_put_byte (&buf, avctx->colorspace);
/* low bits: alpha_bits / 8, i.e. 0, 1 or 2 */
972 bytestream_put_byte (&buf, 0x40 | (ctx->alpha_bits >> 3));
973 bytestream_put_byte (&buf, 0); // reserved
974 if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
975 bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
976 // luma quantisation matrix
977 for (i = 0; i < 64; i++)
978 bytestream_put_byte(&buf, ctx->quant_mat[i]);
/* the same matrix is written again for chroma */
979 // chroma quantisation matrix
980 for (i = 0; i < 64; i++)
981 bytestream_put_byte(&buf, ctx->quant_mat[i]);
983 bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
985 bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
/* one pass per picture: a single picture, or two when interlaced */
987 for (ctx->cur_picture_idx = 0;
988 ctx->cur_picture_idx < ctx->pictures_per_frame;
989 ctx->cur_picture_idx++) {
/* the 32-bit picture size field follows the one-byte header size */
991 picture_size_pos = buf + 1;
992 bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
993 buf += 4; // picture data size will be stored here
994 bytestream_put_be16 (&buf, ctx->slices_per_picture);
995 bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
997 // seek table - will be filled during slice encoding
/* two bytes per slice */
999 buf += ctx->slices_per_picture * 2;
/* unless a fixed quantiser is forced, search per-slice quantisers first */
1002 if (!ctx->force_quant) {
1003 ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
1009 for (y = 0; y < ctx->mb_height; y++) {
1010 int mbs_per_slice = ctx->mbs_per_slice;
1011 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1012 q = ctx->force_quant ? ctx->force_quant
1013 : ctx->slice_q[mb + y * ctx->slices_width];
/* halve the slice width until it fits the macroblocks left in the row */
1015 while (ctx->mb_width - x < mbs_per_slice)
1016 mbs_per_slice >>= 1;
/* slice header size, in bits */
1018 bytestream_put_byte(&buf, slice_hdr_size << 3);
/* skip the rest of the slice header; it is filled in via slice_hdr below */
1020 buf += slice_hdr_size - 1;
1021 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
1022 encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
1024 bytestream_put_byte(&slice_hdr, q);
/* the last plane's size is counted in the total but not stored in the header */
1025 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1026 for (i = 0; i < ctx->num_planes - 1; i++) {
1027 bytestream_put_be16(&slice_hdr, sizes[i]);
1028 slice_size += sizes[i];
/* record the slice size in the table reserved earlier */
1030 bytestream_put_be16(&slice_sizes, slice_size);
1031 buf += slice_size - slice_hdr_size;
/* picture size is measured from the picture header's first byte */
1035 picture_size = buf - (picture_size_pos - 1);
1036 bytestream_put_be32(&picture_size_pos, picture_size);
1040 frame_size = buf - orig_buf;
1041 bytestream_put_be32(&orig_buf, frame_size);
1043 pkt->size = frame_size;
1044 pkt->flags |= AV_PKT_FLAG_KEY;
/*
 * Release everything allocated by encode_init(): the coded frame, each
 * thread's trellis node array, the per-thread data and the slice quantiser
 * table.
 * NOTE(review): encode_init() calls this function when allocating ctx->tdata
 * fails, in which case ctx->tdata is NULL; the loop below dereferences it, so
 * a NULL guard is required -- it may exist on a line not visible in this
 * view, confirm.
 */
1050 static av_cold int encode_close(AVCodecContext *avctx)
1052 ProresContext *ctx = avctx->priv_data;
1055 av_freep(&avctx->coded_frame);
1058 for (i = 0; i < avctx->thread_count; i++)
1059 av_free(ctx->tdata[i].nodes);
1061 av_freep(&ctx->tdata);
1062 av_freep(&ctx->slice_q);
/*
 * Initialise the encoder: validate options, derive picture and slice
 * geometry, select the quantisation matrix, set up either rate-controlled
 * mode (per-quantiser matrices, slice quantiser table, per-thread trellis
 * nodes) or fixed-quantiser mode, and compute the packet size bound.
 *
 * Returns a negative AVERROR code for invalid options or failed allocations.
 * NOTE(review): the early error returns after av_frame_alloc() (option
 * validation failures) do not call encode_close(); whether the caller runs
 * the close callback after a failed init is not visible here -- confirm that
 * avctx->coded_frame is not leaked.
 */
1067 static av_cold int encode_init(AVCodecContext *avctx)
1069 ProresContext *ctx = avctx->priv_data;
1072 int min_quant, max_quant;
1073 int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
1075 avctx->bits_per_raw_sample = 10;
1076 avctx->coded_frame = av_frame_alloc();
1077 if (!avctx->coded_frame)
1078 return AVERROR(ENOMEM);
1080 ff_proresdsp_init(&ctx->dsp, avctx);
/* scan order depends on whether interlaced coding is requested */
1081 ff_init_scantable(ctx->dsp.dct_permutation, &ctx->scantable,
1082 interlaced ? ff_prores_interlaced_scan
1083 : ff_prores_progressive_scan);
1085 mps = ctx->mbs_per_slice;
/* mps & (mps - 1) is non-zero unless mps is a power of two */
1086 if (mps & (mps - 1)) {
1087 av_log(avctx, AV_LOG_ERROR,
1088 "there should be an integer power of two MBs per slice\n");
1089 return AVERROR(EINVAL);
/* alpha_bits only applies to pixel formats that carry alpha and must be a
 * multiple of 8; the visible else-path resets it to 0 */
1091 if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1092 if (ctx->alpha_bits & 7) {
1093 av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1094 return AVERROR(EINVAL);
1097 ctx->alpha_bits = 0;
1100 ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1103 ctx->profile_info = prores_profile_info + ctx->profile;
/* three colour planes plus one when alpha is coded */
1104 ctx->num_planes = 3 + !!ctx->alpha_bits;
/* picture width in macroblocks, rounded up */
1106 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
/* height in macroblocks: one variant rounds the height up to 32 and divides
 * by 32, the other rounds to 16 and divides by 16 (the selecting condition is
 * not visible; presumably interlaced vs. progressive) */
1109 ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1111 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
/* full-width slices per row, plus one slice per set bit of the remaining
 * macroblock count */
1113 ctx->slices_width = ctx->mb_width / mps;
1114 ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1115 ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1116 ctx->pictures_per_frame = 1 + interlaced;
/* quant_sel == -1 means: take the matrix of the selected profile */
1118 if (ctx->quant_sel == -1)
1119 ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1121 ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
1123 if (strlen(ctx->vendor) != 4) {
1124 av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1125 return AVERROR_INVALIDDATA;
/* a non-zero global_quality selects fixed-quantiser mode */
1128 ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1129 if (!ctx->force_quant) {
/* no user value: derive bits per MB from the profile table and frame size */
1130 if (!ctx->bits_per_mb) {
1131 for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1132 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1133 ctx->pictures_per_frame)
1135 ctx->bits_per_mb = ctx->profile_info->br_tab[i];
1136 } else if (ctx->bits_per_mb < 128) {
1137 av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1138 return AVERROR_INVALIDDATA;
/* pre-scale the matrix for every stored quantiser from min_quant upwards */
1141 min_quant = ctx->profile_info->min_quant;
1142 max_quant = ctx->profile_info->max_quant;
1143 for (i = min_quant; i < MAX_STORED_Q; i++) {
1144 for (j = 0; j < 64; j++)
1145 ctx->quants[i][j] = ctx->quant_mat[j] * i;
/* one chosen quantiser per slice of a picture */
1148 ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1149 if (!ctx->slice_q) {
1150 encode_close(avctx);
1151 return AVERROR(ENOMEM);
1154 ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1156 encode_close(avctx);
1157 return AVERROR(ENOMEM);
/* per thread: trellis nodes for one row plus an initial node group */
1160 for (j = 0; j < avctx->thread_count; j++) {
1161 ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1163 * sizeof(*ctx->tdata->nodes));
1164 if (!ctx->tdata[j].nodes) {
1165 encode_close(avctx);
1166 return AVERROR(ENOMEM);
/* the first node group is the start state: no predecessor, zero cost */
1168 for (i = min_quant; i < max_quant + 2; i++) {
1169 ctx->tdata[j].nodes[i].prev_node = -1;
1170 ctx->tdata[j].nodes[i].bits = 0;
1171 ctx->tdata[j].nodes[i].score = 0;
/* fixed-quantiser mode */
1177 if (ctx->force_quant > 64) {
1178 av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1179 return AVERROR_INVALIDDATA;
/* quants[0] holds the matrix for the forced quantiser; ls accumulates a
 * per-block size figure from the scaled matrix entries */
1182 for (j = 0; j < 64; j++) {
1183 ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1184 ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
/* ls * 8 as the base figure, plus ls * 4 for 4:4:4 and ls * 4 for alpha */
1187 ctx->bits_per_mb = ls * 8;
1188 if (ctx->chroma_factor == CFACTOR_Y444)
1189 ctx->bits_per_mb += ls * 4;
1190 if (ctx->num_planes == 4)
1191 ctx->bits_per_mb += ls * 4;
/* size bound: for each slice of each picture, the header term
 * (2 + 2 * num_planes) plus the bit budget of a full-width slice in bytes */
1194 ctx->frame_size_upper_bound = ctx->pictures_per_frame *
1195 ctx->slices_per_picture *
1196 (2 + 2 * ctx->num_planes +
1197 (mps * ctx->bits_per_mb) / 8)
1200 avctx->codec_tag = ctx->profile_info->tag;
1202 av_log(avctx, AV_LOG_DEBUG,
1203 "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1204 ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1205 interlaced ? "yes" : "no", ctx->bits_per_mb);
1206 av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1207 ctx->frame_size_upper_bound);
/* OFFSET: byte offset of a ProresContext member, for the option table.
 * VE: flag set shared by all options (video + encoding parameter). */
1212 #define OFFSET(x) offsetof(ProresContext, x)
1213 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/* Private encoder options. Each AV_OPT_TYPE_INT / AV_OPT_TYPE_STRING entry
 * maps onto a ProresContext member through OFFSET(); the AV_OPT_TYPE_CONST
 * entries give symbolic names to the values of the "profile" and "quant_mat"
 * options (linked through the unit string in the last column). */
1215 static const AVOption options[] = {
1216 { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1217 AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1218 { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1219 { .i64 = PRORES_PROFILE_STANDARD },
1220 PRORES_PROFILE_PROXY, PRORES_PROFILE_4444, VE, "profile" },
1221 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1222 0, 0, VE, "profile" },
1223 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1224 0, 0, VE, "profile" },
1225 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1226 0, 0, VE, "profile" },
1227 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1228 0, 0, VE, "profile" },
1229 { "4444", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1230 0, 0, VE, "profile" },
/* encode_init() rejects vendor strings that are not exactly 4 characters */
1231 { "vendor", "vendor ID", OFFSET(vendor),
1232 AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
/* 0 lets encode_init() derive the value from the profile and frame size */
1233 { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1234 AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
/* -1 ("auto") uses the matrix of the selected profile */
1235 { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1236 { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1237 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1238 0, 0, VE, "quant_mat" },
1239 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1240 0, 0, VE, "quant_mat" },
1241 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1242 0, 0, VE, "quant_mat" },
1243 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1244 0, 0, VE, "quant_mat" },
1245 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1246 0, 0, VE, "quant_mat" },
1247 { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1248 0, 0, VE, "quant_mat" },
/* encode_init() additionally requires a multiple of 8 (0, 8 or 16) */
1249 { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1250 { .i64 = 16 }, 0, 16, VE },
/* AVClass describing the encoder's private context; referenced by the codec
 * definition through .priv_class. */
1254 static const AVClass proresenc_class = {
1255 .class_name = "ProRes encoder",
1256 .item_name = av_default_item_name,
1258 .version = LIBAVUTIL_VERSION_INT,
/* Codec registration entry for the "prores_ks" encoder: wires up the init,
 * encode and close callbacks, declares slice-threading capability and lists
 * the accepted pixel formats (terminated by AV_PIX_FMT_NONE). */
1261 AVCodec ff_prores_ks_encoder = {
1262 .name = "prores_ks",
1263 .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1264 .type = AVMEDIA_TYPE_VIDEO,
1265 .id = AV_CODEC_ID_PRORES,
1266 .priv_data_size = sizeof(ProresContext),
1267 .init = encode_init,
1268 .close = encode_close,
1269 .encode2 = encode_frame,
1270 .capabilities = CODEC_CAP_SLICE_THREADS,
1271 .pix_fmts = (const enum AVPixelFormat[]) {
1272 AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1273 AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1275 .priv_class = &proresenc_class,