4 * Copyright (c) 2012 Konstantin Shishkov
6 * This encoder appears to be based on Anatoliy Wasserman's, considering
7 * similarities in the bugs.
9 * This file is part of FFmpeg.
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/opt.h"
27 #include "libavutil/pixdesc.h"
31 #include "bytestream.h"
33 #include "proresdata.h"
// Chroma layout codes; ProresContext.chroma_factor is compared against
// CFACTOR_Y444 when plane geometry is chosen.
35 #define CFACTOR_Y422 2
36 #define CFACTOR_Y444 3
// Upper bound of the "mbs_per_slice" option; also sizes the per-slice
// coefficient buffers (64 * 4 * MAX_MBS_PER_SLICE values per plane).
38 #define MAX_MBS_PER_SLICE 8
// Profile identifiers (fragment of an enum whose remaining members are not
// visible here). encode_init uses the value as an index into
// prores_profile_info.
43 PRORES_PROFILE_PROXY = 0,
45 PRORES_PROFILE_STANDARD,
// Quantisation matrices, 64 entries each. encode_init selects one row either
// through the profile's .quant field or through the "quant_mat" option.
// NOTE(review): rows are presumably in QUANT_MAT_* enum order - confirm
// against the enum, which is not visible in this excerpt.
58 static const uint8_t prores_quant_matrices[][64] = {
60 4, 7, 9, 11, 13, 14, 15, 63,
61 7, 7, 11, 12, 14, 15, 63, 63,
62 9, 11, 13, 14, 15, 63, 63, 63,
63 11, 11, 13, 14, 63, 63, 63, 63,
64 11, 13, 14, 63, 63, 63, 63, 63,
65 13, 14, 63, 63, 63, 63, 63, 63,
66 13, 63, 63, 63, 63, 63, 63, 63,
67 63, 63, 63, 63, 63, 63, 63, 63,
70 4, 5, 6, 7, 9, 11, 13, 15,
71 5, 5, 7, 8, 11, 13, 15, 17,
72 6, 7, 9, 11, 13, 15, 15, 17,
73 7, 7, 9, 11, 13, 15, 17, 19,
74 7, 9, 11, 13, 14, 16, 19, 23,
75 9, 11, 13, 14, 16, 19, 23, 29,
76 9, 11, 13, 15, 17, 21, 28, 35,
77 11, 13, 16, 17, 21, 28, 35, 41,
80 4, 4, 5, 5, 6, 7, 7, 9,
81 4, 4, 5, 6, 7, 7, 9, 9,
82 5, 5, 6, 7, 7, 9, 9, 10,
83 5, 5, 6, 7, 7, 9, 9, 10,
84 5, 6, 7, 7, 8, 9, 10, 12,
85 6, 7, 7, 8, 9, 10, 12, 15,
86 6, 7, 7, 9, 10, 11, 14, 17,
87 7, 7, 9, 10, 11, 14, 17, 21,
90 4, 4, 4, 4, 4, 4, 4, 4,
91 4, 4, 4, 4, 4, 4, 4, 4,
92 4, 4, 4, 4, 4, 4, 4, 4,
93 4, 4, 4, 4, 4, 4, 4, 5,
94 4, 4, 4, 4, 4, 4, 5, 5,
95 4, 4, 4, 4, 4, 5, 5, 6,
96 4, 4, 4, 4, 5, 5, 6, 7,
97 4, 4, 4, 4, 5, 6, 7, 7,
100 4, 4, 4, 4, 4, 4, 4, 4,
101 4, 4, 4, 4, 4, 4, 4, 4,
102 4, 4, 4, 4, 4, 4, 4, 4,
103 4, 4, 4, 4, 4, 4, 4, 4,
104 4, 4, 4, 4, 4, 4, 4, 4,
105 4, 4, 4, 4, 4, 4, 4, 4,
106 4, 4, 4, 4, 4, 4, 4, 4,
107 4, 4, 4, 4, 4, 4, 4, 4,
// Macroblock-count thresholds for the frame size classes. encode_init scans
// them in order and stops at the first one that is not smaller than
// mb_width * mb_height * pictures_per_frame; the resulting index selects the
// matching br_tab entry of the profile.
111 #define NUM_MB_LIMITS 4
112 static const int prores_mb_limits[NUM_MB_LIMITS] = {
113 1620, // up to 720x576
114 2700, // up to 960x720
115 6075, // up to 1440x1080
116 9216, // up to 2048x1152
// Per-profile parameters. encode_init picks an entry with
// prores_profile_info + ctx->profile.
119 static const struct prores_profile {
120 const char *full_name;
// Default bits per macroblock, one entry per prores_mb_limits size class.
124 int br_tab[NUM_MB_LIMITS];
126 } prores_profile_info[5] = {
128 .full_name = "proxy",
// .tag is exported as avctx->codec_tag by encode_init.
129 .tag = MKTAG('a', 'p', 'c', 'o'),
132 .br_tab = { 300, 242, 220, 194 },
// .quant indexes prores_quant_matrices when the "quant_mat" option is -1.
133 .quant = QUANT_MAT_PROXY,
137 .tag = MKTAG('a', 'p', 'c', 's'),
140 .br_tab = { 720, 560, 490, 440 },
141 .quant = QUANT_MAT_LT,
144 .full_name = "standard",
145 .tag = MKTAG('a', 'p', 'c', 'n'),
148 .br_tab = { 1050, 808, 710, 632 },
149 .quant = QUANT_MAT_STANDARD,
152 .full_name = "high quality",
153 .tag = MKTAG('a', 'p', 'c', 'h'),
156 .br_tab = { 1566, 1216, 1070, 950 },
157 .quant = QUANT_MAT_HQ,
161 .tag = MKTAG('a', 'p', '4', 'h'),
164 .br_tab = { 2350, 1828, 1600, 1425 },
165 .quant = QUANT_MAT_HQ,
/* Distance, in node indices, between the trellis node groups of two
 * consecutive slices; also sizes the per-quantiser bit/score arrays. */
#define TRELLIS_WIDTH 16
/* Score assigned to trellis paths that must not be chosen. Parenthesized so
 * that the macro expands correctly inside any larger expression. */
#define SCORE_LIMIT (INT_MAX / 2)
/* Number of precomputed quantiser matrices kept in ProresContext.quants. */
#define MAX_STORED_Q 16
// Scratch state used by one worker thread (find_quant_thread takes
// ctx->tdata + threadnr).
181 typedef struct ProresThreadData {
// Coefficient storage for one slice, per plane.
182 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
// 16x16 sample buffer for macroblocks that cross the picture edge.
183 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
// Quantiser matrix built on demand for quantisers without a stored matrix.
184 int16_t custom_q[64];
// Trellis nodes used by the per-row quantiser search.
185 struct TrellisNode *nodes;
// Encoder private context (AVCodecContext.priv_data).
188 typedef struct ProresContext {
// Buffers used on the final encoding pass (encode_slice works on blocks[0]).
190 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
191 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
// quants[q][j] = quant_mat[j] * q, filled by encode_init; with a forced
// quantiser only quants[0] is filled.
192 int16_t quants[MAX_STORED_Q][64];
193 int16_t custom_q[64];
194 const uint8_t *quant_mat;
195 const uint8_t *scantable;
// Forward transform applied to every 8x8 block (set to prores_fdct).
197 void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
198 int linesize, int16_t *block);
// Picture size in macroblocks.
201 int mb_width, mb_height;
203 int num_chroma_blocks, chroma_factor;
205 int slices_per_picture;
206 int pictures_per_frame; // 1 for progressive, 2 for interlaced
// Used by encode_frame to size the output packet.
216 int frame_size_upper_bound;
219 const struct prores_profile *profile_info;
// One ProresThreadData per thread (avctx->thread_count entries).
223 ProresThreadData *tdata;
// Transform the samples of one slice of one plane: for each of the
// mbs_per_slice macroblocks, ctx->fdct is run on its 8x8 sub-blocks.
// Macroblocks that cross the right or bottom edge of the w x h plane are
// first padded into emu_buf.
// NOTE(review): the statements advancing `blocks` and `x` are not visible in
// this excerpt.
226 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
227 int linesize, int x, int y, int w, int h,
228 int16_t *blocks, uint16_t *emu_buf,
229 int mbs_per_slice, int blocks_per_mb, int is_chroma)
231 const uint16_t *esrc;
// Macroblock width in samples.
232 const int mb_width = 4 * blocks_per_mb;
236 for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
// Zero the coefficients of all macroblocks still left in the slice
// (the guarding condition is not visible here).
238 memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
// Macroblock lies completely inside the plane.
242 if (x + mb_width <= w && y + 16 <= h) {
244 elinesize = linesize;
// Edge case: emu_buf rows are 16 samples wide, stride given in bytes.
249 elinesize = 16 * sizeof(*emu_buf);
// bw x bh is the part of the macroblock that exists in the plane.
251 bw = FFMIN(w - x, mb_width);
252 bh = FFMIN(h - y, 16);
254 for (j = 0; j < bh; j++) {
255 memcpy(emu_buf + j * 16,
256 (const uint8_t*)src + j * linesize,
// Extend the row to the right with its last valid sample.
258 pix = emu_buf[j * 16 + bw - 1];
259 for (k = bw; k < mb_width; k++)
260 emu_buf[j * 16 + k] = pix;
// Fill a missing row with a copy of the last valid row.
263 memcpy(emu_buf + j * 16,
264 emu_buf + (bh - 1) * 16,
265 mb_width * sizeof(*emu_buf));
// Two block orderings follow; the condition selecting between them is not
// visible (presumably is_chroma). elinesize is in bytes while esrc points to
// 16-bit samples, so esrc + elinesize * 4 is 8 rows further down.
// First ordering: top-left, top-right, bottom-left, bottom-right.
268 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
270 if (blocks_per_mb > 2) {
271 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
274 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
276 if (blocks_per_mb > 2) {
277 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
// Second ordering: left column first, then the right column.
281 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
283 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
285 if (blocks_per_mb > 2) {
286 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
288 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
// Copy a 16-line strip of alpha samples for one slice into `blocks`
// (16 * mbs_per_slice values per line), padding to the right and downwards
// when the slice crosses the edge of the w x h plane.
297 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
298 int linesize, int x, int y, int w, int h,
299 int16_t *blocks, int mbs_per_slice, int abits)
301 const int slice_width = 16 * mbs_per_slice;
302 int i, j, copy_w, copy_h;
// Part of the strip that really exists in the plane.
304 copy_w = FFMIN(w - x, slice_width);
305 copy_h = FFMIN(h - y, 16);
306 for (i = 0; i < copy_h; i++) {
307 memcpy(blocks, src, copy_w * sizeof(*src));
// NOTE(review): two per-sample conversions follow; the body of the first
// loop and the abits test choosing between them are not visible here.
309 for (j = 0; j < copy_w; j++)
312 for (j = 0; j < copy_w; j++)
// Widen by 6 bits, refilling the new low bits from the top bits.
313 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
// Pad the line to slice_width with the last copied sample.
314 for (j = copy_w; j < slice_width; j++)
315 blocks[j] = blocks[copy_w - 1];
316 blocks += slice_width;
// linesize is in bytes, src points to 16-bit samples.
317 src += linesize >> 1;
// Missing lines are copies of the line above.
319 for (; i < 16; i++) {
320 memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
321 blocks += slice_width;
326 * Write an unsigned rice/exp golomb codeword.
328 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
330 unsigned int rice_order, exp_order, switch_bits, switch_val;
// `codebook` packs three fields: bits 0-1 hold switch_bits - 1, bits 2-4 the
// exp-Golomb order, and the bits from 5 upwards the Rice order.
333 /* number of prefix bits to switch between Rice and expGolomb */
334 switch_bits = (codebook & 3) + 1;
335 rice_order = codebook >> 5; /* rice code order */
336 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
// Values at or above this threshold use the exp-Golomb form.
338 switch_val = switch_bits << rice_order;
340 if (val >= switch_val) {
// Rebase so that the smallest exp-Golomb value is 1 << exp_order.
341 val -= switch_val - (1 << exp_order);
342 exponent = av_log2(val);
// Zero prefix, then the value itself in exponent + 1 bits.
344 put_bits(pb, exponent - exp_order + switch_bits, 0);
345 put_bits(pb, exponent + 1, val);
// Rice form: the quotient is written as a run of zero bits.
347 exponent = val >> rice_order;
350 put_bits(pb, exponent, 0);
// Low rice_order bits of the value.
353 put_sbits(pb, rice_order, val);
/* Sign mask of x: -1 when x is negative, 0 otherwise. Written as a
 * comparison so that it depends neither on the width of int nor on the
 * implementation-defined result of right-shifting a negative value. */
#define GET_SIGN(x) (-((x) < 0))
/* Fold a signed value into a non-negative code: 0, -1, 1, -2, 2, ... become
 * 0, 1, 2, 3, 4, ... The doubling is a multiplication because left-shifting
 * a negative value is undefined behaviour. */
#define MAKE_CODE(x) (((x) * 2) ^ GET_SIGN(x))
// Write the DC coefficients of all blocks in a slice. The first DC is coded
// directly, the others as differences from the previous block's DC.
360 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
361 int blocks_per_slice, int scale)
364 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
// DC values are stored with a 0x4000 bias, removed before quantising.
366 prev_dc = (blocks[0] - 0x4000) / scale;
367 encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
// NOTE(review): the initialisation and update of `sign` and `prev_dc`, and
// the initial advance of `blocks`, are on lines not visible in this excerpt.
372 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
373 dc = (blocks[0] - 0x4000) / scale;
374 delta = dc - prev_dc;
375 new_sign = GET_SIGN(delta);
// Negate delta when `sign` is -1, leave it unchanged when `sign` is 0.
376 delta = (delta ^ sign) - sign;
377 code = MAKE_CODE(delta);
378 encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
// The next codebook index is ceil(code / 2), capped at 3.
379 codebook = (code + (code & 1)) >> 1;
380 codebook = FFMIN(codebook, 3);
// Write the AC coefficients of a slice as (run, level, sign) triples.
// Coefficients are visited scan position by scan position, and for each
// position across all blocks of the slice.
386 static void encode_acs(PutBitContext *pb, int16_t *blocks,
387 int blocks_per_slice,
388 int plane_size_factor,
389 const uint8_t *scan, const int16_t *qmat)
392 int run, level, run_cb, lev_cb;
393 int max_coeffs, abs_level;
395 max_coeffs = blocks_per_slice << 6;
// Initial codebooks for the first run and the first level.
396 run_cb = ff_prores_run_to_cb_index[4];
397 lev_cb = ff_prores_lev_to_cb_index[2];
// Position 0 (the DC) is skipped.
400 for (i = 1; i < 64; i++) {
// Step of 64 moves to the same coefficient in the next block.
401 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
402 level = blocks[idx] / qmat[scan[i]];
// NOTE(review): the zero-level test and the run counter updates are on lines
// not visible in this excerpt.
404 abs_level = FFABS(level);
405 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
406 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
// One sign bit: GET_SIGN is -1 for negative levels, 0 otherwise.
408 put_sbits(pb, 1, GET_SIGN(level));
// Codebooks adapt to the last run (capped at 15) and level (capped at 9).
410 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
411 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
// Encode the DC and AC coefficients of one plane of a slice and return the
// size of the written data in bytes.
420 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
421 const uint16_t *src, int linesize,
422 int mbs_per_slice, int16_t *blocks,
423 int blocks_per_mb, int plane_size_factor,
426 int blocks_per_slice, saved_pos;
// Remember the start position to measure the plane size afterwards.
428 saved_pos = put_bits_count(pb);
429 blocks_per_slice = mbs_per_slice * blocks_per_mb;
// qmat[0] is the quantiser applied to the DC coefficients.
431 encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
432 encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
433 ctx->scantable, qmat);
// Bits written, converted to bytes.
// NOTE(review): presumably the writer is flushed first; that line is not
// visible here.
436 return (put_bits_count(pb) - saved_pos) >> 3;
// Write the difference between two alpha values, either as a full abits-wide
// value or in a short form of dbits bits (magnitude - 1 plus a sign bit).
// NOTE(review): the flag bit distinguishing the two forms and the wrap-around
// adjustments of `diff` are on lines not visible in this excerpt.
439 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
441 const int mask = (1 << abits) - 1;
442 const int dbits = (abits == 8) ? 4 : 7;
// Parsed as 1 << (dbits - 1): the largest magnitude the short form can hold.
443 const int dsize = 1 << dbits - 1;
444 int diff = cur - prev;
447 if (diff >= (1 << abits) - dsize)
// Differences that are out of range, or zero, use the full form.
449 if (diff < -dsize || diff > dsize || !diff) {
451 put_bits(pb, abits, diff);
454 put_bits(pb, dbits - 1, FFABS(diff) - 1);
455 put_bits(pb, 1, diff < 0);
// Write the length of a run of identical alpha values, as a 4-bit or a
// 15-bit field.
// NOTE(review): the conditions choosing the field size and any prefix bits
// are on lines not visible in this excerpt.
459 static void put_alpha_run(PutBitContext *pb, int run)
464 put_bits(pb, 4, run);
466 put_bits(pb, 15, run);
472 // todo alpha quantisation for high quants
// Encode the alpha samples of one slice (256 values per macroblock) as
// value differences plus run lengths, and return the number of bytes written.
473 static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
474 const uint16_t *src, int linesize,
475 int mbs_per_slice, uint16_t *blocks,
478 const int abits = ctx->alpha_bits;
479 const int mask = (1 << abits) - 1;
480 const int num_coeffs = mbs_per_slice * 256;
481 int saved_pos = put_bits_count(pb);
// The predictor starts at the maximum alpha value.
482 int prev = mask, cur;
// NOTE(review): the loading of `cur`, the run counting and the loop header
// are on lines not visible in this excerpt.
487 put_alpha_diff(pb, cur, prev, abits);
492 put_alpha_run (pb, run);
493 put_alpha_diff(pb, cur, prev, abits);
499 } while (idx < num_coeffs);
// Write the run that was still pending when the loop ended.
501 put_alpha_run(pb, run);
503 return (put_bits_count(pb) - saved_pos) >> 3;
// Encode all planes of one slice with quantiser `quant`. The byte size of
// each plane is stored in sizes[] and accumulated in total_size.
506 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
508 int sizes[4], int x, int y, int quant,
511 ProresContext *ctx = avctx->priv_data;
515 int slice_width_factor = av_log2(mbs_per_slice);
516 int num_cblocks, pwidth, linesize, line_add;
517 int plane_factor, is_chroma;
// With two pictures per frame, line_add selects which field's lines are read.
520 if (ctx->pictures_per_frame == 1)
523 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
// Quantiser matrix: quants[0] for a forced quantiser, a stored matrix for
// small quantisers, otherwise one built on the fly in ctx->custom_q.
525 if (ctx->force_quant) {
526 qmat = ctx->quants[0];
527 } else if (quant < MAX_STORED_Q) {
528 qmat = ctx->quants[quant];
530 qmat = ctx->custom_q;
531 for (i = 0; i < 64; i++)
532 qmat[i] = ctx->quant_mat[i] * quant;
535 for (i = 0; i < ctx->num_planes; i++) {
536 is_chroma = (i == 1 || i == 2);
537 plane_factor = slice_width_factor + 2;
539 plane_factor += ctx->chroma_factor - 3;
// Planes at full horizontal resolution use the whole picture width.
540 if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
544 pwidth = avctx->width;
// Horizontally subsampled chroma: half the width.
549 pwidth = avctx->width >> 1;
// The stride is multiplied by pictures_per_frame, so each field is walked
// with a doubled stride when there are two pictures per frame.
552 linesize = pic->linesize[i] * ctx->pictures_per_frame;
553 src = (const uint16_t*)(pic->data[i] + yp * linesize +
554 line_add * pic->linesize[i]) + xp;
// Colour planes: transform, then entropy code.
// NOTE(review): the test separating this from the alpha path below is not
// visible in this excerpt.
557 get_slice_data(ctx, src, linesize, xp, yp,
558 pwidth, avctx->height / ctx->pictures_per_frame,
559 ctx->blocks[0], ctx->emu_buf,
560 mbs_per_slice, num_cblocks, is_chroma);
561 sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
562 mbs_per_slice, ctx->blocks[0],
563 num_cblocks, plane_factor,
// Alpha plane: copied and coded without a transform.
566 get_alpha_data(ctx, src, linesize, xp, yp,
567 pwidth, avctx->height / ctx->pictures_per_frame,
568 ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
569 sizes[i] = encode_alpha_plane(ctx, pb, src, linesize,
570 mbs_per_slice, ctx->blocks[0],
573 total_size += sizes[i];
// Return the number of bits encode_vlc_codeword would emit for `val` with
// the given codebook, without writing anything.
578 static inline int estimate_vlc(unsigned codebook, int val)
580 unsigned int rice_order, exp_order, switch_bits, switch_val;
583 /* number of prefix bits to switch between Rice and expGolomb */
584 switch_bits = (codebook & 3) + 1;
585 rice_order = codebook >> 5; /* rice code order */
586 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
588 switch_val = switch_bits << rice_order;
590 if (val >= switch_val) {
591 val -= switch_val - (1 << exp_order);
592 exponent = av_log2(val);
// Zero prefix of (exponent - exp_order + switch_bits) bits plus
// (exponent + 1) value bits.
594 return exponent * 2 - exp_order + switch_bits + 1;
// Rice form: quotient bits, rice_order remainder bits and one more bit.
596 return (val >> rice_order) + rice_order + 1;
// Estimate the bit cost of the DC coefficients of a slice, following the
// same steps as encode_dcs, and add the quantisation remainders to *error.
600 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
604 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
607 prev_dc = (blocks[0] - 0x4000) / scale;
608 bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
// The error term is the remainder lost by the integer division.
612 *error += FFABS(blocks[0] - 0x4000) % scale;
// NOTE(review): updates of `sign` and `prev_dc` are on lines not visible in
// this excerpt.
614 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
615 dc = (blocks[0] - 0x4000) / scale;
616 *error += FFABS(blocks[0] - 0x4000) % scale;
617 delta = dc - prev_dc;
618 new_sign = GET_SIGN(delta);
619 delta = (delta ^ sign) - sign;
620 code = MAKE_CODE(delta);
621 bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
622 codebook = (code + (code & 1)) >> 1;
623 codebook = FFMIN(codebook, 3);
// Estimate the bit cost of the AC coefficients of a slice, following the
// same traversal as encode_acs, and add the quantisation remainders to *error.
631 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
632 int plane_size_factor,
633 const uint8_t *scan, const int16_t *qmat)
636 int run, level, run_cb, lev_cb;
637 int max_coeffs, abs_level;
640 max_coeffs = blocks_per_slice << 6;
641 run_cb = ff_prores_run_to_cb_index[4];
642 lev_cb = ff_prores_lev_to_cb_index[2];
645 for (i = 1; i < 64; i++) {
646 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
647 level = blocks[idx] / qmat[scan[i]];
// Remainder discarded by the quantiser for this coefficient.
648 *error += FFABS(blocks[idx]) % qmat[scan[i]];
// NOTE(review): the zero-level test and run counter updates are on lines not
// visible in this excerpt.
650 abs_level = FFABS(level);
651 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
652 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
655 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
656 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
// Estimate the size in bits of one colour plane of a slice for the given
// quantiser matrix, using the coefficients already stored in
// td->blocks[plane]. Quantisation error is accumulated in *error.
667 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
668 const uint16_t *src, int linesize,
670 int blocks_per_mb, int plane_size_factor,
671 const int16_t *qmat, ProresThreadData *td)
673 int blocks_per_slice;
676 blocks_per_slice = mbs_per_slice * blocks_per_mb;
678 bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
679 bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
680 plane_size_factor, ctx->scantable, qmat);
// Round up to a whole number of bytes, expressed in bits.
682 return FFALIGN(bits, 8);
// Bit-cost counterpart of put_alpha_diff: uses the same range tests to
// decide between the full and the short difference form.
// NOTE(review): the returned bit counts are on lines not visible in this
// excerpt.
685 static int est_alpha_diff(int cur, int prev, int abits)
687 const int mask = (1 << abits) - 1;
688 const int dbits = (abits == 8) ? 4 : 7;
// Parsed as 1 << (dbits - 1).
689 const int dsize = 1 << dbits - 1;
690 int diff = cur - prev;
693 if (diff >= (1 << abits) - dsize)
695 if (diff < -dsize || diff > dsize || !diff)
// Estimate the size in bits of the alpha data of one slice
// (256 values per macroblock), mirroring encode_alpha_plane.
// NOTE(review): run handling and the return statement are on lines not
// visible in this excerpt.
701 static int estimate_alpha_plane(ProresContext *ctx, int *error,
702 const uint16_t *src, int linesize,
703 int mbs_per_slice, int quant,
706 const int abits = ctx->alpha_bits;
707 const int mask = (1 << abits) - 1;
708 const int num_coeffs = mbs_per_slice * 256;
// The predictor starts at the maximum alpha value.
709 int prev = mask, cur;
716 bits = est_alpha_diff(cur, prev, abits);
727 bits += est_alpha_diff(cur, prev, abits);
733 } while (idx < num_coeffs);
// Rate-control step for one slice. It estimates size and error of the slice
// for every candidate quantiser, then extends the trellis stored in
// td->nodes: the nodes of this slice start at index trellis_node, and those
// of the previous slice TRELLIS_WIDTH entries earlier.
745 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
746 int trellis_node, int x, int y, int mbs_per_slice,
747 ProresThreadData *td)
749 ProresContext *ctx = avctx->priv_data;
750 int i, q, pq, xp, yp;
752 int slice_width_factor = av_log2(mbs_per_slice);
753 int num_cblocks[MAX_PLANES], pwidth;
754 int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
755 const int min_quant = ctx->profile_info->min_quant;
756 const int max_quant = ctx->profile_info->max_quant;
757 int error, bits, bits_limit;
758 int mbs, prev, cur, new_score;
// Indexed by quantiser; index max_quant + 1 holds the "overquant" candidate.
759 int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
762 int linesize[4], line_add;
764 if (ctx->pictures_per_frame == 1)
767 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
// Macroblocks in this row up to and including the current slice.
768 mbs = x + mbs_per_slice;
// Load and transform the slice data of every plane once; the estimates below
// reuse td->blocks for each quantiser.
770 for (i = 0; i < ctx->num_planes; i++) {
771 is_chroma[i] = (i == 1 || i == 2);
772 plane_factor[i] = slice_width_factor + 2;
774 plane_factor[i] += ctx->chroma_factor - 3;
775 if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
779 pwidth = avctx->width;
784 pwidth = avctx->width >> 1;
787 linesize[i] = pic->linesize[i] * ctx->pictures_per_frame;
788 src = (const uint16_t*)(pic->data[i] + yp * linesize[i] +
789 line_add * pic->linesize[i]) + xp;
792 get_slice_data(ctx, src, linesize[i], xp, yp,
793 pwidth, avctx->height / ctx->pictures_per_frame,
794 td->blocks[i], td->emu_buf,
795 mbs_per_slice, num_cblocks[i], is_chroma[i]);
797 get_alpha_data(ctx, src, linesize[i], xp, yp,
798 pwidth, avctx->height / ctx->pictures_per_frame,
799 td->blocks[i], mbs_per_slice, ctx->alpha_bits);
// Reset this slice's nodes, including the extra one at max_quant + 1.
803 for (q = min_quant; q < max_quant + 2; q++) {
804 td->nodes[trellis_node + q].prev_node = -1;
805 td->nodes[trellis_node + q].quant = q;
808 // todo: maybe perform coarser quantising to fit into frame size when needed
// Size and error of the slice for each regular quantiser.
809 for (q = min_quant; q <= max_quant; q++) {
// The colour planes are estimated here; alpha, if present, is added below.
812 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
813 bits += estimate_slice_plane(ctx, &error, i,
816 num_cblocks[i], plane_factor[i],
820 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
821 mbs_per_slice, q, td->blocks[3]);
// Oversized slices get special treatment on lines not visible here.
822 if (bits > 65000 * 8) {
826 slice_bits[q] = bits;
827 slice_score[q] = error;
// If max_quant already meets the per-slice budget, the extra candidate is a
// copy of it with a score one higher.
829 if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
830 slice_bits[max_quant + 1] = slice_bits[max_quant];
831 slice_score[max_quant + 1] = slice_score[max_quant] + 1;
832 overquant = max_quant;
// Otherwise try larger quantisers, up to 127, against the same budget.
834 for (q = max_quant + 1; q < 128; q++) {
837 if (q < MAX_STORED_Q) {
838 qmat = ctx->quants[q];
841 for (i = 0; i < 64; i++)
842 qmat[i] = ctx->quant_mat[i] * q;
844 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
845 bits += estimate_slice_plane(ctx, &error, i,
848 num_cblocks[i], plane_factor[i],
852 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
853 mbs_per_slice, q, td->blocks[3]);
854 if (bits <= ctx->bits_per_mb * mbs_per_slice)
858 slice_bits[max_quant + 1] = bits;
859 slice_score[max_quant + 1] = error;
// The extra node records the quantiser actually chosen for it.
862 td->nodes[trellis_node + max_quant + 1].quant = overquant;
// Bit budget for the row so far.
864 bits_limit = mbs * ctx->bits_per_mb;
// Trellis update: combine each node of the previous slice (pq) with each
// candidate of this slice (q).
865 for (pq = min_quant; pq < max_quant + 2; pq++) {
866 prev = trellis_node - TRELLIS_WIDTH + pq;
868 for (q = min_quant; q < max_quant + 2; q++) {
869 cur = trellis_node + q;
871 bits = td->nodes[prev].bits + slice_bits[q];
872 error = slice_score[q];
// Paths over budget are handled on a line not visible here.
873 if (bits > bits_limit)
// Add the scores only while both are below SCORE_LIMIT; otherwise the path
// gets SCORE_LIMIT itself.
876 if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
877 new_score = td->nodes[prev].score + error;
879 new_score = SCORE_LIMIT;
// Keep the path when the node is unset or the new score is not worse.
880 if (td->nodes[cur].prev_node == -1 ||
881 td->nodes[cur].score >= new_score) {
883 td->nodes[cur].bits = bits;
884 td->nodes[cur].score = new_score;
885 td->nodes[cur].prev_node = prev;
// Select the node of this slice with the lowest score; on ties the higher
// quantiser wins because of the <= comparison.
890 error = td->nodes[trellis_node + min_quant].score;
891 pq = trellis_node + min_quant;
892 for (q = min_quant + 1; q < max_quant + 2; q++) {
893 if (td->nodes[trellis_node + q].score <= error) {
894 error = td->nodes[trellis_node + q].score;
895 pq = trellis_node + q;
// Worker job for one macroblock row (jobnr is the row index). It runs
// find_slice_quant on every slice of the row, then walks the trellis
// backwards to store one quantiser per slice in ctx->slice_q.
902 static int find_quant_thread(AVCodecContext *avctx, void *arg,
903 int jobnr, int threadnr)
905 ProresContext *ctx = avctx->priv_data;
// Each thread works on its own scratch data.
906 ProresThreadData *td = ctx->tdata + threadnr;
907 int mbs_per_slice = ctx->mbs_per_slice;
908 int x, y = jobnr, mb, q = 0;
910 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
// Near the right edge the slice no longer fits (the loop body is not
// visible here; encode_frame halves mbs_per_slice in the same situation).
911 while (ctx->mb_width - x < mbs_per_slice)
// Slice number mb uses node group mb + 1; group 0 is the starting state.
913 q = find_slice_quant(avctx, avctx->coded_frame,
914 (mb + 1) * TRELLIS_WIDTH, x, y,
// Backtrack from the last slice to the first along prev_node links.
918 for (x = ctx->slices_width - 1; x >= 0; x--) {
919 ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
920 q = td->nodes[q].prev_node;
// Encode one frame into pkt: frame header, then for each picture a picture
// header, a table of slice sizes and the slices themselves. The size fields
// are skipped at first and written back once the sizes are known.
926 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
927 const AVFrame *pic, int *got_packet)
929 ProresContext *ctx = avctx->priv_data;
930 uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
931 uint8_t *picture_size_pos;
933 int x, y, i, mb, q = 0;
934 int sizes[4] = { 0 };
// Two header bytes plus a 16-bit size for every plane except the last.
935 int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
936 int frame_size, picture_size, slice_size;
// Every frame is marked as an intra key frame.
940 *avctx->coded_frame = *pic;
941 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
942 avctx->coded_frame->key_frame = 1;
944 pkt_size = ctx->frame_size_upper_bound + FF_MIN_BUFFER_SIZE;
946 if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
949 orig_buf = pkt->data;
952 orig_buf += 4; // frame size
953 bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
958 buf += 2; // frame header size will be stored here
959 bytestream_put_be16 (&buf, 0); // version 1
960 bytestream_put_buffer(&buf, ctx->vendor, 4);
961 bytestream_put_be16 (&buf, avctx->width);
962 bytestream_put_be16 (&buf, avctx->height);
// Flags byte: chroma format in the top two bits, field order in bits 2-3
// when interlaced coding is requested.
964 frame_flags = ctx->chroma_factor << 6;
965 if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
966 frame_flags |= pic->top_field_first ? 0x04 : 0x08;
967 bytestream_put_byte (&buf, frame_flags);
969 bytestream_put_byte (&buf, 0); // reserved
970 bytestream_put_byte (&buf, avctx->color_primaries);
971 bytestream_put_byte (&buf, avctx->color_trc);
972 bytestream_put_byte (&buf, avctx->colorspace);
// Low nibble: alpha_bits / 8, i.e. 0, 1 or 2.
973 bytestream_put_byte (&buf, 0x40 | (ctx->alpha_bits >> 3));
974 bytestream_put_byte (&buf, 0); // reserved
// Any matrix other than the default one is stored in the header; the same
// matrix is written for luma and chroma.
975 if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
976 bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
977 // luma quantisation matrix
978 for (i = 0; i < 64; i++)
979 bytestream_put_byte(&buf, ctx->quant_mat[i]);
980 // chroma quantisation matrix
981 for (i = 0; i < 64; i++)
982 bytestream_put_byte(&buf, ctx->quant_mat[i]);
984 bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
986 bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
// One pass per picture: once for progressive, twice for interlaced.
988 for (ctx->cur_picture_idx = 0;
989 ctx->cur_picture_idx < ctx->pictures_per_frame;
990 ctx->cur_picture_idx++) {
// The picture size field follows the one-byte header size.
992 picture_size_pos = buf + 1;
993 bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
994 buf += 4; // picture data size will be stored here
995 bytestream_put_be16 (&buf, ctx->slices_per_picture);
996 bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
998 // seek table - will be filled during slice encoding
1000 buf += ctx->slices_per_picture * 2;
// Without a forced quantiser, run the quantiser search first.
1003 if (!ctx->force_quant) {
1004 ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
1010 for (y = 0; y < ctx->mb_height; y++) {
1011 int mbs_per_slice = ctx->mbs_per_slice;
1012 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1013 q = ctx->force_quant ? ctx->force_quant
1014 : ctx->slice_q[mb + y * ctx->slices_width];
// Halve the slice width until it fits the remaining macroblocks of the row.
1016 while (ctx->mb_width - x < mbs_per_slice)
1017 mbs_per_slice >>= 1;
// Slice header size in bits.
1019 bytestream_put_byte(&buf, slice_hdr_size << 3);
1021 buf += slice_hdr_size - 1;
1022 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
1023 encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
// Fill in the slice header: the quantiser, then the sizes of all planes
// except the last one.
1025 bytestream_put_byte(&slice_hdr, q);
1026 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1027 for (i = 0; i < ctx->num_planes - 1; i++) {
1028 bytestream_put_be16(&slice_hdr, sizes[i]);
1029 slice_size += sizes[i];
// Seek table entry for this slice.
1031 bytestream_put_be16(&slice_sizes, slice_size);
1032 buf += slice_size - slice_hdr_size;
// Write back the picture size, measured from the start of the picture header.
1036 picture_size = buf - (picture_size_pos - 1);
1037 bytestream_put_be32(&picture_size_pos, picture_size);
// Write back the total frame size and finish the packet.
1041 frame_size = buf - orig_buf;
1042 bytestream_put_be32(&orig_buf, frame_size);
1044 pkt->size = frame_size;
1045 pkt->flags |= AV_PKT_FLAG_KEY;
// Release everything allocated by encode_init. It is also called from
// encode_init itself when an allocation fails.
1051 static av_cold int encode_close(AVCodecContext *avctx)
1053 ProresContext *ctx = avctx->priv_data;
1056 av_freep(&avctx->coded_frame);
// NOTE(review): this loop dereferences ctx->tdata; confirm that a NULL guard
// exists on the line not visible in this excerpt, since encode_init can call
// this function before tdata has been allocated.
1059 for (i = 0; i < avctx->thread_count; i++)
1060 av_free(ctx->tdata[i].nodes);
1062 av_freep(&ctx->tdata);
1063 av_freep(&ctx->slice_q);
// Copy an 8x8 block of 16-bit samples from `src` into `block`, row by row.
// NOTE(review): presumably the forward DCT of `fdsp` is applied to `block`
// afterwards; that call is not visible in this excerpt.
1068 static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
1069 int linesize, int16_t *block)
1072 const uint16_t *tsrc = src;
1074 for (y = 0; y < 8; y++) {
1075 for (x = 0; x < 8; x++)
1076 block[y * 8 + x] = tsrc[x];
// linesize is in bytes, tsrc points to 16-bit samples.
1077 tsrc += linesize >> 1;
// Validate the options, derive the picture and slice geometry, choose the
// quantisation matrix and bit budget, and allocate the per-slice and
// per-thread state. Returns a negative AVERROR code on failure.
1082 static av_cold int encode_init(AVCodecContext *avctx)
1084 ProresContext *ctx = avctx->priv_data;
1087 int min_quant, max_quant;
1088 int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
1090 avctx->bits_per_raw_sample = 10;
1091 avctx->coded_frame = av_frame_alloc();
1092 if (!avctx->coded_frame)
1093 return AVERROR(ENOMEM);
1095 ctx->fdct = prores_fdct;
1096 ctx->scantable = interlaced ? ff_prores_interlaced_scan
1097 : ff_prores_progressive_scan;
1098 ff_fdctdsp_init(&ctx->fdsp, avctx);
// mps & (mps - 1) is non-zero exactly when mps is not a power of two.
1100 mps = ctx->mbs_per_slice;
1101 if (mps & (mps - 1)) {
1102 av_log(avctx, AV_LOG_ERROR,
1103 "there should be an integer power of two MBs per slice\n");
1104 return AVERROR(EINVAL);
// alpha_bits must be a multiple of 8 for pixel formats with alpha and is
// forced to 0 otherwise.
1106 if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1107 if (ctx->alpha_bits & 7) {
1108 av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1109 return AVERROR(EINVAL);
1112 ctx->alpha_bits = 0;
1115 ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1118 ctx->profile_info = prores_profile_info + ctx->profile;
// Three colour planes plus one alpha plane when alpha is coded.
1119 ctx->num_planes = 3 + !!ctx->alpha_bits;
// Picture size in 16x16 macroblocks, rounded up.
1121 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
// Two variants of the height computation: per 32 lines or per 16 lines
// (the selecting condition is not visible; presumably `interlaced`).
1124 ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1126 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
// Slices per row: full-width slices, plus one slice for every set bit of the
// remainder, because leftover macroblocks are covered by slices of halved
// widths.
1128 ctx->slices_width = ctx->mb_width / mps;
1129 ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1130 ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1131 ctx->pictures_per_frame = 1 + interlaced;
// -1 ("auto") takes the matrix of the selected profile.
1133 if (ctx->quant_sel == -1)
1134 ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1136 ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
// Exactly four vendor bytes are written to the frame header.
1138 if (strlen(ctx->vendor) != 4) {
1139 av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1140 return AVERROR_INVALIDDATA;
// A non-zero global_quality selects a fixed quantiser and disables the
// rate-controlled search.
1143 ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1144 if (!ctx->force_quant) {
// No explicit bit budget: take it from the profile table by frame size class.
1145 if (!ctx->bits_per_mb) {
1146 for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1147 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1148 ctx->pictures_per_frame)
1150 ctx->bits_per_mb = ctx->profile_info->br_tab[i];
1151 } else if (ctx->bits_per_mb < 128) {
1152 av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1153 return AVERROR_INVALIDDATA;
// Precompute the scaled matrices for the quantisers below MAX_STORED_Q.
1156 min_quant = ctx->profile_info->min_quant;
1157 max_quant = ctx->profile_info->max_quant;
1158 for (i = min_quant; i < MAX_STORED_Q; i++) {
1159 for (j = 0; j < 64; j++)
1160 ctx->quants[i][j] = ctx->quant_mat[j] * i;
// One quantiser per slice of a picture.
1163 ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1164 if (!ctx->slice_q) {
1165 encode_close(avctx);
1166 return AVERROR(ENOMEM);
1169 ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1171 encode_close(avctx);
1172 return AVERROR(ENOMEM);
// Every thread gets its own trellis, with node groups for one row of slices
// plus the starting group.
1175 for (j = 0; j < avctx->thread_count; j++) {
1176 ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1178 * sizeof(*ctx->tdata->nodes));
1179 if (!ctx->tdata[j].nodes) {
1180 encode_close(avctx);
1181 return AVERROR(ENOMEM);
// Starting group: zero bits, zero score, no predecessor.
1183 for (i = min_quant; i < max_quant + 2; i++) {
1184 ctx->tdata[j].nodes[i].prev_node = -1;
1185 ctx->tdata[j].nodes[i].bits = 0;
1186 ctx->tdata[j].nodes[i].score = 0;
// Fixed quantiser mode.
1192 if (ctx->force_quant > 64) {
1193 av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1194 return AVERROR_INVALIDDATA;
// quants[0] holds the forced matrix; `ls` accumulates a per-coefficient bit
// estimate that the budget below is derived from.
1197 for (j = 0; j < 64; j++) {
1198 ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1199 ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
// Extra budget for 4:4:4 input and for an alpha plane.
1202 ctx->bits_per_mb = ls * 8;
1203 if (ctx->chroma_factor == CFACTOR_Y444)
1204 ctx->bits_per_mb += ls * 4;
1205 if (ctx->num_planes == 4)
1206 ctx->bits_per_mb += ls * 4;
// Upper bound per slice: header bytes plus the bit budget of mps
// macroblocks, times the number of slices and pictures. The end of this
// expression is not visible in this excerpt.
1209 ctx->frame_size_upper_bound = ctx->pictures_per_frame *
1210 ctx->slices_per_picture *
1211 (2 + 2 * ctx->num_planes +
1212 (mps * ctx->bits_per_mb) / 8)
1215 avctx->codec_tag = ctx->profile_info->tag;
1217 av_log(avctx, AV_LOG_DEBUG,
1218 "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1219 ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1220 interlaced ? "yes" : "no", ctx->bits_per_mb);
1221 av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1222 ctx->frame_size_upper_bound);
// Helpers for the option table: byte offset of a ProresContext field, and
// the flag set shared by all options (video + encoding parameter).
1227 #define OFFSET(x) offsetof(ProresContext, x)
1228 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
// Private options of the encoder. Entries of type AV_OPT_TYPE_CONST are
// named values for the option whose unit string ("profile" or "quant_mat")
// they share.
1230 static const AVOption options[] = {
// Default 8, range 1..MAX_MBS_PER_SLICE; encode_init additionally requires a
// power of two.
1231 { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1232 AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1233 { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1234 { .i64 = PRORES_PROFILE_STANDARD },
1235 PRORES_PROFILE_PROXY, PRORES_PROFILE_4444, VE, "profile" },
1236 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1237 0, 0, VE, "profile" },
1238 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1239 0, 0, VE, "profile" },
1240 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1241 0, 0, VE, "profile" },
1242 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1243 0, 0, VE, "profile" },
1244 { "4444", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1245 0, 0, VE, "profile" },
// encode_init rejects vendor strings that are not exactly 4 characters long.
1246 { "vendor", "vendor ID", OFFSET(vendor),
1247 AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
// 0 lets encode_init derive the value from the profile and the frame size.
1248 { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1249 AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
// -1 ("auto") uses the matrix of the selected profile.
1250 { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1251 { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1252 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1253 0, 0, VE, "quant_mat" },
1254 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1255 0, 0, VE, "quant_mat" },
1256 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1257 0, 0, VE, "quant_mat" },
1258 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1259 0, 0, VE, "quant_mat" },
1260 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1261 0, 0, VE, "quant_mat" },
1262 { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1263 0, 0, VE, "quant_mat" },
// Only consulted for pixel formats with alpha; must then be a multiple of 8.
1264 { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1265 { .i64 = 16 }, 0, 16, VE },
// AVClass describing the encoder's private context; referenced by
// ff_prores_ks_encoder.priv_class.
1269 static const AVClass proresenc_class = {
1270 .class_name = "ProRes encoder",
1271 .item_name = av_default_item_name,
1273 .version = LIBAVUTIL_VERSION_INT,
// Codec registration entry for the "prores_ks" encoder.
1276 AVCodec ff_prores_ks_encoder = {
1277 .name = "prores_ks",
1278 .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1279 .type = AVMEDIA_TYPE_VIDEO,
1280 .id = AV_CODEC_ID_PRORES,
1281 .priv_data_size = sizeof(ProresContext),
1282 .init = encode_init,
1283 .close = encode_close,
1284 .encode2 = encode_frame,
// Slice threading is used by the quantiser search (avctx->execute2).
1285 .capabilities = CODEC_CAP_SLICE_THREADS,
// Accepted input: 10-bit planar 4:2:2, 4:4:4 and 4:4:4 with alpha.
1286 .pix_fmts = (const enum AVPixelFormat[]) {
1287 AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1288 AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1290 .priv_class = &proresenc_class,