4 * Copyright (c) 2012 Konstantin Shishkov
6 * This encoder appears to be based on Anatoliy Wasserman's encoder,
7 * considering the similarities in the bugs.
9 * This file is part of FFmpeg.
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/opt.h"
27 #include "libavutil/pixdesc.h"
31 #include "bytestream.h"
33 #include "proresdata.h"
/* Chroma format codes. encode_frame writes ctx->chroma_factor into the top
 * two bits of the frame flags byte, and the slice code compares it against
 * CFACTOR_Y444 to decide whether chroma planes are full width. */
35 #define CFACTOR_Y422 2
36 #define CFACTOR_Y444 3
/* Largest allowed number of macroblocks per slice: sizes the per-plane
 * coefficient buffers and is the upper limit of the mbs_per_slice option. */
38 #define MAX_MBS_PER_SLICE 8
/* Profile identifiers used as the value of the profile option and as an
 * index into prores_profile_info.
 * NOTE(review): the enclosing declaration and the other enumerators
 * (LT, HQ and 4444 are referenced by the option table) are not visible in
 * this view. */
43 PRORES_PROFILE_PROXY = 0,
45 PRORES_PROFILE_STANDARD,
/* Quantisation matrices, 64 entries each, written as eight rows of eight.
 * encode_init selects one row of this array either through
 * ctx->profile_info->quant or through ctx->quant_sel.
 * Five tables are listed below.
 * NOTE(review): the braces and any labels separating the tables are not
 * visible in this view; presumably the order follows the QUANT_MAT_*
 * constants (proxy, lt, standard, hq, default) - confirm against that enum. */
58 static const uint8_t prores_quant_matrices[][64] = {
/* table 0 */
60 4, 7, 9, 11, 13, 14, 15, 63,
61 7, 7, 11, 12, 14, 15, 63, 63,
62 9, 11, 13, 14, 15, 63, 63, 63,
63 11, 11, 13, 14, 63, 63, 63, 63,
64 11, 13, 14, 63, 63, 63, 63, 63,
65 13, 14, 63, 63, 63, 63, 63, 63,
66 13, 63, 63, 63, 63, 63, 63, 63,
67 63, 63, 63, 63, 63, 63, 63, 63,
/* table 1 */
70 4, 5, 6, 7, 9, 11, 13, 15,
71 5, 5, 7, 8, 11, 13, 15, 17,
72 6, 7, 9, 11, 13, 15, 15, 17,
73 7, 7, 9, 11, 13, 15, 17, 19,
74 7, 9, 11, 13, 14, 16, 19, 23,
75 9, 11, 13, 14, 16, 19, 23, 29,
76 9, 11, 13, 15, 17, 21, 28, 35,
77 11, 13, 16, 17, 21, 28, 35, 41,
/* table 2 */
80 4, 4, 5, 5, 6, 7, 7, 9,
81 4, 4, 5, 6, 7, 7, 9, 9,
82 5, 5, 6, 7, 7, 9, 9, 10,
83 5, 5, 6, 7, 7, 9, 9, 10,
84 5, 6, 7, 7, 8, 9, 10, 12,
85 6, 7, 7, 8, 9, 10, 12, 15,
86 6, 7, 7, 9, 10, 11, 14, 17,
87 7, 7, 9, 10, 11, 14, 17, 21,
/* table 3 */
90 4, 4, 4, 4, 4, 4, 4, 4,
91 4, 4, 4, 4, 4, 4, 4, 4,
92 4, 4, 4, 4, 4, 4, 4, 4,
93 4, 4, 4, 4, 4, 4, 4, 5,
94 4, 4, 4, 4, 4, 4, 5, 5,
95 4, 4, 4, 4, 4, 5, 5, 6,
96 4, 4, 4, 4, 5, 5, 6, 7,
97 4, 4, 4, 4, 5, 6, 7, 7,
/* table 4: flat matrix, every entry is 4 */
100 4, 4, 4, 4, 4, 4, 4, 4,
101 4, 4, 4, 4, 4, 4, 4, 4,
102 4, 4, 4, 4, 4, 4, 4, 4,
103 4, 4, 4, 4, 4, 4, 4, 4,
104 4, 4, 4, 4, 4, 4, 4, 4,
105 4, 4, 4, 4, 4, 4, 4, 4,
106 4, 4, 4, 4, 4, 4, 4, 4,
107 4, 4, 4, 4, 4, 4, 4, 4,
/* Number of frame-size classes; also the length of each profile br_tab. */
111 #define NUM_MB_LIMITS 4
/* Macroblock-count thresholds in ascending order. When bits_per_mb is not
 * set, encode_init scans this table against
 * mb_width * mb_height * pictures_per_frame and uses the resulting index to
 * pick an entry of the profile br_tab. */
112 static const int prores_mb_limits[NUM_MB_LIMITS] = {
113 1620, // up to 720x576
114 2700, // up to 960x720
115 6075, // up to 1440x1080
116 9216, // up to 2048x1152
/* Per-profile parameters, five entries; encode_init indexes this array with
 * ctx->profile.
 *   full_name - human readable profile name
 *   tag       - FourCC copied into avctx->codec_tag by encode_init
 *   br_tab    - bits per macroblock for each prores_mb_limits size class
 *   quant     - index into prores_quant_matrices used when quant_mat is auto
 * NOTE(review): min_quant and max_quant are read from this struct elsewhere
 * in the file, but their declarations and initialisers, the full_name of two
 * entries and the entry delimiters are not visible in this view. */
119 static const struct prores_profile {
120 const char *full_name;
124 int br_tab[NUM_MB_LIMITS];
126 } prores_profile_info[5] = {
128 .full_name = "proxy",
129 .tag = MKTAG('a', 'p', 'c', 'o'),
132 .br_tab = { 300, 242, 220, 194 },
133 .quant = QUANT_MAT_PROXY,
137 .tag = MKTAG('a', 'p', 'c', 's'),
140 .br_tab = { 720, 560, 490, 440 },
141 .quant = QUANT_MAT_LT,
144 .full_name = "standard",
145 .tag = MKTAG('a', 'p', 'c', 'n'),
148 .br_tab = { 1050, 808, 710, 632 },
149 .quant = QUANT_MAT_STANDARD,
152 .full_name = "high quality",
153 .tag = MKTAG('a', 'p', 'c', 'h'),
156 .br_tab = { 1566, 1216, 1070, 950 },
157 .quant = QUANT_MAT_HQ,
/* last entry reuses the HQ matrix with its own tag and bit budget */
161 .tag = MKTAG('a', 'p', '4', 'h'),
164 .br_tab = { 2350, 1828, 1600, 1425 },
165 .quant = QUANT_MAT_HQ,
/* Number of trellis nodes reserved per slice column; find_slice_quant
 * addresses nodes as trellis_node + q and steps back one column with
 * trellis_node - TRELLIS_WIDTH. */
169 #define TRELLIS_WIDTH 16
/* Saturation value for trellis scores.
 * NOTE(review): the expansion is not parenthesised; the visible uses
 * (comparison with < and plain assignment) are unaffected, but wrapping it in
 * parentheses would be safer. */
170 #define SCORE_LIMIT INT_MAX / 2
/* Number of precomputed scaled matrices in ProresContext.quants; larger
 * quantisers are scaled on demand into custom_q. */
179 #define MAX_STORED_Q 16
/* Scratch state owned by one worker thread during quantiser search
 * (find_quant_thread takes ctx->tdata + threadnr).
 *   blocks   - per-plane coefficient/sample buffer filled by get_slice_data
 *              and get_alpha_data
 *   emu_buf  - 16x16 staging area for macroblocks that cross the picture edge
 *   custom_q - matrix storage for quantisers not covered by ctx->quants
 *   nodes    - trellis nodes, allocated in encode_init
 * NOTE(review): the closing line of the typedef is not visible here. */
181 typedef struct ProresThreadData {
182 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
183 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
184 int16_t custom_q[64];
185 struct TrellisNode *nodes;
/* Encoder private context (avctx->priv_data).
 * NOTE(review): many members used elsewhere in this file (for example
 * mbs_per_slice, slices_width, num_planes, bits_per_mb, force_quant,
 * alpha_bits, quant_sel, profile, vendor, slice_q, cur_picture_idx, fdsp)
 * are declared on lines that are not visible in this view. */
188 typedef struct ProresContext {
/* buffers used by encode_slice on the encoding (non-search) path */
190 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
191 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
/* quant_mat scaled by each stored quantiser; filled in encode_init */
192 int16_t quants[MAX_STORED_Q][64];
/* scratch matrix for quantisers of MAX_STORED_Q and above */
193 int16_t custom_q[64];
194 const uint8_t *quant_mat;
/* progressive or interlaced coefficient scan, chosen in encode_init */
195 const uint8_t *scantable;
/* block transform entry point; set to prores_fdct in encode_init */
197 void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
198 int linesize, int16_t *block);
/* picture size in 16x16 macroblocks */
201 int mb_width, mb_height;
203 int num_chroma_blocks, chroma_factor;
205 int slices_per_picture;
206 int pictures_per_frame; // 1 for progressive, 2 for interlaced
/* used by encode_frame as the packet payload size */
216 int frame_size_upper_bound;
219 const struct prores_profile *profile_info;
/* one ProresThreadData per thread, allocated in encode_init */
223 ProresThreadData *tdata;
/* Fetch the samples of one slice of one plane and transform them into
 * 8x8 coefficient blocks.
 *
 *   src            - first sample of the slice; advanced by mb_width samples
 *                    per macroblock
 *   linesize       - source stride in bytes (rows are addressed through a
 *                    uint8_t cast)
 *   x, y, w, h     - slice position and plane size in samples, used for the
 *                    picture-edge test
 *   blocks         - output coefficient buffer
 *   emu_buf        - 16x16 staging buffer for edge macroblocks
 *   blocks_per_mb  - macroblock width is 4 * blocks_per_mb samples
 *
 * NOTE(review): local declarations, the updates of x and blocks, several
 * branch headers and the closing braces are not visible in this view. */
226 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
227 int linesize, int x, int y, int w, int h,
228 int16_t *blocks, uint16_t *emu_buf,
229 int mbs_per_slice, int blocks_per_mb, int is_chroma)
231 const uint16_t *esrc;
232 const int mb_width = 4 * blocks_per_mb;
236 for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
/* zero every remaining block of the slice; the guarding condition is not
 * visible here */
238 memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
/* macroblock lies fully inside the plane: read the source directly */
242 if (x + mb_width <= w && y + 16 <= h) {
244 elinesize = linesize;
/* otherwise stage the visible part in emu_buf, whose rows are 16 samples */
249 elinesize = 16 * sizeof(*emu_buf);
251 bw = FFMIN(w - x, mb_width);
252 bh = FFMIN(h - y, 16);
254 for (j = 0; j < bh; j++) {
255 memcpy(emu_buf + j * 16,
256 (const uint8_t*)src + j * linesize,
/* replicate the last valid sample to the right edge of the macroblock */
258 pix = emu_buf[j * 16 + bw - 1];
259 for (k = bw; k < mb_width; k++)
260 emu_buf[j * 16 + k] = pix;
/* replicate the last valid row downwards */
263 memcpy(emu_buf + j * 16,
264 emu_buf + (bh - 1) * 16,
265 mb_width * sizeof(*emu_buf));
/* Two block orders follow; the branch selecting between them is not visible
 * (presumably is_chroma - confirm). esrc points to uint16_t and elinesize is
 * in bytes, so esrc + elinesize * 4 is eight rows further down.
 * First order: top-left, top-right, bottom-left, bottom-right. */
268 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
270 if (blocks_per_mb > 2) {
271 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
274 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
276 if (blocks_per_mb > 2) {
277 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
/* second order: top-left, bottom-left, then the right-hand column */
281 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
283 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
285 if (blocks_per_mb > 2) {
286 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
288 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
/* Copy the alpha samples of one slice into blocks as 16 rows of
 * 16 * mbs_per_slice samples, padding at the picture edge.
 *
 *   src      - first alpha sample of the slice
 *   linesize - source stride in bytes (src advances by linesize >> 1 samples)
 *   x, y     - slice position in samples; w, h - plane size
 *   abits    - target alpha depth; the branch that tests it is not visible
 *
 * NOTE(review): the abits test, the body of the first rescaling loop and the
 * closing braces are not visible in this view. */
297 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
298 int linesize, int x, int y, int w, int h,
299 int16_t *blocks, int mbs_per_slice, int abits)
301 const int slice_width = 16 * mbs_per_slice;
302 int i, j, copy_w, copy_h;
/* clamp the copied area to what lies inside the plane */
304 copy_w = FFMIN(w - x, slice_width);
305 copy_h = FFMIN(h - y, 16);
306 for (i = 0; i < copy_h; i++) {
307 memcpy(blocks, src, copy_w * sizeof(*src));
309 for (j = 0; j < copy_w; j++)
/* widen by shifting left 6 and filling the low bits from the top bits */
312 for (j = 0; j < copy_w; j++)
313 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
/* pad to the right with the last copied sample */
314 for (j = copy_w; j < slice_width; j++)
315 blocks[j] = blocks[copy_w - 1];
316 blocks += slice_width;
317 src += linesize >> 1;
/* pad downwards by repeating the previous row */
319 for (; i < 16; i++) {
320 memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
321 blocks += slice_width;
/* The codebook byte packs three fields: bits 0-1 hold switch_bits - 1,
 * bits 2-4 the exp-Golomb order and bits 5 and up the Rice order.
 * Values of at least switch_bits << rice_order take the exp-Golomb path,
 * smaller values the Rice path.
 * NOTE(review): the comment delimiters around the next line, the exponent
 * declaration, the else line and the guards around the Rice-path writes are
 * not visible in this view. */
326 * Write an unsigned rice/exp golomb codeword.
328 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
330 unsigned int rice_order, exp_order, switch_bits, switch_val;
333 /* number of prefix bits to switch between Rice and expGolomb */
334 switch_bits = (codebook & 3) + 1;
335 rice_order = codebook >> 5; /* rice code order */
336 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
338 switch_val = switch_bits << rice_order;
340 if (val >= switch_val) {
/* exp-Golomb: rebase val, then emit a zero prefix followed by val itself */
341 val -= switch_val - (1 << exp_order);
342 exponent = av_log2(val);
344 put_bits(pb, exponent - exp_order + switch_bits, 0);
345 put_bits(pb, exponent + 1, val);
/* Rice: quotient as a run of zeros, remainder in rice_order bits */
347 exponent = val >> rice_order;
350 put_bits(pb, exponent, 0);
353 put_sbits(pb, rice_order, val);
/* GET_SIGN shifts its argument right by 31; for a negative 32-bit int with
 * an arithmetic shift this yields -1, otherwise 0 (right-shifting a negative
 * value is implementation-defined in C).
 * MAKE_CODE doubles x and XORs in that sign, mapping a signed value to the
 * non-negative code passed to the VLC writers. Both evaluate x more than
 * once, so arguments must be free of side effects. */
357 #define GET_SIGN(x) ((x) >> 31)
358 #define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
/* Write the DC coefficients of all blocks in a slice.
 *
 *   blocks           - coefficient blocks, 64 values each; element 0 is the DC
 *   blocks_per_slice - number of blocks to code
 *   scale            - DC quantiser (callers pass qmat[0])
 *
 * The first DC is coded directly with FIRST_DC_CB; later ones are coded as
 * differences from the previous DC, with the codebook adapted from the
 * previous code.
 * NOTE(review): the initialisation of sign, the advance of blocks before the
 * loop and the updates of sign and prev_dc inside it are not visible here. */
360 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
361 int blocks_per_slice, int scale)
364 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
/* DC values are stored with a 0x4000 bias, removed before quantising */
366 prev_dc = (blocks[0] - 0x4000) / scale;
367 encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
372 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
373 dc = (blocks[0] - 0x4000) / scale;
374 delta = dc - prev_dc;
375 new_sign = GET_SIGN(delta);
/* negate delta when sign is -1, leave it unchanged when sign is 0 */
376 delta = (delta ^ sign) - sign;
377 code = MAKE_CODE(delta);
378 encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
/* next codebook index is ceil(code / 2), capped at 3 */
379 codebook = (code + (code & 1)) >> 1;
380 codebook = FFMIN(codebook, 3);
/* Write the AC coefficients of a slice as (run, level, sign) triples.
 *
 *   blocks           - coefficient blocks, 64 values each
 *   blocks_per_slice - number of blocks in the slice
 *   scan             - scan order; entry i gives the coefficient position
 *   qmat             - quantiser matrix indexed by coefficient position
 *
 * Coefficients are visited frequency by frequency: for each scan position
 * 1..63 the same position of every block is coded before moving on.
 * plane_size_factor is not used on the visible lines.
 * NOTE(review): the run counter handling, the test for a non-zero level and
 * the level argument of the second VLC call are not visible in this view. */
386 static void encode_acs(PutBitContext *pb, int16_t *blocks,
387 int blocks_per_slice,
388 int plane_size_factor,
389 const uint8_t *scan, const int16_t *qmat)
392 int run, level, run_cb, lev_cb;
393 int max_coeffs, abs_level;
395 max_coeffs = blocks_per_slice << 6;
/* initial codebooks before any coefficient has been coded */
396 run_cb = ff_prores_run_to_cb_index[4];
397 lev_cb = ff_prores_lev_to_cb_index[2];
400 for (i = 1; i < 64; i++) {
401 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
402 level = blocks[idx] / qmat[scan[i]];
404 abs_level = FFABS(level);
405 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
406 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
408 put_sbits(pb, 1, GET_SIGN(level));
/* adapt the codebooks to the run and level just coded */
410 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
411 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/* Encode one colour plane of a slice: DC coefficients first, then AC.
 * The DC quantiser is qmat[0]; the scan order comes from ctx->scantable.
 *
 * Returns the number of bytes added to pb, computed from the bit position
 * before and after.
 * NOTE(review): the end of the parameter list (including qmat) and the
 * statement before the return are not visible in this view. */
420 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
421 const uint16_t *src, int linesize,
422 int mbs_per_slice, int16_t *blocks,
423 int blocks_per_mb, int plane_size_factor,
426 int blocks_per_slice, saved_pos;
428 saved_pos = put_bits_count(pb);
429 blocks_per_slice = mbs_per_slice * blocks_per_mb;
431 encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
432 encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
433 ctx->scantable, qmat);
436 return (put_bits_count(pb) - saved_pos) >> 3;
/* Write the difference between the current and the previous alpha sample.
 * Differences that are zero or outside [-dsize, dsize] are written in abits
 * bits; the others as a magnitude of dbits - 1 bits followed by a sign bit.
 * NOTE(review): the masking of diff, the wrap-around adjustment and the flag
 * bits that distinguish the two forms are on lines not visible here. */
439 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
441 const int mask = (1 << abits) - 1;
442 const int dbits = (abits == 8) ? 4 : 7;
/* binary minus binds tighter than <<, so this is 1 << (dbits - 1) */
443 const int dsize = 1 << dbits - 1;
444 int diff = cur - prev;
447 if (diff >= (1 << abits) - dsize)
449 if (diff < -dsize || diff > dsize || !diff) {
451 put_bits(pb, abits, diff);
454 put_bits(pb, dbits - 1, FFABS(diff) - 1);
455 put_bits(pb, 1, diff < 0);
/* Write an alpha run length, using either a 4-bit or a 15-bit field.
 * NOTE(review): the conditions choosing between the two widths and any flag
 * bits written around them are not visible in this view. */
459 static void put_alpha_run(PutBitContext *pb, int run)
464 put_bits(pb, 4, run);
466 put_bits(pb, 15, run);
/* Encode the alpha samples of one slice (mbs_per_slice * 256 values) as
 * sample differences and run lengths, starting from a previous value of
 * all ones (mask).
 *
 * Returns the number of bytes added to pb.
 * NOTE(review): the end of the parameter list, the idx/run bookkeeping and
 * the loop header are not visible in this view, so the exact run detection
 * rule cannot be confirmed from here. */
472 // todo alpha quantisation for high quants
473 static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
474 int mbs_per_slice, uint16_t *blocks,
477 const int abits = ctx->alpha_bits;
478 const int mask = (1 << abits) - 1;
479 const int num_coeffs = mbs_per_slice * 256;
480 int saved_pos = put_bits_count(pb);
481 int prev = mask, cur;
486 put_alpha_diff(pb, cur, prev, abits);
491 put_alpha_run (pb, run);
492 put_alpha_diff(pb, cur, prev, abits);
498 } while (idx < num_coeffs);
/* flush the final pending run */
500 put_alpha_run(pb, run);
502 return (put_bits_count(pb) - saved_pos) >> 3;
/* Encode every plane of one slice into pb.
 *
 *   pic    - source frame
 *   sizes  - receives the coded size in bytes of each plane
 *   x, y   - slice position in macroblocks
 *   quant  - quantiser for this slice
 *
 * Returns AVERROR_BUFFER_TOO_SMALL when pb overflows; the success return
 * value is on a line not visible here (total_size is accumulated for it).
 * NOTE(review): several declarations, the xp/yp/num_cblocks assignments,
 * else lines and the colour/alpha branch header are not visible. */
505 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
507 int sizes[4], int x, int y, int quant,
510 ProresContext *ctx = avctx->priv_data;
514 int slice_width_factor = av_log2(mbs_per_slice);
515 int num_cblocks, pwidth, linesize, line_add;
516 int plane_factor, is_chroma;
/* for interlaced input, line_add selects which field line to start from */
519 if (ctx->pictures_per_frame == 1)
522 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
/* pick the quantiser matrix: forced, precomputed, or scaled on demand */
524 if (ctx->force_quant) {
525 qmat = ctx->quants[0];
526 } else if (quant < MAX_STORED_Q) {
527 qmat = ctx->quants[quant];
529 qmat = ctx->custom_q;
530 for (i = 0; i < 64; i++)
531 qmat[i] = ctx->quant_mat[i] * quant;
534 for (i = 0; i < ctx->num_planes; i++) {
535 is_chroma = (i == 1 || i == 2);
536 plane_factor = slice_width_factor + 2;
538 plane_factor += ctx->chroma_factor - 3;
/* luma, alpha and 4:4:4 chroma use the full width, other chroma half */
539 if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
543 pwidth = avctx->width;
548 pwidth = avctx->width >> 1;
/* with two pictures per frame the stride is doubled to skip the other field */
551 linesize = pic->linesize[i] * ctx->pictures_per_frame;
552 src = (const uint16_t*)(pic->data[i] + yp * linesize +
553 line_add * pic->linesize[i]) + xp;
/* colour plane path */
556 get_slice_data(ctx, src, linesize, xp, yp,
557 pwidth, avctx->height / ctx->pictures_per_frame,
558 ctx->blocks[0], ctx->emu_buf,
559 mbs_per_slice, num_cblocks, is_chroma);
560 sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
561 mbs_per_slice, ctx->blocks[0],
562 num_cblocks, plane_factor,
/* alpha plane path */
565 get_alpha_data(ctx, src, linesize, xp, yp,
566 pwidth, avctx->height / ctx->pictures_per_frame,
567 ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
568 sizes[i] = encode_alpha_plane(ctx, pb,
569 mbs_per_slice, ctx->blocks[0],
572 total_size += sizes[i];
/* NOTE(review): the two adjacent string literals below are concatenated
 * without a separating space and the message has no trailing newline. */
573 if (put_bits_left(pb) < 0) {
574 av_log(avctx, AV_LOG_ERROR, "Serious underevaluation of"
575 "required buffer size");
576 return AVERROR_BUFFER_TOO_SMALL;
/* Return the number of bits encode_vlc_codeword would use for val with the
 * given codebook, without writing anything. The codebook fields are decoded
 * the same way as in encode_vlc_codeword.
 * NOTE(review): the exponent declaration and the else line are not visible
 * in this view. */
582 static inline int estimate_vlc(unsigned codebook, int val)
584 unsigned int rice_order, exp_order, switch_bits, switch_val;
587 /* number of prefix bits to switch between Rice and expGolomb */
588 switch_bits = (codebook & 3) + 1;
589 rice_order = codebook >> 5; /* rice code order */
590 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
592 switch_val = switch_bits << rice_order;
594 if (val >= switch_val) {
595 val -= switch_val - (1 << exp_order);
596 exponent = av_log2(val);
/* zero prefix of exponent - exp_order + switch_bits bits plus exponent + 1
 * value bits */
598 return exponent * 2 - exp_order + switch_bits + 1;
/* Rice: quotient bits, the remainder, and one more bit */
600 return (val >> rice_order) + rice_order + 1;
/* Estimate the bit cost of the DC coefficients of a slice, following the
 * same steps as encode_dcs, and add the quantisation remainder of every DC
 * value to *error.
 *
 *   error            - accumulator for quantisation error (in/out)
 *   blocks           - coefficient blocks, 64 values each
 *   blocks_per_slice - number of blocks
 *
 * NOTE(review): the scale parameter declaration, the sign/prev_dc updates,
 * the advance of blocks before the loop and the return statement are not
 * visible in this view. */
604 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
608 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
611 prev_dc = (blocks[0] - 0x4000) / scale;
612 bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
616 *error += FFABS(blocks[0] - 0x4000) % scale;
618 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
619 dc = (blocks[0] - 0x4000) / scale;
620 *error += FFABS(blocks[0] - 0x4000) % scale;
621 delta = dc - prev_dc;
622 new_sign = GET_SIGN(delta);
623 delta = (delta ^ sign) - sign;
624 code = MAKE_CODE(delta);
625 bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
626 codebook = (code + (code & 1)) >> 1;
627 codebook = FFMIN(codebook, 3);
/* Estimate the bit cost of the AC coefficients of a slice, visiting them in
 * the same order as encode_acs, and add the quantisation remainder of every
 * coefficient to *error.
 * plane_size_factor is not used on the visible lines.
 * NOTE(review): run handling, the non-zero test, the level argument of the
 * second estimate_vlc call and the return statement are not visible here. */
635 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
636 int plane_size_factor,
637 const uint8_t *scan, const int16_t *qmat)
640 int run, level, run_cb, lev_cb;
641 int max_coeffs, abs_level;
644 max_coeffs = blocks_per_slice << 6;
645 run_cb = ff_prores_run_to_cb_index[4];
646 lev_cb = ff_prores_lev_to_cb_index[2];
649 for (i = 1; i < 64; i++) {
650 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
651 level = blocks[idx] / qmat[scan[i]];
652 *error += FFABS(blocks[idx]) % qmat[scan[i]];
654 abs_level = FFABS(level);
655 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
656 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
659 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
660 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/* Estimate the coded size in bits of one colour plane of a slice using the
 * coefficients already stored in td->blocks[plane]; quantisation error is
 * accumulated into *error.
 *
 * Returns the DC plus AC estimate rounded up to a multiple of 8 bits.
 * src and linesize are not used on the visible lines.
 * NOTE(review): one parameter line and the declaration of bits are not
 * visible in this view. */
671 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
672 const uint16_t *src, int linesize,
674 int blocks_per_mb, int plane_size_factor,
675 const int16_t *qmat, ProresThreadData *td)
677 int blocks_per_slice;
680 blocks_per_slice = mbs_per_slice * blocks_per_mb;
682 bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
683 bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
684 plane_size_factor, ctx->scantable, qmat);
686 return FFALIGN(bits, 8);
/* Bit-cost counterpart of put_alpha_diff: classifies the difference between
 * cur and prev with the same thresholds.
 * NOTE(review): the masking of diff, the wrap-around adjustment and both
 * return statements are not visible in this view; presumably the returned
 * values match the bits written by put_alpha_diff - confirm. */
689 static int est_alpha_diff(int cur, int prev, int abits)
691 const int mask = (1 << abits) - 1;
692 const int dbits = (abits == 8) ? 4 : 7;
/* binary minus binds tighter than <<, so this is 1 << (dbits - 1) */
693 const int dsize = 1 << dbits - 1;
694 int diff = cur - prev;
697 if (diff >= (1 << abits) - dsize)
699 if (diff < -dsize || diff > dsize || !diff)
/* Estimate the bit cost of the alpha samples of one slice
 * (mbs_per_slice * 256 values), starting from a previous value of all ones.
 * NOTE(review): the end of the parameter list, the run accounting, the loop
 * header and the return statement are not visible in this view. */
705 static int estimate_alpha_plane(ProresContext *ctx, int *error,
706 const uint16_t *src, int linesize,
707 int mbs_per_slice, int quant,
710 const int abits = ctx->alpha_bits;
711 const int mask = (1 << abits) - 1;
712 const int num_coeffs = mbs_per_slice * 256;
713 int prev = mask, cur;
720 bits = est_alpha_diff(cur, prev, abits);
731 bits += est_alpha_diff(cur, prev, abits);
737 } while (idx < num_coeffs);
/* Quantiser search for one slice. The slice is transformed once, its size
 * and quantisation error are estimated for every candidate quantiser, and
 * one column of the rate/distortion trellis in td->nodes is extended.
 *
 *   trellis_node  - index of the first node of this slice column; the
 *                   previous column starts TRELLIS_WIDTH entries earlier
 *   x, y          - slice position in macroblocks
 *   mbs_per_slice - slice width in macroblocks
 *   td            - per-thread scratch buffers and trellis nodes
 *
 * NOTE(review): the return statement is not visible in this view;
 * find_quant_thread uses the result as a node index for backtracking.
 * Many short lines (declarations, else lines, resets of bits and error,
 * penalty assignments, break statements) are also not visible.
 * NOTE(review): slice_bits and slice_score hold TRELLIS_WIDTH entries and
 * are indexed up to max_quant + 1, so max_quant + 1 must stay below
 * TRELLIS_WIDTH; the profile values are not visible here - confirm. */
749 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
750 int trellis_node, int x, int y, int mbs_per_slice,
751 ProresThreadData *td)
753 ProresContext *ctx = avctx->priv_data;
754 int i, q, pq, xp, yp;
756 int slice_width_factor = av_log2(mbs_per_slice);
757 int num_cblocks[MAX_PLANES], pwidth;
758 int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
759 const int min_quant = ctx->profile_info->min_quant;
760 const int max_quant = ctx->profile_info->max_quant;
761 int error, bits, bits_limit;
762 int mbs, prev, cur, new_score;
763 int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
766 int linesize[4], line_add;
768 if (ctx->pictures_per_frame == 1)
771 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
/* macroblocks in this row up to and including this slice; scales the
 * cumulative bit budget below */
772 mbs = x + mbs_per_slice;
/* step 1: load and transform every plane of the slice into td->blocks */
774 for (i = 0; i < ctx->num_planes; i++) {
775 is_chroma[i] = (i == 1 || i == 2);
776 plane_factor[i] = slice_width_factor + 2;
778 plane_factor[i] += ctx->chroma_factor - 3;
779 if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
783 pwidth = avctx->width;
788 pwidth = avctx->width >> 1;
791 linesize[i] = pic->linesize[i] * ctx->pictures_per_frame;
792 src = (const uint16_t*)(pic->data[i] + yp * linesize[i] +
793 line_add * pic->linesize[i]) + xp;
796 get_slice_data(ctx, src, linesize[i], xp, yp,
797 pwidth, avctx->height / ctx->pictures_per_frame,
798 td->blocks[i], td->emu_buf,
799 mbs_per_slice, num_cblocks[i], is_chroma[i]);
801 get_alpha_data(ctx, src, linesize[i], xp, yp,
802 pwidth, avctx->height / ctx->pictures_per_frame,
803 td->blocks[i], mbs_per_slice, ctx->alpha_bits);
/* step 2: reset the nodes of this column */
807 for (q = min_quant; q < max_quant + 2; q++) {
808 td->nodes[trellis_node + q].prev_node = -1;
809 td->nodes[trellis_node + q].quant = q;
/* step 3: cost of the slice for every regular quantiser */
812 // todo: maybe perform coarser quantising to fit into frame size when needed
813 for (q = min_quant; q <= max_quant; q++) {
816 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
817 bits += estimate_slice_plane(ctx, &error, i,
820 num_cblocks[i], plane_factor[i],
824 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
825 mbs_per_slice, q, td->blocks[3]);
/* oversized slices are handled on a line not visible here */
826 if (bits > 65000 * 8) {
830 slice_bits[q] = bits;
831 slice_score[q] = error;
/* step 4: extra entry max_quant + 1. If max_quant already fits the budget it
 * mirrors max_quant with a score one higher; otherwise larger quantisers up
 * to 127 are tried until the slice fits. */
833 if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
834 slice_bits[max_quant + 1] = slice_bits[max_quant];
835 slice_score[max_quant + 1] = slice_score[max_quant] + 1;
836 overquant = max_quant;
838 for (q = max_quant + 1; q < 128; q++) {
841 if (q < MAX_STORED_Q) {
842 qmat = ctx->quants[q];
845 for (i = 0; i < 64; i++)
846 qmat[i] = ctx->quant_mat[i] * q;
848 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
849 bits += estimate_slice_plane(ctx, &error, i,
852 num_cblocks[i], plane_factor[i],
856 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
857 mbs_per_slice, q, td->blocks[3]);
858 if (bits <= ctx->bits_per_mb * mbs_per_slice)
862 slice_bits[max_quant + 1] = bits;
863 slice_score[max_quant + 1] = error;
866 td->nodes[trellis_node + max_quant + 1].quant = overquant;
/* step 5: connect every node of the previous column to every node of this
 * one, keeping for each node the predecessor with the lowest score */
868 bits_limit = mbs * ctx->bits_per_mb;
869 for (pq = min_quant; pq < max_quant + 2; pq++) {
870 prev = trellis_node - TRELLIS_WIDTH + pq;
872 for (q = min_quant; q < max_quant + 2; q++) {
873 cur = trellis_node + q;
875 bits = td->nodes[prev].bits + slice_bits[q];
876 error = slice_score[q];
/* paths over the cumulative budget are penalised on a line not visible here */
877 if (bits > bits_limit)
/* saturate instead of adding once either term has reached SCORE_LIMIT */
880 if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
881 new_score = td->nodes[prev].score + error;
883 new_score = SCORE_LIMIT;
884 if (td->nodes[cur].prev_node == -1 ||
885 td->nodes[cur].score >= new_score) {
887 td->nodes[cur].bits = bits;
888 td->nodes[cur].score = new_score;
889 td->nodes[cur].prev_node = prev;
/* step 6: pick the best node of this column; because of <= a tie goes to
 * the higher quantiser */
894 error = td->nodes[trellis_node + min_quant].score;
895 pq = trellis_node + min_quant;
896 for (q = min_quant + 1; q < max_quant + 2; q++) {
897 if (td->nodes[trellis_node + q].score <= error) {
898 error = td->nodes[trellis_node + q].score;
899 pq = trellis_node + q;
/* Worker passed to avctx->execute2: chooses the quantisers for one
 * macroblock row.
 *
 *   jobnr    - macroblock row index (used as y)
 *   threadnr - selects the ProresThreadData entry in ctx->tdata
 *
 * Slices are visited left to right, each extending the trellis by one
 * column; the chosen path is then walked backwards through prev_node to
 * fill ctx->slice_q for the row.
 * NOTE(review): the body of the while loop, the remaining arguments of the
 * find_slice_quant call and the return statement are not visible here. */
906 static int find_quant_thread(AVCodecContext *avctx, void *arg,
907 int jobnr, int threadnr)
909 ProresContext *ctx = avctx->priv_data;
910 ProresThreadData *td = ctx->tdata + threadnr;
911 int mbs_per_slice = ctx->mbs_per_slice;
912 int x, y = jobnr, mb, q = 0;
914 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
/* slice would extend past the right edge of the row */
915 while (ctx->mb_width - x < mbs_per_slice)
/* column 0 of the trellis is the start state, so slice mb uses column mb + 1 */
917 q = find_slice_quant(avctx, avctx->coded_frame,
918 (mb + 1) * TRELLIS_WIDTH, x, y,
/* backtrack from the last slice to the first */
922 for (x = ctx->slices_width - 1; x >= 0; x--) {
923 ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
924 q = td->nodes[q].prev_node;
/* Encode one frame into pkt: frame header, then for each picture (one, or
 * two when interlaced) a picture header, a table of slice sizes and the
 * slices themselves.
 *
 *   pkt        - output packet, allocated here with ff_alloc_packet2
 *   pic        - input frame
 *   got_packet - output flag; its assignment is not visible in this view
 *
 * NOTE(review): error-return lines, several pointer assignments (buf, tmp,
 * slice_sizes, slice_hdr), else lines and the final return are not visible
 * in this view. */
930 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
931 const AVFrame *pic, int *got_packet)
933 ProresContext *ctx = avctx->priv_data;
934 uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
935 uint8_t *picture_size_pos;
937 int x, y, i, mb, q = 0;
938 int sizes[4] = { 0 };
/* slice header: 2 bytes plus a 16-bit size for every plane but the last */
939 int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
940 int frame_size, picture_size, slice_size;
/* every frame is coded as a key frame of type I */
944 *avctx->coded_frame = *pic;
945 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
946 avctx->coded_frame->key_frame = 1;
948 pkt_size = ctx->frame_size_upper_bound;
950 if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size + FF_MIN_BUFFER_SIZE)) < 0)
953 orig_buf = pkt->data;
/* frame container */
956 orig_buf += 4; // frame size
957 bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
/* frame header */
962 buf += 2; // frame header size will be stored here
963 bytestream_put_be16 (&buf, 0); // version 1
964 bytestream_put_buffer(&buf, ctx->vendor, 4);
965 bytestream_put_be16 (&buf, avctx->width);
966 bytestream_put_be16 (&buf, avctx->height);
/* flags: chroma format in the top two bits, field order in bits 2-3 */
968 frame_flags = ctx->chroma_factor << 6;
969 if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
970 frame_flags |= pic->top_field_first ? 0x04 : 0x08;
971 bytestream_put_byte (&buf, frame_flags);
973 bytestream_put_byte (&buf, 0); // reserved
974 bytestream_put_byte (&buf, avctx->color_primaries);
975 bytestream_put_byte (&buf, avctx->color_trc);
976 bytestream_put_byte (&buf, avctx->colorspace);
/* low bits carry alpha_bits / 8, i.e. 0, 1 or 2 */
977 bytestream_put_byte (&buf, 0x40 | (ctx->alpha_bits >> 3));
978 bytestream_put_byte (&buf, 0); // reserved
979 if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
980 bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
981 // luma quantisation matrix
982 for (i = 0; i < 64; i++)
983 bytestream_put_byte(&buf, ctx->quant_mat[i]);
/* the same matrix is written again for chroma */
984 // chroma quantisation matrix
985 for (i = 0; i < 64; i++)
986 bytestream_put_byte(&buf, ctx->quant_mat[i]);
988 bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
990 bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
/* one pass per picture */
992 for (ctx->cur_picture_idx = 0;
993 ctx->cur_picture_idx < ctx->pictures_per_frame;
994 ctx->cur_picture_idx++) {
/* remember where the 32-bit picture size goes, just after the header size byte */
996 picture_size_pos = buf + 1;
997 bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
998 buf += 4; // picture data size will be stored here
999 bytestream_put_be16 (&buf, ctx->slices_per_picture);
1000 bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
1002 // seek table - will be filled during slice encoding
1004 buf += ctx->slices_per_picture * 2;
/* quantiser search runs through execute2 unless a fixed quantiser is forced */
1007 if (!ctx->force_quant) {
1008 ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
1014 for (y = 0; y < ctx->mb_height; y++) {
1015 int mbs_per_slice = ctx->mbs_per_slice;
1016 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1017 q = ctx->force_quant ? ctx->force_quant
1018 : ctx->slice_q[mb + y * ctx->slices_width];
/* narrow the slice by halving until it fits the remaining macroblocks */
1020 while (ctx->mb_width - x < mbs_per_slice)
1021 mbs_per_slice >>= 1;
/* first slice header byte: header size in bits */
1023 bytestream_put_byte(&buf, slice_hdr_size << 3);
1025 buf += slice_hdr_size - 1;
1026 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
1027 ret = encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
/* fill in the rest of the slice header now that plane sizes are known */
1031 bytestream_put_byte(&slice_hdr, q);
1032 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1033 for (i = 0; i < ctx->num_planes - 1; i++) {
1034 bytestream_put_be16(&slice_hdr, sizes[i]);
1035 slice_size += sizes[i];
1037 bytestream_put_be16(&slice_sizes, slice_size);
1038 buf += slice_size - slice_hdr_size;
/* picture size is counted from the picture header size byte */
1042 picture_size = buf - (picture_size_pos - 1);
1043 bytestream_put_be32(&picture_size_pos, picture_size);
/* write the total size back into the first four bytes */
1047 frame_size = buf - orig_buf;
1048 bytestream_put_be32(&orig_buf, frame_size);
1050 pkt->size = frame_size;
1051 pkt->flags |= AV_PKT_FLAG_KEY;
/* Release everything allocated by encode_init: the coded frame, the trellis
 * nodes of every thread, the per-thread array and the slice quantiser table.
 * NOTE(review): the loop below dereferences ctx->tdata, and encode_init
 * calls this function after failed allocations; the guard against a NULL
 * tdata, if there is one, is on a line not visible here - confirm.
 * The return statement is not visible either. */
1057 static av_cold int encode_close(AVCodecContext *avctx)
1059 ProresContext *ctx = avctx->priv_data;
1062 av_freep(&avctx->coded_frame);
1065 for (i = 0; i < avctx->thread_count; i++)
1066 av_free(ctx->tdata[i].nodes);
1068 av_freep(&ctx->tdata);
1069 av_freep(&ctx->slice_q);
/* Transform callback installed in ctx->fdct. Copies an 8x8 block of samples
 * from src into block, row by row.
 *
 *   src      - top-left sample of the block
 *   linesize - source stride in bytes (rows advance by linesize >> 1 samples)
 *   block    - receives the 64 values in row-major order
 *
 * NOTE(review): the statement that applies the transform through fdsp is
 * not visible in this view. */
1074 static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
1075 int linesize, int16_t *block)
1078 const uint16_t *tsrc = src;
1080 for (y = 0; y < 8; y++) {
1081 for (x = 0; x < 8; x++)
1082 block[y * 8 + x] = tsrc[x];
1083 tsrc += linesize >> 1;
/* Validate the options, derive picture and slice geometry, set up quantiser
 * tables and, for rate-controlled encoding, allocate the quantiser search
 * state.
 *
 * Returns AVERROR(EINVAL) or AVERROR_INVALIDDATA for rejected options and
 * AVERROR(ENOMEM) on allocation failure; the success return is on a line
 * not visible here.
 * NOTE(review): some declarations, else lines, break statements and closing
 * braces are not visible in this view.
 * NOTE(review): the option checks return without calling encode_close
 * although coded_frame is already allocated; whether the caller invokes the
 * close callback after a failed init cannot be determined from this file. */
1088 static av_cold int encode_init(AVCodecContext *avctx)
1090 ProresContext *ctx = avctx->priv_data;
1093 int min_quant, max_quant;
1094 int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
1096 avctx->bits_per_raw_sample = 10;
1097 avctx->coded_frame = av_frame_alloc();
1098 if (!avctx->coded_frame)
1099 return AVERROR(ENOMEM);
1101 ctx->fdct = prores_fdct;
1102 ctx->scantable = interlaced ? ff_prores_interlaced_scan
1103 : ff_prores_progressive_scan;
1104 ff_fdctdsp_init(&ctx->fdsp, avctx);
/* mps & (mps - 1) is non-zero exactly when mps is not a power of two */
1106 mps = ctx->mbs_per_slice;
1107 if (mps & (mps - 1)) {
1108 av_log(avctx, AV_LOG_ERROR,
1109 "there should be an integer power of two MBs per slice\n");
1110 return AVERROR(EINVAL);
/* alpha depth only matters for pixel formats with an alpha plane */
1112 if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1113 if (ctx->alpha_bits & 7) {
1114 av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1115 return AVERROR(EINVAL);
1118 ctx->alpha_bits = 0;
/* the two values selected by this comparison are not visible here */
1121 ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1124 ctx->profile_info = prores_profile_info + ctx->profile;
1125 ctx->num_planes = 3 + !!ctx->alpha_bits;
/* picture size in macroblocks; the first mb_height form rounds the height
 * to 32 and halves it again (the selecting condition is not visible,
 * presumably interlaced) */
1127 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
1130 ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1132 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
/* Full-width slices plus one slice per set bit of the leftover macroblock
 * count: the encoder halves the slice width at the row end, so the
 * remainder is covered by power-of-two slices. */
1134 ctx->slices_width = ctx->mb_width / mps;
1135 ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1136 ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1137 ctx->pictures_per_frame = 1 + interlaced;
/* quant_sel of -1 means use the matrix associated with the profile */
1139 if (ctx->quant_sel == -1)
1140 ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1142 ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
/* NOTE(review): this option error and the bits_per_mb and force_quant ones
 * below return AVERROR_INVALIDDATA, while the option checks above return
 * AVERROR(EINVAL). */
1144 if (strlen(ctx->vendor) != 4) {
1145 av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1146 return AVERROR_INVALIDDATA;
/* a non-zero global_quality selects a fixed quantiser */
1149 ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1150 if (!ctx->force_quant) {
/* rate-controlled mode: choose the bit budget from the frame size class */
1151 if (!ctx->bits_per_mb) {
1152 for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1153 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1154 ctx->pictures_per_frame)
1156 ctx->bits_per_mb = ctx->profile_info->br_tab[i];
1157 } else if (ctx->bits_per_mb < 128) {
1158 av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1159 return AVERROR_INVALIDDATA;
/* precompute the scaled matrices for the stored quantisers */
1162 min_quant = ctx->profile_info->min_quant;
1163 max_quant = ctx->profile_info->max_quant;
1164 for (i = min_quant; i < MAX_STORED_Q; i++) {
1165 for (j = 0; j < 64; j++)
1166 ctx->quants[i][j] = ctx->quant_mat[j] * i;
1169 ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1170 if (!ctx->slice_q) {
1171 encode_close(avctx);
1172 return AVERROR(ENOMEM);
1175 ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1177 encode_close(avctx);
1178 return AVERROR(ENOMEM);
/* per-thread trellis nodes; the second factor of the size is not visible */
1181 for (j = 0; j < avctx->thread_count; j++) {
1182 ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1184 * sizeof(*ctx->tdata->nodes));
1185 if (!ctx->tdata[j].nodes) {
1186 encode_close(avctx);
1187 return AVERROR(ENOMEM);
/* column 0 is the trellis start state: no predecessor, zero cost */
1189 for (i = min_quant; i < max_quant + 2; i++) {
1190 ctx->tdata[j].nodes[i].prev_node = -1;
1191 ctx->tdata[j].nodes[i].bits = 0;
1192 ctx->tdata[j].nodes[i].score = 0;
/* fixed quantiser mode */
1198 if (ctx->force_quant > 64) {
1199 av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1200 return AVERROR_INVALIDDATA;
/* only quants[0] is used in this mode; ls sums a per-coefficient size term */
1203 for (j = 0; j < 64; j++) {
1204 ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1205 ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
/* derive a per-macroblock bound, increased for 4:4:4 and for alpha */
1208 ctx->bits_per_mb = ls * 8;
1209 if (ctx->chroma_factor == CFACTOR_Y444)
1210 ctx->bits_per_mb += ls * 4;
1211 if (ctx->num_planes == 4)
1212 ctx->bits_per_mb += ls * 4;
/* packet size bound: per slice, a header allowance plus the macroblock
 * budget in bytes; the end of this expression is not visible here */
1215 ctx->frame_size_upper_bound = ctx->pictures_per_frame *
1216 ctx->slices_per_picture *
1217 (2 + 2 * ctx->num_planes +
1218 (mps * ctx->bits_per_mb) / 8)
1221 avctx->codec_tag = ctx->profile_info->tag;
1223 av_log(avctx, AV_LOG_DEBUG,
1224 "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1225 ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1226 interlaced ? "yes" : "no", ctx->bits_per_mb);
1227 av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1228 ctx->frame_size_upper_bound);
/* OFFSET gives the byte offset of a ProresContext member for the option
 * table; VE is the flag combination used by every entry of that table.
 * NOTE(review): the VE expansion is not parenthesised; it is only used as a
 * complete initialiser element in the visible code. */
1233 #define OFFSET(x) offsetof(ProresContext, x)
1234 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/* Private encoder options. Each integer option is followed by the named
 * constants that share its unit string.
 * NOTE(review): the terminating entry and the closing brace of the table
 * are not visible in this view. */
1236 static const AVOption options[] = {
/* slice width in macroblocks; encode_init also requires a power of two */
1237 { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1238 AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
/* profile selection, default standard */
1239 { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1240 { .i64 = PRORES_PROFILE_STANDARD },
1241 PRORES_PROFILE_PROXY, PRORES_PROFILE_4444, VE, "profile" },
1242 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1243 0, 0, VE, "profile" },
1244 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1245 0, 0, VE, "profile" },
1246 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1247 0, 0, VE, "profile" },
1248 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1249 0, 0, VE, "profile" },
1250 { "4444", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1251 0, 0, VE, "profile" },
/* four-character vendor code written into the frame header; encode_init
 * rejects any other length */
1252 { "vendor", "vendor ID", OFFSET(vendor),
1253 AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
/* 0 lets encode_init choose from the profile table; otherwise it requires
 * at least 128 */
1254 { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1255 AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
/* quantiser matrix selection; -1 (auto) follows the profile */
1256 { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1257 { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1258 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1259 0, 0, VE, "quant_mat" },
1260 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1261 0, 0, VE, "quant_mat" },
1262 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1263 0, 0, VE, "quant_mat" },
1264 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1265 0, 0, VE, "quant_mat" },
1266 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1267 0, 0, VE, "quant_mat" },
1268 { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1269 0, 0, VE, "quant_mat" },
/* alpha depth; encode_init accepts only multiples of 8 and resets it to 0
 * for pixel formats without alpha */
1270 { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1271 { .i64 = 16 }, 0, 16, VE },
/* AVClass for the encoder private context, referenced by the codec
 * definition through priv_class.
 * NOTE(review): one member initialiser (presumably the option table) and
 * the closing brace are not visible in this view. */
1275 static const AVClass proresenc_class = {
1276 .class_name = "ProRes encoder",
1277 .item_name = av_default_item_name,
1279 .version = LIBAVUTIL_VERSION_INT,
/* Codec registration entry: ties the init, encode and close callbacks to
 * the prores_ks encoder name, declares slice threading and lists the
 * accepted 10-bit pixel formats (4:2:2, 4:4:4 and 4:4:4 with alpha).
 * NOTE(review): the closing lines of the pixel format list and of the
 * struct initialiser are not visible in this view. */
1282 AVCodec ff_prores_ks_encoder = {
1283 .name = "prores_ks",
1284 .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1285 .type = AVMEDIA_TYPE_VIDEO,
1286 .id = AV_CODEC_ID_PRORES,
1287 .priv_data_size = sizeof(ProresContext),
1288 .init = encode_init,
1289 .close = encode_close,
1290 .encode2 = encode_frame,
1291 .capabilities = CODEC_CAP_SLICE_THREADS,
1292 .pix_fmts = (const enum AVPixelFormat[]) {
1293 AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1294 AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1296 .priv_class = &proresenc_class,