4 * Copyright (c) 2012 Konstantin Shishkov
6 * This file is part of Libav.
8 * Libav is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * Libav is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with Libav; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 #include "libavutil/opt.h"
24 #include "libavutil/pixdesc.h"
28 #include "bytestream.h"
30 #include "proresdsp.h"
31 #include "proresdata.h"
/* Chroma-format codes held in ProresContext.chroma_factor; encode_frame()
 * writes the value into the frame-flags byte as (chroma_factor << 6). */
33 #define CFACTOR_Y422 2
34 #define CFACTOR_Y444 3
/* Largest accepted "mbs_per_slice" option value; also sizes the per-plane
 * coefficient buffers (64 * 4 * MAX_MBS_PER_SLICE entries). */
36 #define MAX_MBS_PER_SLICE 8
/* Profile identifiers: values of the "profile" option and, in encode_init(),
 * the offset added to prores_profile_info to select a profile entry. */
41 PRORES_PROFILE_PROXY = 0,
43 PRORES_PROFILE_STANDARD,
/*
 * Quantisation matrices, 64 coefficients per entry.
 *
 * encode_init() selects one entry with ctx->quant_sel, or with
 * profile_info->quant when quant_sel is -1. Each coefficient is later
 * multiplied by the slice quantiser to build the actual divisor table.
 *
 * NOTE(review): entries are presumably ordered to match the QUANT_MAT_*
 * constants (proxy, LT, standard, HQ, default) -- confirm against the enum.
 */
56 static const uint8_t prores_quant_matrices[][64] = {
58 4, 7, 9, 11, 13, 14, 15, 63,
59 7, 7, 11, 12, 14, 15, 63, 63,
60 9, 11, 13, 14, 15, 63, 63, 63,
61 11, 11, 13, 14, 63, 63, 63, 63,
62 11, 13, 14, 63, 63, 63, 63, 63,
63 13, 14, 63, 63, 63, 63, 63, 63,
64 13, 63, 63, 63, 63, 63, 63, 63,
65 63, 63, 63, 63, 63, 63, 63, 63,
68 4, 5, 6, 7, 9, 11, 13, 15,
69 5, 5, 7, 8, 11, 13, 15, 17,
70 6, 7, 9, 11, 13, 15, 15, 17,
71 7, 7, 9, 11, 13, 15, 17, 19,
72 7, 9, 11, 13, 14, 16, 19, 23,
73 9, 11, 13, 14, 16, 19, 23, 29,
74 9, 11, 13, 15, 17, 21, 28, 35,
75 11, 13, 16, 17, 21, 28, 35, 41,
78 4, 4, 5, 5, 6, 7, 7, 9,
79 4, 4, 5, 6, 7, 7, 9, 9,
80 5, 5, 6, 7, 7, 9, 9, 10,
81 5, 5, 6, 7, 7, 9, 9, 10,
82 5, 6, 7, 7, 8, 9, 10, 12,
83 6, 7, 7, 8, 9, 10, 12, 15,
84 6, 7, 7, 9, 10, 11, 14, 17,
85 7, 7, 9, 10, 11, 14, 17, 21,
88 4, 4, 4, 4, 4, 4, 4, 4,
89 4, 4, 4, 4, 4, 4, 4, 4,
90 4, 4, 4, 4, 4, 4, 4, 4,
91 4, 4, 4, 4, 4, 4, 4, 5,
92 4, 4, 4, 4, 4, 4, 5, 5,
93 4, 4, 4, 4, 4, 5, 5, 6,
94 4, 4, 4, 4, 5, 5, 6, 7,
95 4, 4, 4, 4, 5, 6, 7, 7,
98 4, 4, 4, 4, 4, 4, 4, 4,
99 4, 4, 4, 4, 4, 4, 4, 4,
100 4, 4, 4, 4, 4, 4, 4, 4,
101 4, 4, 4, 4, 4, 4, 4, 4,
102 4, 4, 4, 4, 4, 4, 4, 4,
103 4, 4, 4, 4, 4, 4, 4, 4,
104 4, 4, 4, 4, 4, 4, 4, 4,
105 4, 4, 4, 4, 4, 4, 4, 4,
/*
 * Picture-size classes expressed as macroblock counts.
 *
 * encode_init() compares these thresholds against
 * mb_width * mb_height * pictures_per_frame to obtain the index used to read
 * a profile's br_tab[] (default bits per macroblock).
 */
109 #define NUM_MB_LIMITS 4
110 static const int prores_mb_limits[NUM_MB_LIMITS] = {
111 1620, // up to 720x576
112 2700, // up to 960x720
113 6075, // up to 1440x1080
114 9216, // up to 2048x1152
/*
 * Static description of each encoder profile.
 *
 * Visible fields: a display name, the codec tag copied into avctx->codec_tag
 * by encode_init(), br_tab[] (default bits per macroblock, one value per
 * prores_mb_limits[] class) and the quantisation matrix used when the
 * "quant_mat" option is left on "auto".
 *
 * NOTE(review): other code reads profile_info->min_quant and ->max_quant;
 * their declarations and initialisers are not visible in this listing.
 */
117 static const struct prores_profile {
118 const char *full_name;
122 int br_tab[NUM_MB_LIMITS];
124 } prores_profile_info[5] = {
126 .full_name = "proxy",
127 .tag = MKTAG('a', 'p', 'c', 'o'),
130 .br_tab = { 300, 242, 220, 194 },
131 .quant = QUANT_MAT_PROXY,
135 .tag = MKTAG('a', 'p', 'c', 's'),
138 .br_tab = { 720, 560, 490, 440 },
139 .quant = QUANT_MAT_LT,
142 .full_name = "standard",
143 .tag = MKTAG('a', 'p', 'c', 'n'),
146 .br_tab = { 1050, 808, 710, 632 },
147 .quant = QUANT_MAT_STANDARD,
150 .full_name = "high quality",
151 .tag = MKTAG('a', 'p', 'c', 'h'),
154 .br_tab = { 1566, 1216, 1070, 950 },
155 .quant = QUANT_MAT_HQ,
159 .tag = MKTAG('a', 'p', '4', 'h'),
162 .br_tab = { 2350, 1828, 1600, 1425 },
163 .quant = QUANT_MAT_HQ,
/* Number of trellis nodes reserved per slice: find_quant_thread() hands slice
 * number mb the node group starting at (mb + 1) * TRELLIS_WIDTH. */
167 #define TRELLIS_WIDTH 16
/* Saturation value for trellis path scores (see find_slice_quant()).
 * NOTE(review): the expansion is not parenthesised; the visible uses
 * (comparison and plain assignment) are unaffected, but wrapping it in
 * parentheses would make the macro safe inside larger expressions. */
168 #define SCORE_LIMIT INT_MAX / 2
/* Number of precomputed quantiser tables kept in ProresContext.quants[];
 * larger quantisers are built on demand into a custom_q table. */
177 #define MAX_STORED_Q 16
/*
 * Scratch state for the quantiser search. ctx->tdata is allocated with
 * avctx->thread_count entries and find_quant_thread() picks the entry with
 * its threadnr argument.
 */
179 typedef struct ProresThreadData {
/* DCT coefficients (or raw alpha samples) of the current slice, per plane. */
180 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
/* 16x16 staging area used by get_slice_data() for edge padding. */
181 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
/* Quantiser table storage for quantisers >= MAX_STORED_Q -- presumably the
 * target of the on-demand table built in find_slice_quant(); confirm. */
182 int16_t custom_q[64];
/* Trellis nodes, (slices_width + 1) groups of TRELLIS_WIDTH entries. */
183 struct TrellisNode *nodes;
/*
 * Encoder private context (avctx->priv_data). Only part of the structure is
 * visible in this listing; the comments below cover the visible fields.
 */
186 typedef struct ProresContext {
/* Coefficient buffer and edge-padding buffer used by encode_slice(). */
188 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
189 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
/* quants[q][j] = quant_mat[j] * q, filled in encode_init(); entry 0 holds the
 * forced-quantiser table when force_quant is set. */
190 int16_t quants[MAX_STORED_Q][64];
/* Table built on the fly in encode_slice() when quant >= MAX_STORED_Q. */
191 int16_t custom_q[64];
/* Selected entry of prores_quant_matrices. */
192 const uint8_t *quant_mat;
194 ProresDSPContext dsp;
/* Picture size in 16x16 macroblocks. */
197 int mb_width, mb_height;
199 int num_chroma_blocks, chroma_factor;
201 int slices_per_picture;
202 int pictures_per_frame; // 1 for progressive, 2 for interlaced
/* Worst-case frame payload computed in encode_init(); encode_frame() adds
 * FF_MIN_BUFFER_SIZE to it to size the output packet. */
212 int frame_size_upper_bound;
215 const struct prores_profile *profile_info;
/* Per-thread search state, avctx->thread_count entries. */
219 ProresThreadData *tdata;
/*
 * Fill `blocks` with forward-DCT coefficients for one slice of one plane.
 *
 * The loop walks mbs_per_slice macroblocks, advancing src by mb_width samples
 * (4 * blocks_per_mb) each time. A macroblock that satisfies
 * x + mb_width <= w and y + 16 <= h is read with the source stride; otherwise
 * its existing part (bw x bh samples) is copied into emu_buf, each row is
 * extended with its last valid sample, and the last valid row is repeated, so
 * that the transform always sees a full macroblock.
 *
 * ctx->dsp.fdct() is then called on the macroblock's 8x8 sub-blocks.
 *
 * NOTE(review): several lines of this function are not visible here. The two
 * fdct call sequences differ only in sub-block order and are presumably
 * selected by is_chroma; the memset presumably zeroes the remaining blocks
 * once the slice runs past the right picture edge -- confirm both.
 */
222 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
223 int linesize, int x, int y, int w, int h,
224 int16_t *blocks, uint16_t *emu_buf,
225 int mbs_per_slice, int blocks_per_mb, int is_chroma)
227 const uint16_t *esrc;
228 const int mb_width = 4 * blocks_per_mb;
232 for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
234 memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
/* Macroblock entirely inside the plane: use the caller's stride. */
238 if (x + mb_width <= w && y + 16 <= h) {
240 elinesize = linesize;
/* Padded path: emu_buf rows are 16 samples wide, stride given in bytes. */
245 elinesize = 16 * sizeof(*emu_buf);
/* bw x bh is the part of the macroblock that actually exists. */
247 bw = FFMIN(w - x, mb_width);
248 bh = FFMIN(h - y, 16);
250 for (j = 0; j < bh; j++) {
251 memcpy(emu_buf + j * 16,
252 (const uint8_t*)src + j * linesize,
/* Extend the row to mb_width with its last valid sample. */
254 pix = emu_buf[j * 16 + bw - 1];
255 for (k = bw; k < mb_width; k++)
256 emu_buf[j * 16 + k] = pix;
/* Repeat the last valid row into row j. */
259 memcpy(emu_buf + j * 16,
260 emu_buf + (bh - 1) * 16,
261 mb_width * sizeof(*emu_buf));
264 ctx->dsp.fdct(esrc, elinesize, blocks);
266 if (blocks_per_mb > 2) {
267 ctx->dsp.fdct(esrc + 8, elinesize, blocks);
270 ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
272 if (blocks_per_mb > 2) {
273 ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
277 ctx->dsp.fdct(esrc, elinesize, blocks);
279 ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
281 if (blocks_per_mb > 2) {
282 ctx->dsp.fdct(esrc + 8, elinesize, blocks);
284 ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
/*
 * Copy the alpha samples of one slice into `blocks`.
 *
 * The output is 16 rows of slice_width (16 * mbs_per_slice) samples. Only
 * copy_w x copy_h samples exist inside the w x h plane; each row is padded to
 * slice_width with its last valid sample and rows copy_h..15 are copies of the
 * row before them.
 *
 * NOTE(review): the two per-sample loops are presumably alternative
 * conversions chosen by abits (the first loop's body is not visible); the
 * visible one, (v << 6) | (v >> 4), widens a 10-bit sample to 16 bits --
 * confirm. src is advanced by linesize >> 1, i.e. linesize is taken to be in
 * bytes.
 */
293 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
294 int linesize, int x, int y, int w, int h,
295 int16_t *blocks, int mbs_per_slice, int abits)
297 const int slice_width = 16 * mbs_per_slice;
298 int i, j, copy_w, copy_h;
300 copy_w = FFMIN(w - x, slice_width);
301 copy_h = FFMIN(h - y, 16);
302 for (i = 0; i < copy_h; i++) {
303 memcpy(blocks, src, copy_w * sizeof(*src));
305 for (j = 0; j < copy_w; j++)
308 for (j = 0; j < copy_w; j++)
309 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
/* Horizontal padding with the last valid sample of the row. */
310 for (j = copy_w; j < slice_width; j++)
311 blocks[j] = blocks[copy_w - 1];
312 blocks += slice_width;
313 src += linesize >> 1;
/* Vertical padding: each missing row duplicates the previous output row. */
315 for (; i < 16; i++) {
316 memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
317 blocks += slice_width;
322 * Write an unsigned rice/exp golomb codeword.
/*
 * The codebook byte packs three parameters: bits 0-1 hold switch_bits - 1,
 * bits 2-4 the exp-Golomb order and bits 5 and up the Rice order. Values
 * below switch_val = switch_bits << rice_order use the Rice form, all others
 * the exp-Golomb form. estimate_vlc() computes the matching code length.
 */
324 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
326 unsigned int rice_order, exp_order, switch_bits, switch_val;
329 /* number of prefix bits to switch between Rice and expGolomb */
330 switch_bits = (codebook & 3) + 1;
331 rice_order = codebook >> 5; /* rice code order */
332 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
334 switch_val = switch_bits << rice_order;
/* NOTE(review): val is int and switch_val unsigned, so this comparison is
 * done in unsigned arithmetic; callers are expected to pass val >= 0. */
336 if (val >= switch_val) {
337 val -= switch_val - (1 << exp_order);
338 exponent = av_log2(val);
/* Zero prefix, then the value itself in exponent + 1 bits. */
340 put_bits(pb, exponent - exp_order + switch_bits, 0);
341 put_bits(pb, exponent + 1, val);
/* Rice form: quotient written as a run of zero bits ... */
343 exponent = val >> rice_order;
346 put_bits(pb, exponent, 0);
/* ... followed by the low rice_order bits of val. */
349 put_sbits(pb, rice_order, val);
/* GET_SIGN: arithmetic shift by 31; yields 0 for non-negative and -1 for
 * negative values on the usual 32-bit int with arithmetic right shift. */
353 #define GET_SIGN(x) ((x) >> 31)
/* MAKE_CODE: maps a signed value to a non-negative code
 * (0, -1, 1, -2, ... -> 0, 1, 2, 3, ...). The argument is evaluated twice. */
354 #define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
/*
 * Write the DC coefficients of all blocks in a slice.
 *
 * Each DC is taken as (blocks[0] - 0x4000) / scale. The first one is coded
 * directly with FIRST_DC_CB; every following one is coded as a difference
 * from the previous DC, using ff_prores_dc_codebook[] indexed by a value
 * derived from the previous code and capped at 3.
 *
 * NOTE(review): the statements that update `sign`, `prev_dc` and the initial
 * advance of `blocks` are not visible in this listing.
 */
356 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
357 int blocks_per_slice, int scale)
360 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
362 prev_dc = (blocks[0] - 0x4000) / scale;
363 encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
368 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
369 dc = (blocks[0] - 0x4000) / scale;
370 delta = dc - prev_dc;
371 new_sign = GET_SIGN(delta);
/* Negates delta when `sign` is -1, leaves it unchanged when `sign` is 0. */
372 delta = (delta ^ sign) - sign;
373 code = MAKE_CODE(delta);
374 encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
/* Next codebook index: ceil(code / 2), limited to 3. */
375 codebook = (code + (code & 1)) >> 1;
376 codebook = FFMIN(codebook, 3);
/*
 * Write the AC coefficients of a slice as run / level / sign triples.
 *
 * Coefficients are visited position-major: for every scan position i in
 * 1..63, the coefficient at scan[i] of each block in the slice is visited in
 * turn (idx advances by 64). Each coefficient is divided by the matching
 * qmat entry. The run and level codebooks for the next symbol are chosen from
 * the run (capped at 15) and absolute level (capped at 9) just written.
 *
 * NOTE(review): the zero-level test and the run counter updates are not
 * visible in this listing.
 */
382 static void encode_acs(PutBitContext *pb, int16_t *blocks,
383 int blocks_per_slice,
384 int plane_size_factor,
385 const uint8_t *scan, const int16_t *qmat)
388 int run, level, run_cb, lev_cb;
389 int max_coeffs, abs_level;
391 max_coeffs = blocks_per_slice << 6;
/* Initial codebooks, as if the previous symbol had run 4 and level 2. */
392 run_cb = ff_prores_run_to_cb_index[4];
393 lev_cb = ff_prores_lev_to_cb_index[2];
396 for (i = 1; i < 64; i++) {
397 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
398 level = blocks[idx] / qmat[scan[i]];
400 abs_level = FFABS(level);
401 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
402 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
/* One sign bit: GET_SIGN() is 0 or -1, of which put_sbits keeps 1 bit. */
404 put_sbits(pb, 1, GET_SIGN(level));
406 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
407 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Encode one colour plane of a slice: DC coefficients first (scaled by
 * qmat[0]), then the AC coefficients in ctx->scantable.permutated order.
 *
 * Returns the number of bytes produced, computed as the difference of the
 * bit-writer position before and after, shifted right by 3.
 * NOTE(review): this assumes the writer is byte-aligned at both points; the
 * flush that would guarantee it is not visible in this listing.
 */
416 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
417 const uint16_t *src, int linesize,
418 int mbs_per_slice, int16_t *blocks,
419 int blocks_per_mb, int plane_size_factor,
422 int blocks_per_slice, saved_pos;
424 saved_pos = put_bits_count(pb);
425 blocks_per_slice = mbs_per_slice * blocks_per_mb;
427 encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
428 encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
429 ctx->scantable.permutated, qmat);
432 return (put_bits_count(pb) - saved_pos) >> 3;
/*
 * Write the change from the previous alpha sample to the current one.
 *
 * A difference that is zero or outside [-dsize, dsize] is written as a raw
 * abits-wide value; any other difference is written as its magnitude minus
 * one in dbits - 1 bits followed by a sign bit (1 = negative).
 *
 * NOTE(review): the wrap-around adjustments of diff and the flag bit that
 * distinguishes the two forms are not visible in this listing.
 */
435 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
437 const int mask = (1 << abits) - 1;
438 const int dbits = (abits == 8) ? 4 : 7;
/* Binary minus binds tighter than <<, so this is 1 << (dbits - 1). */
439 const int dsize = 1 << dbits - 1;
440 int diff = cur - prev;
443 if (diff >= (1 << abits) - dsize)
445 if (diff < -dsize || diff > dsize || !diff) {
447 put_bits(pb, abits, diff);
450 put_bits(pb, dbits - 1, FFABS(diff) - 1);
451 put_bits(pb, 1, diff < 0);
/*
 * Write a run length of repeated alpha samples, as either a 4-bit or a 15-bit
 * field.
 * NOTE(review): the conditions choosing between the two widths, and any
 * prefix bits, are not visible in this listing.
 */
455 static void put_alpha_run(PutBitContext *pb, int run)
460 put_bits(pb, 4, run);
462 put_bits(pb, 15, run);
468 // todo alpha quantisation for high quants
/*
 * Encode the alpha samples of a slice (mbs_per_slice * 256 values held in
 * `blocks`) as alternating sample differences and run lengths.
 *
 * The predictor starts at the maximum alpha value (mask). Returns the number
 * of bytes produced, derived from the bit-writer position as in
 * encode_slice_plane().
 *
 * NOTE(review): the loop that reads samples and counts runs is only partly
 * visible in this listing.
 */
469 static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
470 const uint16_t *src, int linesize,
471 int mbs_per_slice, uint16_t *blocks,
474 const int abits = ctx->alpha_bits;
475 const int mask = (1 << abits) - 1;
476 const int num_coeffs = mbs_per_slice * 256;
477 int saved_pos = put_bits_count(pb);
478 int prev = mask, cur;
483 put_alpha_diff(pb, cur, prev, abits);
488 put_alpha_run (pb, run);
489 put_alpha_diff(pb, cur, prev, abits);
495 } while (idx < num_coeffs);
/* Flush the run that was still open when the samples ended. */
497 put_alpha_run(pb, run);
499 return (put_bits_count(pb) - saved_pos) >> 3;
/*
 * Encode every plane of one slice into pb and record the per-plane byte
 * counts in sizes[].
 *
 * The quantiser table is ctx->quants[0] when a quantiser is forced,
 * ctx->quants[quant] for stored quantisers, and otherwise built into
 * ctx->custom_q as quant_mat[i] * quant. For each plane the source pointer,
 * stride and width are derived (the stride is multiplied by
 * pictures_per_frame and offset by line_add source lines so that interlaced
 * pictures read alternate lines), the samples are gathered with
 * get_slice_data() or get_alpha_data(), and the plane is encoded.
 *
 * NOTE(review): this function works in ctx->blocks[0], ctx->emu_buf and
 * ctx->custom_q, i.e. state shared through the codec context.
 */
502 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
504 int sizes[4], int x, int y, int quant,
507 ProresContext *ctx = avctx->priv_data;
511 int slice_width_factor = av_log2(mbs_per_slice);
512 int num_cblocks, pwidth, linesize, line_add;
513 int plane_factor, is_chroma;
516 if (ctx->pictures_per_frame == 1)
/* Interlaced: which source line parity the current picture starts on. */
519 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
521 if (ctx->force_quant) {
522 qmat = ctx->quants[0];
523 } else if (quant < MAX_STORED_Q) {
524 qmat = ctx->quants[quant];
526 qmat = ctx->custom_q;
527 for (i = 0; i < 64; i++)
528 qmat[i] = ctx->quant_mat[i] * quant;
531 for (i = 0; i < ctx->num_planes; i++) {
/* Planes 1 and 2 are chroma; plane 0 is luma and plane 3 (if any) alpha. */
532 is_chroma = (i == 1 || i == 2);
533 plane_factor = slice_width_factor + 2;
535 plane_factor += ctx->chroma_factor - 3;
/* Full-width planes versus horizontally subsampled chroma. */
536 if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
540 pwidth = avctx->width;
545 pwidth = avctx->width >> 1;
548 linesize = pic->linesize[i] * ctx->pictures_per_frame;
549 src = (const uint16_t*)(pic->data[i] + yp * linesize +
550 line_add * pic->linesize[i]) + xp;
553 get_slice_data(ctx, src, linesize, xp, yp,
554 pwidth, avctx->height / ctx->pictures_per_frame,
555 ctx->blocks[0], ctx->emu_buf,
556 mbs_per_slice, num_cblocks, is_chroma);
557 sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
558 mbs_per_slice, ctx->blocks[0],
559 num_cblocks, plane_factor,
562 get_alpha_data(ctx, src, linesize, xp, yp,
563 pwidth, avctx->height / ctx->pictures_per_frame,
564 ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
565 sizes[i] = encode_alpha_plane(ctx, pb, src, linesize,
566 mbs_per_slice, ctx->blocks[0],
569 total_size += sizes[i];
/*
 * Return the length in bits of the codeword for `val` under `codebook`,
 * without writing anything. The codebook byte is decoded exactly as in
 * encode_vlc_codeword(): exp-Golomb form for val >= switch_val, Rice form
 * otherwise.
 */
574 static inline int estimate_vlc(unsigned codebook, int val)
576 unsigned int rice_order, exp_order, switch_bits, switch_val;
579 /* number of prefix bits to switch between Rice and expGolomb */
580 switch_bits = (codebook & 3) + 1;
581 rice_order = codebook >> 5; /* rice code order */
582 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
584 switch_val = switch_bits << rice_order;
586 if (val >= switch_val) {
587 val -= switch_val - (1 << exp_order);
588 exponent = av_log2(val);
/* (exponent - exp_order + switch_bits) prefix bits + (exponent + 1) bits. */
590 return exponent * 2 - exp_order + switch_bits + 1;
/* Quotient bits + one extra bit + rice_order remainder bits. */
592 return (val >> rice_order) + rice_order + 1;
/*
 * Estimate the bit cost of the DC coefficients of a slice and add the
 * quantisation remainder of each DC to *error.
 *
 * Follows the same steps as encode_dcs(), replacing each write with
 * estimate_vlc(). The remainder added per block is
 * |blocks[0] - 0x4000| % scale.
 *
 * NOTE(review): the updates of `sign`/`prev_dc` and the return statement are
 * not visible in this listing.
 */
596 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
600 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
603 prev_dc = (blocks[0] - 0x4000) / scale;
604 bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
608 *error += FFABS(blocks[0] - 0x4000) % scale;
610 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
611 dc = (blocks[0] - 0x4000) / scale;
612 *error += FFABS(blocks[0] - 0x4000) % scale;
613 delta = dc - prev_dc;
614 new_sign = GET_SIGN(delta);
615 delta = (delta ^ sign) - sign;
616 code = MAKE_CODE(delta);
617 bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
618 codebook = (code + (code & 1)) >> 1;
619 codebook = FFMIN(codebook, 3);
/*
 * Estimate the bit cost of the AC coefficients of a slice and add each
 * coefficient's quantisation remainder (|coeff| % qmat entry) to *error.
 *
 * Visits coefficients in the same order and with the same codebook
 * adaptation as encode_acs(), replacing each write with estimate_vlc().
 *
 * NOTE(review): the zero-level test, run counter updates, sign-bit accounting
 * and the return statement are not visible in this listing.
 */
627 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
628 int plane_size_factor,
629 const uint8_t *scan, const int16_t *qmat)
632 int run, level, run_cb, lev_cb;
633 int max_coeffs, abs_level;
636 max_coeffs = blocks_per_slice << 6;
637 run_cb = ff_prores_run_to_cb_index[4];
638 lev_cb = ff_prores_lev_to_cb_index[2];
641 for (i = 1; i < 64; i++) {
642 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
643 level = blocks[idx] / qmat[scan[i]];
644 *error += FFABS(blocks[idx]) % qmat[scan[i]];
646 abs_level = FFABS(level);
647 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
648 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
651 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
652 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Estimate the size in bits of one colour plane of a slice, using the
 * coefficients already stored in td->blocks[plane], and accumulate the
 * quantisation error into *error.
 *
 * The result is rounded up to a multiple of 8 bits, i.e. to whole bytes.
 */
663 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
664 const uint16_t *src, int linesize,
666 int blocks_per_mb, int plane_size_factor,
667 const int16_t *qmat, ProresThreadData *td)
669 int blocks_per_slice;
672 blocks_per_slice = mbs_per_slice * blocks_per_mb;
674 bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
675 bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
676 plane_size_factor, ctx->scantable.permutated, qmat);
678 return FFALIGN(bits, 8);
/*
 * Bit-cost counterpart of put_alpha_diff(): classifies the difference between
 * cur and prev with the same thresholds.
 * NOTE(review): the returned bit counts are not visible in this listing.
 */
681 static int est_alpha_diff(int cur, int prev, int abits)
683 const int mask = (1 << abits) - 1;
684 const int dbits = (abits == 8) ? 4 : 7;
/* Parsed as 1 << (dbits - 1). */
685 const int dsize = 1 << dbits - 1;
686 int diff = cur - prev;
689 if (diff >= (1 << abits) - dsize)
691 if (diff < -dsize || diff > dsize || !diff)
/*
 * Estimate the size of the alpha plane of a slice (mbs_per_slice * 256
 * samples), summing est_alpha_diff() costs. The predictor starts at the
 * maximum alpha value, as in encode_alpha_plane().
 * NOTE(review): run-length accounting and the return statement are not
 * visible in this listing.
 */
697 static int estimate_alpha_plane(ProresContext *ctx, int *error,
698 const uint16_t *src, int linesize,
699 int mbs_per_slice, int quant,
702 const int abits = ctx->alpha_bits;
703 const int mask = (1 << abits) - 1;
704 const int num_coeffs = mbs_per_slice * 256;
705 int prev = mask, cur;
712 bits = est_alpha_diff(cur, prev, abits);
723 bits += est_alpha_diff(cur, prev, abits);
729 } while (idx < num_coeffs);
/*
 * Rate-control step for one slice: evaluate every candidate quantiser and
 * extend the trellis of the current slice row by one slice.
 *
 * Steps visible in this listing:
 *  1. Gather the coefficients (or alpha samples) of every plane into
 *     td->blocks[].
 *  2. For q in [min_quant, max_quant], estimate the slice size and
 *     quantisation error into slice_bits[q] / slice_score[q].
 *  3. Fill the extra candidate max_quant + 1 ("overquant"): a copy of
 *     max_quant if that already fits ctx->bits_per_mb * mbs_per_slice,
 *     otherwise the result of searching quantisers up to 127 for one that
 *     fits.
 *  4. For every (previous node, candidate) pair, accumulate bits and score
 *     and keep the best predecessor for each candidate node. Scores saturate
 *     at SCORE_LIMIT.
 *  5. Select the node of this slice with the lowest score.
 *
 * trellis_node is the index of this slice's node group; the predecessor group
 * starts TRELLIS_WIDTH entries earlier.
 *
 * NOTE(review): the handling of candidates exceeding bits_limit and the
 * return statement are not visible here; the function presumably returns the
 * selected node index (pq) -- confirm.
 */
741 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
742 int trellis_node, int x, int y, int mbs_per_slice,
743 ProresThreadData *td)
745 ProresContext *ctx = avctx->priv_data;
746 int i, q, pq, xp, yp;
748 int slice_width_factor = av_log2(mbs_per_slice);
749 int num_cblocks[MAX_PLANES], pwidth;
750 int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
751 const int min_quant = ctx->profile_info->min_quant;
752 const int max_quant = ctx->profile_info->max_quant;
753 int error, bits, bits_limit;
754 int mbs, prev, cur, new_score;
755 int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
758 int linesize[4], line_add;
760 if (ctx->pictures_per_frame == 1)
763 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
/* Macroblocks covered by the row up to and including this slice. */
764 mbs = x + mbs_per_slice;
/* Step 1: per-plane geometry and sample gathering (cf. encode_slice()). */
766 for (i = 0; i < ctx->num_planes; i++) {
767 is_chroma[i] = (i == 1 || i == 2);
768 plane_factor[i] = slice_width_factor + 2;
770 plane_factor[i] += ctx->chroma_factor - 3;
771 if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
775 pwidth = avctx->width;
780 pwidth = avctx->width >> 1;
783 linesize[i] = pic->linesize[i] * ctx->pictures_per_frame;
784 src = (const uint16_t*)(pic->data[i] + yp * linesize[i] +
785 line_add * pic->linesize[i]) + xp;
788 get_slice_data(ctx, src, linesize[i], xp, yp,
789 pwidth, avctx->height / ctx->pictures_per_frame,
790 td->blocks[i], td->emu_buf,
791 mbs_per_slice, num_cblocks[i], is_chroma[i]);
793 get_alpha_data(ctx, src, linesize[i], xp, yp,
794 pwidth, avctx->height / ctx->pictures_per_frame,
795 td->blocks[i], mbs_per_slice, ctx->alpha_bits);
/* Reset this slice's nodes; prev_node == -1 marks "no path yet". */
799 for (q = min_quant; q < max_quant + 2; q++) {
800 td->nodes[trellis_node + q].prev_node = -1;
801 td->nodes[trellis_node + q].quant = q;
804 // todo: maybe perform coarser quantising to fit into frame size when needed
/* Step 2: cost of every regular quantiser. */
805 for (q = min_quant; q <= max_quant; q++) {
808 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
809 bits += estimate_slice_plane(ctx, &error, i,
812 num_cblocks[i], plane_factor[i],
816 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
817 mbs_per_slice, q, td->blocks[3]);
818 if (bits > 65000 * 8) {
822 slice_bits[q] = bits;
823 slice_score[q] = error;
/* Step 3: the extra "overquant" candidate stored at index max_quant + 1. */
825 if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
826 slice_bits[max_quant + 1] = slice_bits[max_quant];
827 slice_score[max_quant + 1] = slice_score[max_quant] + 1;
828 overquant = max_quant;
830 for (q = max_quant + 1; q < 128; q++) {
833 if (q < MAX_STORED_Q) {
834 qmat = ctx->quants[q];
837 for (i = 0; i < 64; i++)
838 qmat[i] = ctx->quant_mat[i] * q;
840 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
841 bits += estimate_slice_plane(ctx, &error, i,
844 num_cblocks[i], plane_factor[i],
848 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
849 mbs_per_slice, q, td->blocks[3]);
850 if (bits <= ctx->bits_per_mb * mbs_per_slice)
854 slice_bits[max_quant + 1] = bits;
855 slice_score[max_quant + 1] = error;
858 td->nodes[trellis_node + max_quant + 1].quant = overquant;
/* Step 4: connect each candidate to its best predecessor. */
860 bits_limit = mbs * ctx->bits_per_mb;
861 for (pq = min_quant; pq < max_quant + 2; pq++) {
862 prev = trellis_node - TRELLIS_WIDTH + pq;
864 for (q = min_quant; q < max_quant + 2; q++) {
865 cur = trellis_node + q;
867 bits = td->nodes[prev].bits + slice_bits[q];
868 error = slice_score[q];
869 if (bits > bits_limit)
/* Saturating add so that accumulated scores cannot overflow. */
872 if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
873 new_score = td->nodes[prev].score + error;
875 new_score = SCORE_LIMIT;
876 if (td->nodes[cur].prev_node == -1 ||
877 td->nodes[cur].score >= new_score) {
879 td->nodes[cur].bits = bits;
880 td->nodes[cur].score = new_score;
881 td->nodes[cur].prev_node = prev;
/* Step 5: lowest-score node of this slice; ties go to the larger q. */
886 error = td->nodes[trellis_node + min_quant].score;
887 pq = trellis_node + min_quant;
888 for (q = min_quant + 1; q < max_quant + 2; q++) {
889 if (td->nodes[trellis_node + q].score <= error) {
890 error = td->nodes[trellis_node + q].score;
891 pq = trellis_node + q;
/*
 * Worker run through avctx->execute2(): choose the quantiser of every slice
 * in macroblock row jobnr.
 *
 * Slices are processed left to right with find_slice_quant(), slice number mb
 * using the trellis node group at (mb + 1) * TRELLIS_WIDTH. The chosen path
 * is then walked backwards through prev_node links, storing each slice's
 * quantiser in ctx->slice_q[] for this row.
 *
 * threadnr selects the ProresThreadData scratch entry. The body of the while
 * loop is not visible in this listing; encode_frame() uses the same loop to
 * halve the slice width near the right edge.
 */
898 static int find_quant_thread(AVCodecContext *avctx, void *arg,
899 int jobnr, int threadnr)
901 ProresContext *ctx = avctx->priv_data;
902 ProresThreadData *td = ctx->tdata + threadnr;
903 int mbs_per_slice = ctx->mbs_per_slice;
904 int x, y = jobnr, mb, q = 0;
906 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
907 while (ctx->mb_width - x < mbs_per_slice)
909 q = find_slice_quant(avctx, avctx->coded_frame,
910 (mb + 1) * TRELLIS_WIDTH, x, y,
/* Backtrack from the last slice's best node to the first slice. */
914 for (x = ctx->slices_width - 1; x >= 0; x--) {
915 ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
916 q = td->nodes[q].prev_node;
/*
 * Encode one frame into pkt (the encode2 callback).
 *
 * Output layout as written by the visible code:
 *  - 4-byte frame size (filled in at the end) and the FRAME_ID marker;
 *  - frame header: header size, version, vendor, dimensions, flags, colour
 *    description, alpha info and optionally both quantisation matrices;
 *  - for each picture (one, or two when interlaced): a picture header, a
 *    table of 16-bit slice sizes, then the slices. Each slice has a header
 *    (header size in bits, quantiser, 16-bit size of every plane but the
 *    last) followed by the plane data.
 *
 * Slice quantisers come from ctx->force_quant or, otherwise, from the
 * find_quant_thread() jobs run through avctx->execute2().
 *
 * NOTE(review): several pointer initialisations (buf, tmp, slice_hdr,
 * slice_sizes) and the error-return / got_packet lines are not visible in
 * this listing.
 */
922 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
923 const AVFrame *pic, int *got_packet)
925 ProresContext *ctx = avctx->priv_data;
926 uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
927 uint8_t *picture_size_pos;
929 int x, y, i, mb, q = 0;
930 int sizes[4] = { 0 };
/* Slice header: 1 size byte + 1 quantiser byte + 2 bytes per plane except
 * the last one. */
931 int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
932 int frame_size, picture_size, slice_size;
/* Every frame is coded independently: mark it as an intra key frame. */
936 *avctx->coded_frame = *pic;
937 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
938 avctx->coded_frame->key_frame = 1;
940 pkt_size = ctx->frame_size_upper_bound + FF_MIN_BUFFER_SIZE;
942 if ((ret = ff_alloc_packet(pkt, pkt_size)) < 0) {
943 av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
947 orig_buf = pkt->data;
950 orig_buf += 4; // frame size
951 bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
956 buf += 2; // frame header size will be stored here
957 bytestream_put_be16 (&buf, 0); // version 1
958 bytestream_put_buffer(&buf, ctx->vendor, 4);
959 bytestream_put_be16 (&buf, avctx->width);
960 bytestream_put_be16 (&buf, avctx->height);
/* Flags: chroma format in the top bits, field order when interlaced. */
962 frame_flags = ctx->chroma_factor << 6;
963 if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
964 frame_flags |= pic->top_field_first ? 0x04 : 0x08;
965 bytestream_put_byte (&buf, frame_flags);
967 bytestream_put_byte (&buf, 0); // reserved
968 bytestream_put_byte (&buf, avctx->color_primaries);
969 bytestream_put_byte (&buf, avctx->color_trc);
970 bytestream_put_byte (&buf, avctx->colorspace);
/* Low bits carry alpha_bits / 8 (0, 1 or 2). */
971 bytestream_put_byte (&buf, 0x40 | (ctx->alpha_bits >> 3));
972 bytestream_put_byte (&buf, 0); // reserved
973 if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
974 bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
975 // luma quantisation matrix
976 for (i = 0; i < 64; i++)
977 bytestream_put_byte(&buf, ctx->quant_mat[i]);
978 // chroma quantisation matrix
979 for (i = 0; i < 64; i++)
980 bytestream_put_byte(&buf, ctx->quant_mat[i]);
982 bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
984 bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
986 for (ctx->cur_picture_idx = 0;
987 ctx->cur_picture_idx < ctx->pictures_per_frame;
988 ctx->cur_picture_idx++) {
/* The 32-bit picture size field follows the one-byte header size. */
990 picture_size_pos = buf + 1;
991 bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
992 buf += 4; // picture data size will be stored here
993 bytestream_put_be16 (&buf, ctx->slices_per_picture);
994 bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
996 // seek table - will be filled during slice encoding
998 buf += ctx->slices_per_picture * 2;
/* Rate-controlled mode: compute per-slice quantisers first. */
1001 if (!ctx->force_quant) {
1002 ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
1008 for (y = 0; y < ctx->mb_height; y++) {
1009 int mbs_per_slice = ctx->mbs_per_slice;
1010 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1011 q = ctx->force_quant ? ctx->force_quant
1012 : ctx->slice_q[mb + y * ctx->slices_width];
/* Halve the slice width until it fits the macroblocks left in the row. */
1014 while (ctx->mb_width - x < mbs_per_slice)
1015 mbs_per_slice >>= 1;
1017 bytestream_put_byte(&buf, slice_hdr_size << 3);
1019 buf += slice_hdr_size - 1;
1020 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
1021 encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
/* Fill in the slice header now that the plane sizes are known. */
1023 bytestream_put_byte(&slice_hdr, q);
1024 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1025 for (i = 0; i < ctx->num_planes - 1; i++) {
1026 bytestream_put_be16(&slice_hdr, sizes[i]);
1027 slice_size += sizes[i];
1029 bytestream_put_be16(&slice_sizes, slice_size);
1030 buf += slice_size - slice_hdr_size;
1034 if (ctx->pictures_per_frame == 1)
1035 picture_size = buf - picture_size_pos - 6;
1037 picture_size = buf - picture_size_pos + 1;
1038 bytestream_put_be32(&picture_size_pos, picture_size);
/* Patch the total frame size into the first four bytes of the packet. */
1042 frame_size = buf - orig_buf;
1043 bytestream_put_be32(&orig_buf, frame_size);
1045 pkt->size = frame_size;
1046 pkt->flags |= AV_PKT_FLAG_KEY;
/*
 * Release everything allocated by encode_init(): the coded frame, each
 * thread's trellis node array, the per-thread data array and the per-slice
 * quantiser table. Also called from encode_init() on allocation failure.
 *
 * NOTE(review): the loop dereferences ctx->tdata; a NULL guard is presumably
 * on a line not visible in this listing -- confirm, since encode_init() can
 * call this before tdata is allocated.
 */
1052 static av_cold int encode_close(AVCodecContext *avctx)
1054 ProresContext *ctx = avctx->priv_data;
1057 av_freep(&avctx->coded_frame);
1060 for (i = 0; i < avctx->thread_count; i++)
1061 av_free(ctx->tdata[i].nodes);
1063 av_freep(&ctx->tdata);
1064 av_freep(&ctx->slice_q);
/*
 * Encoder initialisation (the init callback).
 *
 * Validates the private options, derives the picture geometry in macroblocks
 * and slices, selects the quantisation matrix, and then either
 *  - (force_quant == 0) picks bits_per_mb, precomputes the quantiser tables
 *    and allocates the per-slice quantiser array and per-thread trellis, or
 *  - (force_quant != 0) builds the single forced table in quants[0] and
 *    derives bits_per_mb from it.
 * Finally computes frame_size_upper_bound and sets the codec tag.
 *
 * Returns a negative AVERROR code on invalid options or allocation failure.
 *
 * NOTE(review): the validation failures below return directly, whereas the
 * allocation failures call encode_close() first; confirm that the caller
 * releases avctx->coded_frame when init fails.
 */
1069 static av_cold int encode_init(AVCodecContext *avctx)
1071 ProresContext *ctx = avctx->priv_data;
1074 int min_quant, max_quant;
1075 int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
1077 avctx->bits_per_raw_sample = 10;
1078 avctx->coded_frame = av_frame_alloc();
1079 if (!avctx->coded_frame)
1080 return AVERROR(ENOMEM);
1082 ff_proresdsp_init(&ctx->dsp);
1083 ff_init_scantable(ctx->dsp.dct_permutation, &ctx->scantable,
1084 interlaced ? ff_prores_interlaced_scan
1085 : ff_prores_progressive_scan);
1087 mps = ctx->mbs_per_slice;
/* Non-zero when more than one bit is set, i.e. not a power of two. */
1088 if (mps & (mps - 1)) {
1089 av_log(avctx, AV_LOG_ERROR,
1090 "there should be an integer power of two MBs per slice\n");
1091 return AVERROR(EINVAL);
1093 if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
/* Only multiples of 8 pass; the option range limits the value to 0..16. */
1094 if (ctx->alpha_bits & 7) {
1095 av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1096 return AVERROR(EINVAL);
/* Input without an alpha channel: disable alpha coding. */
1099 ctx->alpha_bits = 0;
1102 ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1105 ctx->profile_info = prores_profile_info + ctx->profile;
1106 ctx->num_planes = 3 + !!ctx->alpha_bits;
1108 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
/* Two alternative heights: 32-line alignment (presumably for interlaced
 * input, where each field gets half the rows) or 16-line alignment. */
1111 ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1113 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
/* Full-width slices, plus one slice per set bit of the leftover macroblock
 * count (the leftover is split into power-of-two sized slices). */
1115 ctx->slices_width = ctx->mb_width / mps;
1116 ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1117 ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1118 ctx->pictures_per_frame = 1 + interlaced;
/* quant_sel == -1 ("auto") means: use the profile's own matrix. */
1120 if (ctx->quant_sel == -1)
1121 ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1123 ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
/* encode_frame() copies exactly 4 bytes of the vendor string. */
1125 if (strlen(ctx->vendor) != 4) {
1126 av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1127 return AVERROR_INVALIDDATA;
1130 ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1131 if (!ctx->force_quant) {
/* No explicit bits_per_mb: look it up from the picture-size class. */
1132 if (!ctx->bits_per_mb) {
1133 for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1134 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1135 ctx->pictures_per_frame)
1137 ctx->bits_per_mb = ctx->profile_info->br_tab[i];
1138 } else if (ctx->bits_per_mb < 128) {
1139 av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1140 return AVERROR_INVALIDDATA;
1143 min_quant = ctx->profile_info->min_quant;
1144 max_quant = ctx->profile_info->max_quant;
/* Precompute quants[q] = quant_mat * q for all stored quantisers. */
1145 for (i = min_quant; i < MAX_STORED_Q; i++) {
1146 for (j = 0; j < 64; j++)
1147 ctx->quants[i][j] = ctx->quant_mat[j] * i;
1150 ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1151 if (!ctx->slice_q) {
1152 encode_close(avctx);
1153 return AVERROR(ENOMEM);
1156 ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1158 encode_close(avctx);
1159 return AVERROR(ENOMEM);
1162 for (j = 0; j < avctx->thread_count; j++) {
1163 ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1165 * sizeof(*ctx->tdata->nodes));
1166 if (!ctx->tdata[j].nodes) {
1167 encode_close(avctx);
1168 return AVERROR(ENOMEM);
/* Node group 0 is the zero-cost start state preceding the first slice. */
1170 for (i = min_quant; i < max_quant + 2; i++) {
1171 ctx->tdata[j].nodes[i].prev_node = -1;
1172 ctx->tdata[j].nodes[i].bits = 0;
1173 ctx->tdata[j].nodes[i].score = 0;
/* Forced-quantiser setup (presumably the else branch of the test above). */
1179 if (ctx->force_quant > 64) {
1180 av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1181 return AVERROR_INVALIDDATA;
1184 for (j = 0; j < 64; j++) {
1185 ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1186 ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
/* Size bound derived from the table: scaled by 8, plus 4 more for 4:4:4
 * chroma and 4 more for an alpha plane. */
1189 ctx->bits_per_mb = ls * 8;
1190 if (ctx->chroma_factor == CFACTOR_Y444)
1191 ctx->bits_per_mb += ls * 4;
1192 if (ctx->num_planes == 4)
1193 ctx->bits_per_mb += ls * 4;
/* Per slice: header bytes plus the payload of a full-width slice. */
1196 ctx->frame_size_upper_bound = ctx->pictures_per_frame *
1197 ctx->slices_per_picture *
1198 (2 + 2 * ctx->num_planes +
1199 (mps * ctx->bits_per_mb) / 8)
1202 avctx->codec_tag = ctx->profile_info->tag;
1204 av_log(avctx, AV_LOG_DEBUG,
1205 "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1206 ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1207 interlaced ? "yes" : "no", ctx->bits_per_mb);
1208 av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1209 ctx->frame_size_upper_bound);
/* Helpers for the option table: OFFSET gives a field's byte offset inside
 * ProresContext, VE marks an option as a video encoding parameter. */
1214 #define OFFSET(x) offsetof(ProresContext, x)
1215 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/*
 * Private options of the encoder. AV_OPT_TYPE_CONST entries are named values
 * for the option whose unit string ("profile" or "quant_mat") they share.
 *
 *  mbs_per_slice  1..MAX_MBS_PER_SLICE, default 8 (encode_init() additionally
 *                 requires a power of two)
 *  profile        PRORES_PROFILE_PROXY..PRORES_PROFILE_4444, default standard
 *  vendor         string, default "Lavc" (encode_init() requires 4 bytes)
 *  bits_per_mb    0..8192, default 0 (0 = choose from the profile table)
 *  quant_mat      -1..QUANT_MAT_DEFAULT, default -1 ("auto")
 *  alpha_bits     0..16, default 16 (encode_init() requires a multiple of 8)
 */
1217 static const AVOption options[] = {
1218 { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1219 AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1220 { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1221 { .i64 = PRORES_PROFILE_STANDARD },
1222 PRORES_PROFILE_PROXY, PRORES_PROFILE_4444, VE, "profile" },
1223 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1224 0, 0, VE, "profile" },
1225 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1226 0, 0, VE, "profile" },
1227 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1228 0, 0, VE, "profile" },
1229 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1230 0, 0, VE, "profile" },
1231 { "4444", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1232 0, 0, VE, "profile" },
1233 { "vendor", "vendor ID", OFFSET(vendor),
1234 AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
1235 { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1236 AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
1237 { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1238 { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1239 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1240 0, 0, VE, "quant_mat" },
1241 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1242 0, 0, VE, "quant_mat" },
1243 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1244 0, 0, VE, "quant_mat" },
1245 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1246 0, 0, VE, "quant_mat" },
1247 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1248 0, 0, VE, "quant_mat" },
1249 { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1250 0, 0, VE, "quant_mat" },
1251 { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1252 { .i64 = 16 }, 0, 16, VE },
/* AVClass of the encoder's private context; referenced by
 * ff_prores_encoder.priv_class. */
1256 static const AVClass proresenc_class = {
1257 .class_name = "ProRes encoder",
1258 .item_name = av_default_item_name,
1260 .version = LIBAVUTIL_VERSION_INT,
/*
 * Codec registration entry: wires encode_init / encode_frame / encode_close
 * into the codec API, declares slice-threading capability and lists the
 * accepted pixel formats (10-bit 4:2:2, 4:4:4 and 4:4:4 with alpha).
 */
1263 AVCodec ff_prores_encoder = {
1265 .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1266 .type = AVMEDIA_TYPE_VIDEO,
1267 .id = AV_CODEC_ID_PRORES,
1268 .priv_data_size = sizeof(ProresContext),
1269 .init = encode_init,
1270 .close = encode_close,
1271 .encode2 = encode_frame,
1272 .capabilities = CODEC_CAP_SLICE_THREADS,
1273 .pix_fmts = (const enum AVPixelFormat[]) {
1274 AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1275 AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1277 .priv_class = &proresenc_class,