4 * Copyright (c) 2012 Konstantin Shishkov
6 * This encoder appears to be based on Anatoliy Wasserman's encoder, considering
7 * the similarities in the bugs.
9 * This file is part of Libav.
11 * Libav is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * Libav is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with Libav; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/opt.h"
29 #include "bytestream.h"
31 #include "proresdsp.h"
32 #include "proresdata.h"
/* Chroma format identifiers kept in ProresContext.chroma_factor; encode_frame
 * writes the value into the top two bits of the frame flags byte. */
34 #define CFACTOR_Y422 2
35 #define CFACTOR_Y444 3
/* Upper bound of the "mbs_per_slice" option; also dimensions the per-slice
 * coefficient buffers. */
37 #define MAX_MBS_PER_SLICE 8
/* Number of planes the coefficient buffers are dimensioned for. */
39 #define MAX_PLANES 3 // should be increased to 4 when there's PIX_FMT_YUV444AP10
/* Profile identifiers; encode_init uses ctx->profile as an index into
 * prores_profile_info. */
42 PRORES_PROFILE_PROXY = 0,
44 PRORES_PROFILE_STANDARD,
/* Quantisation matrices, 64 coefficients each. encode_init selects one by
 * index: ctx->quant_sel, or the profile's default matrix when quant_sel is -1.
 * NOTE(review): the groups of eight rows below are presumably listed in
 * QUANT_MAT_* order (proxy, LT, standard, HQ, default) -- confirm against the
 * enum declaration. */
56 static const uint8_t prores_quant_matrices[][64] = {
58 4, 7, 9, 11, 13, 14, 15, 63,
59 7, 7, 11, 12, 14, 15, 63, 63,
60 9, 11, 13, 14, 15, 63, 63, 63,
61 11, 11, 13, 14, 63, 63, 63, 63,
62 11, 13, 14, 63, 63, 63, 63, 63,
63 13, 14, 63, 63, 63, 63, 63, 63,
64 13, 63, 63, 63, 63, 63, 63, 63,
65 63, 63, 63, 63, 63, 63, 63, 63,
68 4, 5, 6, 7, 9, 11, 13, 15,
69 5, 5, 7, 8, 11, 13, 15, 17,
70 6, 7, 9, 11, 13, 15, 15, 17,
71 7, 7, 9, 11, 13, 15, 17, 19,
72 7, 9, 11, 13, 14, 16, 19, 23,
73 9, 11, 13, 14, 16, 19, 23, 29,
74 9, 11, 13, 15, 17, 21, 28, 35,
75 11, 13, 16, 17, 21, 28, 35, 41,
78 4, 4, 5, 5, 6, 7, 7, 9,
79 4, 4, 5, 6, 7, 7, 9, 9,
80 5, 5, 6, 7, 7, 9, 9, 10,
81 5, 5, 6, 7, 7, 9, 9, 10,
82 5, 6, 7, 7, 8, 9, 10, 12,
83 6, 7, 7, 8, 9, 10, 12, 15,
84 6, 7, 7, 9, 10, 11, 14, 17,
85 7, 7, 9, 10, 11, 14, 17, 21,
88 4, 4, 4, 4, 4, 4, 4, 4,
89 4, 4, 4, 4, 4, 4, 4, 4,
90 4, 4, 4, 4, 4, 4, 4, 4,
91 4, 4, 4, 4, 4, 4, 4, 5,
92 4, 4, 4, 4, 4, 4, 5, 5,
93 4, 4, 4, 4, 4, 5, 5, 6,
94 4, 4, 4, 4, 5, 5, 6, 7,
95 4, 4, 4, 4, 5, 6, 7, 7,
98 4, 4, 4, 4, 4, 4, 4, 4,
99 4, 4, 4, 4, 4, 4, 4, 4,
100 4, 4, 4, 4, 4, 4, 4, 4,
101 4, 4, 4, 4, 4, 4, 4, 4,
102 4, 4, 4, 4, 4, 4, 4, 4,
103 4, 4, 4, 4, 4, 4, 4, 4,
104 4, 4, 4, 4, 4, 4, 4, 4,
105 4, 4, 4, 4, 4, 4, 4, 4,
/* Macroblock-count thresholds. When no bits_per_mb value is supplied,
 * encode_init scans the first NUM_MB_LIMITS - 1 entries for a limit that is
 * >= mb_width * mb_height and uses the resulting index into the profile's
 * br_tab. */
109 #define NUM_MB_LIMITS 4
110 static const int prores_mb_limits[NUM_MB_LIMITS] = {
111 1620, // up to 720x576
112 2700, // up to 960x720
113 6075, // up to 1440x1080
114 9216, // up to 2048x1152
/* Per-profile encoder parameters, indexed by ctx->profile in encode_init.
 *   tag     - copied to avctx->codec_tag
 *   br_tab  - bits per macroblock, one entry per prores_mb_limits[] index
 *   quant   - index of the default matrix in prores_quant_matrices
 * The quantiser range of a profile is read as min_quant / max_quant by
 * find_slice_quant and encode_init. */
117 static const struct prores_profile {
118 const char *full_name;
122 int br_tab[NUM_MB_LIMITS];
124 } prores_profile_info[4] = {
126 .full_name = "proxy",
127 .tag = MKTAG('a', 'p', 'c', 'o'),
130 .br_tab = { 300, 242, 220, 194 },
131 .quant = QUANT_MAT_PROXY,
135 .tag = MKTAG('a', 'p', 'c', 's'),
138 .br_tab = { 720, 560, 490, 440 },
139 .quant = QUANT_MAT_LT,
142 .full_name = "standard",
143 .tag = MKTAG('a', 'p', 'c', 'n'),
146 .br_tab = { 1050, 808, 710, 632 },
147 .quant = QUANT_MAT_STANDARD,
150 .full_name = "high quality",
151 .tag = MKTAG('a', 'p', 'c', 'h'),
154 .br_tab = { 1566, 1216, 1070, 950 },
155 .quant = QUANT_MAT_HQ,
157 // for 4444 profile bitrate numbers are { 2350, 1828, 1600, 1425 }
/* Number of trellis node slots reserved per slice; find_slice_quant addresses
 * them as trellis_node + quantiser. */
#define TRELLIS_WIDTH 16
/* Saturation value for trellis scores. Parenthesised so that the macro always
 * expands as a single operand, whatever operators surround it. */
#define SCORE_LIMIT (INT_MAX / 2)
/* Number of rows in ProresContext.quants: scaled matrices for quantisers
 * below this value are precomputed in encode_init; larger quantisers get a
 * matrix built on demand. */
170 #define MAX_STORED_Q 16
/* Per-thread scratch data; find_quant_thread picks the entry for its
 * threadnr from ProresContext.tdata. */
172 typedef struct ProresThreadData {
/* DCT coefficients of the current slice, one buffer per plane */
173 DECLARE_ALIGNED(16, DCTELEM, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
/* 16x16 sample scratch area handed to get_slice_data for edge emulation */
174 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
/* presumably holds the scaled matrix for quantisers >= MAX_STORED_Q -- confirm */
175 int16_t custom_q[64];
/* trellis nodes for one slice row, allocated in encode_init */
176 struct TrellisNode *nodes;
/* Encoder private context (avctx->priv_data). */
179 typedef struct ProresContext {
/* coefficient and edge-emulation buffers used by encode_slice */
181 DECLARE_ALIGNED(16, DCTELEM, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
182 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
/* quant_mat scaled by each quantiser index; row 0 is reused for the forced
 * quantiser when force_quant is set (see encode_init) */
183 int16_t quants[MAX_STORED_Q][64];
/* matrix built on the fly by encode_slice for quantisers >= MAX_STORED_Q */
184 int16_t custom_q[64];
/* unscaled matrix chosen from prores_quant_matrices */
185 const uint8_t *quant_mat;
187 ProresDSPContext dsp;
/* picture size in 16x16 macroblocks, rounded up */
190 int mb_width, mb_height;
192 int num_chroma_blocks, chroma_factor;
/* entry of prores_profile_info selected by the "profile" option */
205 const struct prores_profile *profile_info;
/* one ProresThreadData per avctx->thread_count */
209 ProresThreadData *tdata;
/*
 * Load the samples of one plane of a slice and pass them, 8x8 block by 8x8
 * block, to ctx->dsp.fdct with blocks as the destination.
 *
 * src, linesize  first sample of the slice in the plane and the plane stride
 *                in bytes; src advances by one macroblock width per iteration
 * x, y, w, h     slice position and plane dimensions in samples, used to
 *                detect macroblocks that cross the right or bottom edge
 * emu_buf        16x16 scratch area used when a macroblock is only partly
 *                inside the plane
 * blocks_per_mb  8x8 blocks per macroblock in this plane; the macroblock is
 *                4 * blocks_per_mb samples wide
 */
212 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
213 int linesize, int x, int y, int w, int h,
214 DCTELEM *blocks, uint16_t *emu_buf,
215 int mbs_per_slice, int blocks_per_mb, int is_chroma)
217 const uint16_t *esrc;
218 const int mb_width = 4 * blocks_per_mb;
222 for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
/* zero the coefficients of this and every remaining macroblock of the slice
 * (presumably taken when the slice runs past the right edge -- confirm) */
224 memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
/* macroblock lies completely inside the plane: transform straight from src */
228 if (x + mb_width <= w && y + 16 <= h) {
230 elinesize = linesize;
/* otherwise build a padded copy in emu_buf; its stride is 16 samples,
 * expressed in bytes like linesize */
235 elinesize = 16 * sizeof(*emu_buf);
/* bw x bh is the part of the macroblock that really exists */
237 bw = FFMIN(w - x, mb_width);
238 bh = FFMIN(h - y, 16);
240 for (j = 0; j < bh; j++) {
241 memcpy(emu_buf + j * 16,
242 (const uint8_t*)src + j * linesize,
/* replicate the last valid sample of the row to the right */
244 pix = emu_buf[j * 16 + bw - 1];
245 for (k = bw; k < mb_width; k++)
246 emu_buf[j * 16 + k] = pix;
/* replicate the last valid row downwards */
249 memcpy(emu_buf + j * 16,
250 emu_buf + (bh - 1) * 16,
251 mb_width * sizeof(*emu_buf));
/* Two block orders follow (presumably selected by is_chroma -- confirm):
 * row by row first, then column by column. esrc + 8 is the right half;
 * elinesize is in bytes while esrc points to uint16_t, so
 * esrc + elinesize * 4 is eight rows further down. */
254 ctx->dsp.fdct(esrc, elinesize, blocks);
256 if (blocks_per_mb > 2) {
257 ctx->dsp.fdct(esrc + 8, elinesize, blocks);
260 ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
262 if (blocks_per_mb > 2) {
263 ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
267 ctx->dsp.fdct(esrc, elinesize, blocks);
269 ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
271 if (blocks_per_mb > 2) {
272 ctx->dsp.fdct(esrc + 8, elinesize, blocks);
274 ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
284 * Write an unsigned rice/exp golomb codeword.
/* codebook is a packed descriptor: bits 0-1 hold switch_bits - 1, bits 2-4
 * the exp-Golomb order and the bits from 5 upwards the Rice order. Values
 * below switch_val are written as Rice codes, the rest as exp-Golomb codes. */
286 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
288 unsigned int rice_order, exp_order, switch_bits, switch_val;
291 /* number of prefix bits to switch between Rice and expGolomb */
292 switch_bits = (codebook & 3) + 1;
293 rice_order = codebook >> 5; /* rice code order */
294 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
296 switch_val = switch_bits << rice_order;
298 if (val >= switch_val) {
/* exp-Golomb branch: rebase val, then emit a zero prefix followed by the
 * exponent + 1 significant bits of val */
299 val -= switch_val - (1 << exp_order);
300 exponent = av_log2(val);
302 put_bits(pb, exponent - exp_order + switch_bits, 0);
303 put_bits(pb, exponent + 1, val);
/* Rice branch: the quotient is sent as a run of zero bits, the remainder in
 * rice_order bits */
305 exponent = val >> rice_order;
308 put_bits(pb, exponent, 0);
311 put_sbits(pb, rice_order, val);
/* -1 (all bits set) for negative x, 0 otherwise; relies on a 32-bit int and
 * an arithmetic right shift. */
#define GET_SIGN(x) ((x) >> 31)
/* Fold a signed value into the non-negative code passed to the VLC writers:
 * 2 * x for x >= 0 and -2 * x - 1 for x < 0. A multiplication is used instead
 * of a left shift because shifting a negative value left is undefined. */
#define MAKE_CODE(x) (((x) * 2) ^ GET_SIGN(x))
/* Write the DC coefficients of blocks_per_slice consecutive blocks (64
 * coefficients apart). Each DC has the 0x4000 bias removed and is divided by
 * scale; the first is coded directly, the others as differences from the
 * previous DC. */
318 static void encode_dcs(PutBitContext *pb, DCTELEM *blocks,
319 int blocks_per_slice, int scale)
322 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
324 prev_dc = (blocks[0] - 0x4000) / scale;
325 encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
330 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
331 dc = (blocks[0] - 0x4000) / scale;
332 delta = dc - prev_dc;
333 new_sign = GET_SIGN(delta);
/* negates delta when sign is -1 (sign presumably carries the sign of the
 * previous difference -- confirm) */
334 delta = (delta ^ sign) - sign;
335 code = MAKE_CODE(delta);
336 encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
/* codebook for the next DC: half the current code rounded up, at most 3 */
337 codebook = (code + (code & 1)) >> 1;
338 codebook = FFMIN(codebook, 3);
/* Write the AC coefficients of a slice plane. Coefficients are visited scan
 * position by scan position (1..63) and, within one position, across all
 * blocks of the slice; each is divided by the matching qmat entry and coded
 * as a run, a level and a sign bit. */
344 static void encode_acs(PutBitContext *pb, DCTELEM *blocks,
345 int blocks_per_slice,
346 int plane_size_factor,
347 const uint8_t *scan, const int16_t *qmat)
350 int run, level, run_cb, lev_cb;
351 int max_coeffs, abs_level;
/* total number of coefficients in the slice plane (64 per block) */
353 max_coeffs = blocks_per_slice << 6;
/* initial run / level codebook indices */
354 run_cb = ff_prores_run_to_cb_index[4];
355 lev_cb = ff_prores_lev_to_cb_index[2];
358 for (i = 1; i < 64; i++) {
359 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
360 level = blocks[idx] / qmat[scan[i]];
362 abs_level = FFABS(level);
363 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
364 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
366 put_sbits(pb, 1, GET_SIGN(level));
/* next codebooks depend on the run and level just coded, clamped to 15 and 9 */
368 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
369 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/* Code one plane of a slice (DC coefficients, then AC coefficients) into pb.
 * Returns the number of whole bytes added to the bitstream. */
378 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
379 const uint16_t *src, int linesize,
380 int mbs_per_slice, DCTELEM *blocks,
381 int blocks_per_mb, int plane_size_factor,
384 int blocks_per_slice, saved_pos;
/* bit position before this plane, to measure its size afterwards */
386 saved_pos = put_bits_count(pb);
387 blocks_per_slice = mbs_per_slice * blocks_per_mb;
/* the first matrix entry is the DC scale */
389 encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
390 encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
391 ctx->scantable.permutated, qmat);
394 return (put_bits_count(pb) - saved_pos) >> 3;
/* Encode all planes of the slice at macroblock position (x, y) with the given
 * quantiser. The byte size of every plane is stored in sizes[] and summed in
 * total_size. */
397 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
399 int sizes[4], int x, int y, int quant,
402 ProresContext *ctx = avctx->priv_data;
406 int slice_width_factor = av_log2(mbs_per_slice);
407 int num_cblocks, pwidth;
408 int plane_factor, is_chroma;
/* Pick the scaled matrix: row 0 holds the forced quantiser, rows below
 * MAX_STORED_Q are precomputed, anything larger is built in custom_q. */
411 if (ctx->force_quant) {
412 qmat = ctx->quants[0];
413 } else if (quant < MAX_STORED_Q) {
414 qmat = ctx->quants[quant];
416 qmat = ctx->custom_q;
417 for (i = 0; i < 64; i++)
418 qmat[i] = ctx->quant_mat[i] * quant;
421 for (i = 0; i < ctx->num_planes; i++) {
/* planes 1 and 2 are treated as chroma */
422 is_chroma = (i == 1 || i == 2);
423 plane_factor = slice_width_factor + 2;
425 plane_factor += ctx->chroma_factor - 3;
/* luma and 4:4:4 chroma use the full picture width, other chroma half of it */
426 if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
430 pwidth = avctx->width;
435 pwidth = avctx->width >> 1;
/* yp counts rows (stride in bytes), xp counts 16-bit samples */
437 src = (const uint16_t*)(pic->data[i] + yp * pic->linesize[i]) + xp;
439 get_slice_data(ctx, src, pic->linesize[i], xp, yp,
440 pwidth, avctx->height, ctx->blocks[0], ctx->emu_buf,
441 mbs_per_slice, num_cblocks, is_chroma);
442 sizes[i] = encode_slice_plane(ctx, pb, src, pic->linesize[i],
443 mbs_per_slice, ctx->blocks[0],
444 num_cblocks, plane_factor,
446 total_size += sizes[i];
/* Return the number of bits needed to code val with the given codebook
 * descriptor, without writing anything. The descriptor is unpacked exactly as
 * in encode_vlc_codeword. */
451 static inline int estimate_vlc(unsigned codebook, int val)
453 unsigned int rice_order, exp_order, switch_bits, switch_val;
456 /* number of prefix bits to switch between Rice and expGolomb */
457 switch_bits = (codebook & 3) + 1;
458 rice_order = codebook >> 5; /* rice code order */
459 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
461 switch_val = switch_bits << rice_order;
463 if (val >= switch_val) {
464 val -= switch_val - (1 << exp_order);
465 exponent = av_log2(val);
/* exp-Golomb: (exponent - exp_order + switch_bits) prefix bits plus
 * (exponent + 1) value bits */
467 return exponent * 2 - exp_order + switch_bits + 1;
/* Rice: quotient bits, one extra bit and rice_order remainder bits */
469 return (val >> rice_order) + rice_order + 1;
/* Bit-count counterpart of encode_dcs: accumulates the size of the DC codes
 * in bits and adds the division remainder of every DC to *error. */
473 static int estimate_dcs(int *error, DCTELEM *blocks, int blocks_per_slice,
477 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
480 prev_dc = (blocks[0] - 0x4000) / scale;
481 bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
/* what the division by scale discards counts as error */
485 *error += FFABS(blocks[0] - 0x4000) % scale;
487 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
488 dc = (blocks[0] - 0x4000) / scale;
489 *error += FFABS(blocks[0] - 0x4000) % scale;
490 delta = dc - prev_dc;
491 new_sign = GET_SIGN(delta);
/* negates delta when sign is -1 */
492 delta = (delta ^ sign) - sign;
493 code = MAKE_CODE(delta);
494 bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
/* codebook for the next DC: half the current code rounded up, at most 3 */
495 codebook = (code + (code & 1)) >> 1;
496 codebook = FFMIN(codebook, 3);
/* Bit-count counterpart of encode_acs: visits the coefficients in the same
 * order, sums the size of the run and level codes and adds the division
 * remainder of every coefficient to *error. */
504 static int estimate_acs(int *error, DCTELEM *blocks, int blocks_per_slice,
505 int plane_size_factor,
506 const uint8_t *scan, const int16_t *qmat)
509 int run, level, run_cb, lev_cb;
510 int max_coeffs, abs_level;
/* total number of coefficients in the slice plane (64 per block) */
513 max_coeffs = blocks_per_slice << 6;
514 run_cb = ff_prores_run_to_cb_index[4];
515 lev_cb = ff_prores_lev_to_cb_index[2];
518 for (i = 1; i < 64; i++) {
519 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
520 level = blocks[idx] / qmat[scan[i]];
521 *error += FFABS(blocks[idx]) % qmat[scan[i]];
523 abs_level = FFABS(level);
524 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
525 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
/* next codebooks depend on the run and level just counted, clamped to 15 and 9 */
528 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
529 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/* Estimate the coded size of one plane of a slice from the coefficients
 * already stored in td->blocks[plane]. The quantisation error is added to
 * *error; the return value is the bit count aligned up to a multiple of 8. */
540 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
541 const uint16_t *src, int linesize,
543 int blocks_per_mb, int plane_size_factor,
544 const int16_t *qmat, ProresThreadData *td)
546 int blocks_per_slice;
549 blocks_per_slice = mbs_per_slice * blocks_per_mb;
551 bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
552 bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
553 plane_size_factor, ctx->scantable.permutated, qmat);
555 return FFALIGN(bits, 8);
/*
 * Evaluate every candidate quantiser for the slice at macroblock position
 * (x, y) and extend the rate/distortion trellis by one slice.
 *
 * trellis_node is the index of this slice's first node in td->nodes; the
 * nodes of the previous slice sit TRELLIS_WIDTH entries earlier. Within a
 * slice a node is addressed by quantiser value, with one extra node at
 * max_quant + 1 for an "overquant" fallback.
 */
558 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
559 int trellis_node, int x, int y, int mbs_per_slice,
560 ProresThreadData *td)
562 ProresContext *ctx = avctx->priv_data;
563 int i, q, pq, xp, yp;
565 int slice_width_factor = av_log2(mbs_per_slice);
566 int num_cblocks[MAX_PLANES], pwidth;
567 int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
568 const int min_quant = ctx->profile_info->min_quant;
569 const int max_quant = ctx->profile_info->max_quant;
570 int error, bits, bits_limit;
571 int mbs, prev, cur, new_score;
572 int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
/* macroblocks of this row up to the end of this slice; scales the bit budget */
576 mbs = x + mbs_per_slice;
/* transform every plane of the slice once; all quantisers reuse the result */
578 for (i = 0; i < ctx->num_planes; i++) {
579 is_chroma[i] = (i == 1 || i == 2);
580 plane_factor[i] = slice_width_factor + 2;
582 plane_factor[i] += ctx->chroma_factor - 3;
583 if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
587 pwidth = avctx->width;
592 pwidth = avctx->width >> 1;
594 src = (const uint16_t*)(pic->data[i] + yp * pic->linesize[i]) + xp;
596 get_slice_data(ctx, src, pic->linesize[i], xp, yp,
597 pwidth, avctx->height, td->blocks[i], td->emu_buf,
598 mbs_per_slice, num_cblocks[i], is_chroma[i]);
/* reset this slice's nodes, including the extra one at max_quant + 1 */
601 for (q = min_quant; q < max_quant + 2; q++) {
602 td->nodes[trellis_node + q].prev_node = -1;
603 td->nodes[trellis_node + q].quant = q;
606 // todo: maybe perform coarser quantising to fit into frame size when needed
/* size and error of the slice for every regular quantiser */
607 for (q = min_quant; q <= max_quant; q++) {
610 for (i = 0; i < ctx->num_planes; i++) {
611 bits += estimate_slice_plane(ctx, &error, i,
612 src, pic->linesize[i],
614 num_cblocks[i], plane_factor[i],
/* slices above 65000 bytes get special treatment (statement not shown here;
 * presumably the score is forced to SCORE_LIMIT -- confirm) */
617 if (bits > 65000 * 8) {
621 slice_bits[q] = bits;
622 slice_score[q] = error;
/* Extra node: if max_quant already fits the per-slice budget, duplicate it
 * with a score one higher; otherwise try larger quantisers (below 128) until
 * one fits. */
624 if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
625 slice_bits[max_quant + 1] = slice_bits[max_quant];
626 slice_score[max_quant + 1] = slice_score[max_quant] + 1;
627 overquant = max_quant;
629 for (q = max_quant + 1; q < 128; q++) {
632 if (q < MAX_STORED_Q) {
633 qmat = ctx->quants[q];
636 for (i = 0; i < 64; i++)
637 qmat[i] = ctx->quant_mat[i] * q;
639 for (i = 0; i < ctx->num_planes; i++) {
640 bits += estimate_slice_plane(ctx, &error, i,
641 src, pic->linesize[i],
643 num_cblocks[i], plane_factor[i],
646 if (bits <= ctx->bits_per_mb * mbs_per_slice)
650 slice_bits[max_quant + 1] = bits;
651 slice_score[max_quant + 1] = error;
/* the extra node reports the quantiser actually found, not max_quant + 1 */
654 td->nodes[trellis_node + max_quant + 1].quant = overquant;
/* Trellis step: link every node of the previous slice (pq) to every node of
 * this slice (q), keeping for each current node the predecessor with the
 * lowest accumulated score. */
656 bits_limit = mbs * ctx->bits_per_mb;
657 for (pq = min_quant; pq < max_quant + 2; pq++) {
658 prev = trellis_node - TRELLIS_WIDTH + pq;
660 for (q = min_quant; q < max_quant + 2; q++) {
661 cur = trellis_node + q;
663 bits = td->nodes[prev].bits + slice_bits[q];
664 error = slice_score[q];
/* paths over the cumulative budget are handled here (statement not shown;
 * presumably penalised with SCORE_LIMIT -- confirm) */
665 if (bits > bits_limit)
/* saturating addition so that scores never exceed SCORE_LIMIT */
668 if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
669 new_score = td->nodes[prev].score + error;
671 new_score = SCORE_LIMIT;
672 if (td->nodes[cur].prev_node == -1 ||
673 td->nodes[cur].score >= new_score) {
675 td->nodes[cur].bits = bits;
676 td->nodes[cur].score = new_score;
677 td->nodes[cur].prev_node = prev;
/* choose the node with the lowest score; on ties the higher quantiser wins */
682 error = td->nodes[trellis_node + min_quant].score;
683 pq = trellis_node + min_quant;
684 for (q = min_quant + 1; q < max_quant + 2; q++) {
685 if (td->nodes[trellis_node + q].score <= error) {
686 error = td->nodes[trellis_node + q].score;
687 pq = trellis_node + q;
/* Worker run through avctx->execute2: jobnr is the macroblock row. It runs
 * find_slice_quant on every slice of the row from left to right, then walks
 * the trellis backwards to store the chosen quantiser of each slice in
 * ctx->slice_q. */
694 static int find_quant_thread(AVCodecContext *avctx, void *arg,
695 int jobnr, int threadnr)
697 ProresContext *ctx = avctx->priv_data;
698 ProresThreadData *td = ctx->tdata + threadnr;
699 int mbs_per_slice = ctx->mbs_per_slice;
700 int x, y = jobnr, mb, q = 0;
702 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
/* fewer macroblocks left than the slice size: adjust mbs_per_slice
 * (loop body not shown here) */
703 while (ctx->mb_width - x < mbs_per_slice)
/* slice number mb uses trellis nodes starting at (mb + 1) * TRELLIS_WIDTH */
705 q = find_slice_quant(avctx, avctx->coded_frame,
706 (mb + 1) * TRELLIS_WIDTH, x, y,
/* backtrack from the last selected node through prev_node, right to left */
710 for (x = ctx->slices_width - 1; x >= 0; x--) {
711 ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
712 q = td->nodes[q].prev_node;
/*
 * Encode one picture into pkt: frame container, frame header, picture
 * header, slice size table and the slices themselves. Size fields that are
 * only known at the end are skipped first and patched afterwards.
 */
718 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
719 const AVFrame *pic, int *got_packet)
721 ProresContext *ctx = avctx->priv_data;
722 uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
723 uint8_t *picture_size_pos;
725 int x, y, i, mb, q = 0;
726 int sizes[4] = { 0 };
/* slice header: two bytes plus a 16-bit size for every plane but the last */
727 int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
728 int frame_size, picture_size, slice_size;
/* every frame is coded as an intra key frame */
731 *avctx->coded_frame = *pic;
732 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
733 avctx->coded_frame->key_frame = 1;
/* packet size: the frame size estimated in encode_init plus a safety margin */
735 pkt_size = ctx->frame_size + FF_MIN_BUFFER_SIZE;
737 if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
740 orig_buf = pkt->data;
743 orig_buf += 4; // frame size
744 bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
749 buf += 2; // frame header size will be stored here
750 bytestream_put_be16 (&buf, 0); // version 1
751 bytestream_put_buffer(&buf, ctx->vendor, 4);
752 bytestream_put_be16 (&buf, avctx->width);
753 bytestream_put_be16 (&buf, avctx->height);
754 bytestream_put_byte (&buf, ctx->chroma_factor << 6); // frame flags
755 bytestream_put_byte (&buf, 0); // reserved
756 bytestream_put_byte (&buf, avctx->color_primaries);
757 bytestream_put_byte (&buf, avctx->color_trc);
758 bytestream_put_byte (&buf, avctx->colorspace);
759 bytestream_put_byte (&buf, 0x40); // source format and alpha information
760 bytestream_put_byte (&buf, 0); // reserved
/* the same matrix is written for luma and chroma */
761 if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
762 bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
763 // luma quantisation matrix
764 for (i = 0; i < 64; i++)
765 bytestream_put_byte(&buf, ctx->quant_mat[i]);
766 // chroma quantisation matrix
767 for (i = 0; i < 64; i++)
768 bytestream_put_byte(&buf, ctx->quant_mat[i]);
770 bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
772 bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
/* picture header; the size field directly follows the header-size byte */
775 picture_size_pos = buf + 1;
776 bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
777 buf += 4; // picture data size will be stored here
778 bytestream_put_be16 (&buf, ctx->num_slices); // total number of slices
779 bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
781 // seek table - will be filled during slice encoding
783 buf += ctx->num_slices * 2;
/* without a forced quantiser, choose one per slice first (one job per
 * macroblock row) */
786 if (!ctx->force_quant) {
787 ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
793 for (y = 0; y < ctx->mb_height; y++) {
794 int mbs_per_slice = ctx->mbs_per_slice;
795 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
796 q = ctx->force_quant ? ctx->force_quant
797 : ctx->slice_q[mb + y * ctx->slices_width];
799 while (ctx->mb_width - x < mbs_per_slice)
/* slice header size is stored in bits */
802 bytestream_put_byte(&buf, slice_hdr_size << 3);
804 buf += slice_hdr_size - 1;
/* the slice payload is written right behind the header space reserved above */
805 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
806 encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
/* complete the slice header: quantiser, then the sizes of all planes except
 * the last one */
808 bytestream_put_byte(&slice_hdr, q);
809 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
810 for (i = 0; i < ctx->num_planes - 1; i++) {
811 bytestream_put_be16(&slice_hdr, sizes[i]);
812 slice_size += sizes[i];
/* seek table entry: total slice size including its header */
814 bytestream_put_be16(&slice_sizes, slice_size);
815 buf += slice_size - slice_hdr_size;
820 frame_size = buf - orig_buf;
/* NOTE(review): picture_size_pos is one byte into the 8-byte picture header,
 * so this value leaves out 7 header bytes -- confirm against the bitstream
 * definition whether the picture size should include the whole header. */
821 picture_size = buf - picture_size_pos - 6;
822 bytestream_put_be32(&orig_buf, frame_size);
823 bytestream_put_be32(&picture_size_pos, picture_size);
825 pkt->size = frame_size;
826 pkt->flags |= AV_PKT_FLAG_KEY;
/* Release everything allocated by encode_init: the coded frame, the trellis
 * node array of every thread, the thread data array and the per-slice
 * quantiser table. */
832 static av_cold int encode_close(AVCodecContext *avctx)
834 ProresContext *ctx = avctx->priv_data;
837 av_freep(&avctx->coded_frame);
840 for (i = 0; i < avctx->thread_count; i++)
841 av_free(ctx->tdata[i].nodes);
843 av_freep(&ctx->tdata);
844 av_freep(&ctx->slice_q);
/*
 * Validate the options, derive the picture geometry and prepare either the
 * rate-control state (no forced quantiser) or the single fixed matrix
 * (forced quantiser). Returns AVERROR(EINVAL), AVERROR_INVALIDDATA or
 * AVERROR(ENOMEM) on the failures visible below.
 */
849 static av_cold int encode_init(AVCodecContext *avctx)
851 ProresContext *ctx = avctx->priv_data;
854 int min_quant, max_quant;
856 avctx->bits_per_raw_sample = 10;
857 avctx->coded_frame = avcodec_alloc_frame();
858 if (!avctx->coded_frame)
859 return AVERROR(ENOMEM);
861 ff_proresdsp_init(&ctx->dsp, avctx);
862 ff_init_scantable(ctx->dsp.dct_permutation, &ctx->scantable,
863 ff_prores_progressive_scan);
865 mps = ctx->mbs_per_slice;
/* x & (x - 1) is non-zero exactly when x has more than one bit set */
866 if (mps & (mps - 1)) {
867 av_log(avctx, AV_LOG_ERROR,
868 "there should be an integer power of two MBs per slice\n");
869 return AVERROR(EINVAL);
872 ctx->chroma_factor = avctx->pix_fmt == PIX_FMT_YUV422P10
875 ctx->profile_info = prores_profile_info + ctx->profile;
/* picture size in macroblocks, rounded up to whole 16x16 units */
878 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
879 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
/* Slices per row: the full-size slices plus one slice per set bit of the
 * remaining macroblock count. */
880 ctx->slices_width = ctx->mb_width / mps;
881 ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
882 ctx->num_slices = ctx->mb_height * ctx->slices_width;
/* quant_sel == -1 means "auto": use the profile's default matrix */
884 if (ctx->quant_sel == -1)
885 ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
887 ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
/* encode_frame copies exactly four vendor bytes into the frame header */
889 if (strlen(ctx->vendor) != 4) {
890 av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
891 return AVERROR_INVALIDDATA;
/* a non-zero global_quality selects a fixed quantiser for all slices */
894 ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
895 if (!ctx->force_quant) {
/* no explicit bits_per_mb: take it from the profile table according to the
 * picture size in macroblocks */
896 if (!ctx->bits_per_mb) {
897 for (i = 0; i < NUM_MB_LIMITS - 1; i++)
898 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height)
900 ctx->bits_per_mb = ctx->profile_info->br_tab[i];
901 } else if (ctx->bits_per_mb < 128) {
902 av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
903 return AVERROR_INVALIDDATA;
/* precompute the scaled matrices for the quantisers kept in ctx->quants */
906 min_quant = ctx->profile_info->min_quant;
907 max_quant = ctx->profile_info->max_quant;
908 for (i = min_quant; i < MAX_STORED_Q; i++) {
909 for (j = 0; j < 64; j++)
910 ctx->quants[i][j] = ctx->quant_mat[j] * i;
/* one chosen quantiser per slice */
913 ctx->slice_q = av_malloc(ctx->num_slices * sizeof(*ctx->slice_q));
916 return AVERROR(ENOMEM);
919 ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
922 return AVERROR(ENOMEM);
/* Every thread gets trellis nodes for one slice row plus one leading group;
 * the leading group is initialised as the zero-cost start of the trellis. */
925 for (j = 0; j < avctx->thread_count; j++) {
926 ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
928 * sizeof(*ctx->tdata->nodes));
929 if (!ctx->tdata[j].nodes) {
931 return AVERROR(ENOMEM);
933 for (i = min_quant; i < max_quant + 2; i++) {
934 ctx->tdata[j].nodes[i].prev_node = -1;
935 ctx->tdata[j].nodes[i].bits = 0;
936 ctx->tdata[j].nodes[i].score = 0;
942 if (ctx->force_quant > 64) {
943 av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
944 return AVERROR_INVALIDDATA;
/* Fixed quantiser: store the scaled matrix in row 0 and derive a per-block
 * bit estimate (ls) from the size of the quantised coefficient range. */
947 for (j = 0; j < 64; j++) {
948 ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
949 ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
952 ctx->bits_per_mb = ls * 8;
953 if (ctx->chroma_factor == CFACTOR_Y444)
954 ctx->bits_per_mb += ls * 4;
955 if (ctx->num_planes == 4)
956 ctx->bits_per_mb += ls * 4;
/* estimate of the coded frame size, used by encode_frame to size the packet */
959 ctx->frame_size = ctx->num_slices * (2 + 2 * ctx->num_planes
960 + (2 * mps * ctx->bits_per_mb) / 8)
963 avctx->codec_tag = ctx->profile_info->tag;
965 av_log(avctx, AV_LOG_DEBUG, "profile %d, %d slices, %d bits per MB\n",
966 ctx->profile, ctx->num_slices, ctx->bits_per_mb);
967 av_log(avctx, AV_LOG_DEBUG, "estimated frame size %d\n",
/* Byte offset of a ProresContext field, for use in the AVOption table. */
#define OFFSET(x) offsetof(ProresContext, x)
/* Flags shared by every option of this encoder. Parenthesised so that the OR
 * cannot regroup with neighbouring operators where the macro is expanded. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
/* Private options of the encoder. Named constants share the unit string of
 * the option they belong to ("profile" or "quant_mat"). */
976 static const AVOption options[] = {
/* slice width in macroblocks; encode_init additionally requires a power of two */
977 { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
978 AV_OPT_TYPE_INT, { 8 }, 1, MAX_MBS_PER_SLICE, VE },
/* profile selection, default "standard" */
979 { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
980 { PRORES_PROFILE_STANDARD },
981 PRORES_PROFILE_PROXY, PRORES_PROFILE_HQ, VE, "profile" },
982 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_PROXY },
983 0, 0, VE, "profile" },
984 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_LT },
985 0, 0, VE, "profile" },
986 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_STANDARD },
987 0, 0, VE, "profile" },
988 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_HQ },
989 0, 0, VE, "profile" },
/* vendor string written to the frame header; encode_init requires 4 characters */
990 { "vendor", "vendor ID", OFFSET(vendor),
991 AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
/* 0 lets encode_init derive the value from the profile and picture size */
992 { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
993 AV_OPT_TYPE_INT, { 0 }, 0, 8192, VE },
/* quantiser matrix selection; -1 ("auto") follows the profile */
994 { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
995 { -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
996 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { -1 },
997 0, 0, VE, "quant_mat" },
998 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_PROXY },
999 0, 0, VE, "quant_mat" },
1000 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_LT },
1001 0, 0, VE, "quant_mat" },
1002 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_STANDARD },
1003 0, 0, VE, "quant_mat" },
1004 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_HQ },
1005 0, 0, VE, "quant_mat" },
1006 { "default", NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_DEFAULT },
1007 0, 0, VE, "quant_mat" },
/* AVClass describing the encoder's private context; referenced by the codec
 * definition through .priv_class. */
1011 static const AVClass proresenc_class = {
1012 .class_name = "ProRes encoder",
1013 .item_name = av_default_item_name,
1015 .version = LIBAVUTIL_VERSION_INT,
/* Codec registration entry: wires encode_init / encode_frame / encode_close
 * together, advertises slice threading and lists the two supported 10-bit
 * pixel formats. */
1018 AVCodec ff_prores_kostya_encoder = {
1019 .name = "prores_kostya",
1020 .type = AVMEDIA_TYPE_VIDEO,
1021 .id = AV_CODEC_ID_PRORES,
1022 .priv_data_size = sizeof(ProresContext),
1023 .init = encode_init,
1024 .close = encode_close,
1025 .encode2 = encode_frame,
1026 .capabilities = CODEC_CAP_SLICE_THREADS,
1027 .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
/* list terminated by PIX_FMT_NONE */
1028 .pix_fmts = (const enum PixelFormat[]) {
1029 PIX_FMT_YUV422P10, PIX_FMT_YUV444P10, PIX_FMT_NONE
1031 .priv_class = &proresenc_class,