4 * Copyright (c) 2012 Konstantin Shishkov
6 * This encoder appears to be based on the encoder by Anatoliy Wasserman,
7 * considering similarities in the bugs.
9 * This file is part of Libav.
11 * Libav is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * Libav is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with Libav; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/opt.h"
29 #include "bytestream.h"
31 #include "proresdsp.h"
32 #include "proresdata.h"
/*
 * Encoder-wide constants.
 *   CFACTOR_Y422 / CFACTOR_Y444 - values held in ProresContext.chroma_factor;
 *       encode_frame() shifts that value into the frame-flags byte, and the
 *       slice code compares it against CFACTOR_Y444 when choosing the plane
 *       geometry.
 *   MAX_MBS_PER_SLICE - upper bound of the "mbs_per_slice" option and the
 *       multiplier used to size the coefficient buffers.
 *   MAX_PLANES - number of planes the coefficient buffers are sized for.
 */
34 #define CFACTOR_Y422 2
35 #define CFACTOR_Y444 3
37 #define MAX_MBS_PER_SLICE 8
39 #define MAX_PLANES 3 // should be increased to 4 when there's PIX_FMT_YUV444AP10
/*
 * Profile identifiers (fragment of an enumeration; its opening line and the
 * remaining enumerators are not part of this listing). encode_init() uses
 * the selected value as an offset into prores_profile_info[], and the
 * "profile" option table uses these names as its constants.
 * NOTE(review): PRORES_PROFILE_LT and PRORES_PROFILE_HQ are referenced by the
 * option table but their enumerator lines are not visible here.
 */
42 PRORES_PROFILE_PROXY = 0,
44 PRORES_PROFILE_STANDARD,
/*
 * Quantisation matrices, 64 entries each. encode_init() selects one of them,
 * either through the profile's .quant index or through the "quant_mat"
 * option, and stores the pointer in ProresContext.quant_mat; the matrices
 * actually used for quantisation are that row multiplied by the quantiser.
 * NOTE(review): five matrices are visible. Presumably they follow the order
 * QUANT_MAT_PROXY, _LT, _STANDARD, _HQ, _DEFAULT, but the enumeration that
 * defines those indices is not visible in this listing -- confirm.
 */
56 static const uint8_t prores_quant_matrices[][64] = {
/* first visible matrix */
58 4, 7, 9, 11, 13, 14, 15, 63,
59 7, 7, 11, 12, 14, 15, 63, 63,
60 9, 11, 13, 14, 15, 63, 63, 63,
61 11, 11, 13, 14, 63, 63, 63, 63,
62 11, 13, 14, 63, 63, 63, 63, 63,
63 13, 14, 63, 63, 63, 63, 63, 63,
64 13, 63, 63, 63, 63, 63, 63, 63,
65 63, 63, 63, 63, 63, 63, 63, 63,
/* second visible matrix */
68 4, 5, 6, 7, 9, 11, 13, 15,
69 5, 5, 7, 8, 11, 13, 15, 17,
70 6, 7, 9, 11, 13, 15, 15, 17,
71 7, 7, 9, 11, 13, 15, 17, 19,
72 7, 9, 11, 13, 14, 16, 19, 23,
73 9, 11, 13, 14, 16, 19, 23, 29,
74 9, 11, 13, 15, 17, 21, 28, 35,
75 11, 13, 16, 17, 21, 28, 35, 41,
/* third visible matrix */
78 4, 4, 5, 5, 6, 7, 7, 9,
79 4, 4, 5, 6, 7, 7, 9, 9,
80 5, 5, 6, 7, 7, 9, 9, 10,
81 5, 5, 6, 7, 7, 9, 9, 10,
82 5, 6, 7, 7, 8, 9, 10, 12,
83 6, 7, 7, 8, 9, 10, 12, 15,
84 6, 7, 7, 9, 10, 11, 14, 17,
85 7, 7, 9, 10, 11, 14, 17, 21,
/* fourth visible matrix */
88 4, 4, 4, 4, 4, 4, 4, 4,
89 4, 4, 4, 4, 4, 4, 4, 4,
90 4, 4, 4, 4, 4, 4, 4, 4,
91 4, 4, 4, 4, 4, 4, 4, 5,
92 4, 4, 4, 4, 4, 4, 5, 5,
93 4, 4, 4, 4, 4, 5, 5, 6,
94 4, 4, 4, 4, 5, 5, 6, 7,
95 4, 4, 4, 4, 5, 6, 7, 7,
/* fifth visible matrix: every entry is 4 */
98 4, 4, 4, 4, 4, 4, 4, 4,
99 4, 4, 4, 4, 4, 4, 4, 4,
100 4, 4, 4, 4, 4, 4, 4, 4,
101 4, 4, 4, 4, 4, 4, 4, 4,
102 4, 4, 4, 4, 4, 4, 4, 4,
103 4, 4, 4, 4, 4, 4, 4, 4,
104 4, 4, 4, 4, 4, 4, 4, 4,
105 4, 4, 4, 4, 4, 4, 4, 4,
/*
 * Macroblock-count thresholds. When the user gives no "bits_per_mb",
 * encode_init() compares each entry against mb_width * mb_height and uses
 * the loop index it ends up with to pick an entry of the profile's br_tab[]
 * (which has the same NUM_MB_LIMITS length).
 */
109 #define NUM_MB_LIMITS 4
110 static const int prores_mb_limits[NUM_MB_LIMITS] = {
111 1620, // up to 720x576
112 2700, // up to 960x720
113 6075, // up to 1440x1080
114 9216, // up to 2048x1152
/*
 * Per-profile encoder parameters; encode_init() selects an entry with
 * prores_profile_info + ctx->profile.
 * Members declared or initialised in the visible lines, and how this file
 * reads them:
 *   full_name - profile name string
 *   tag       - copied to avctx->codec_tag by encode_init()
 *   br_tab    - bits per macroblock, one entry per prores_mb_limits[] slot
 *   quant     - index into prores_quant_matrices[]
 * min_quant / max_quant are read elsewhere in the file (quantiser search
 * range), but their declarations and initialisers are not visible here.
 * NOTE(review): several member declarations, initialisers and the braces
 * separating the four entries are absent from this listing.
 */
117 static const struct prores_profile {
118 const char *full_name;
122 int br_tab[NUM_MB_LIMITS];
124 } prores_profile_info[4] = {
/* entry for the "proxy" profile */
126 .full_name = "proxy",
127 .tag = MKTAG('a', 'p', 'c', 'o'),
130 .br_tab = { 300, 242, 220, 194 },
131 .quant = QUANT_MAT_PROXY,
/* entry using the LT matrix (its .full_name line is not visible) */
135 .tag = MKTAG('a', 'p', 'c', 's'),
138 .br_tab = { 720, 560, 490, 440 },
139 .quant = QUANT_MAT_LT,
/* entry for the "standard" profile */
142 .full_name = "standard",
143 .tag = MKTAG('a', 'p', 'c', 'n'),
146 .br_tab = { 1050, 808, 710, 632 },
147 .quant = QUANT_MAT_STANDARD,
/* entry for the "high quality" profile */
150 .full_name = "high quality",
151 .tag = MKTAG('a', 'p', 'c', 'h'),
154 .br_tab = { 1566, 1216, 1070, 950 },
155 .quant = QUANT_MAT_HQ,
157 // for 4444 profile bitrate numbers are { 2350, 1828, 1600, 1425 }
/*
 * Number of trellis nodes reserved per slice position: find_slice_quant()
 * sizes its per-quantiser arrays with it and steps back by this amount to
 * reach the nodes of the previous slice.
 */
#define TRELLIS_WIDTH 16
/*
 * Score ceiling used by the trellis search: scores at or above this value are
 * not summed any further. The replacement list is parenthesised so the macro
 * keeps its value inside larger expressions (e.g. "a % SCORE_LIMIT").
 */
#define SCORE_LIMIT (INT_MAX / 2)
/* Number of precomputed quantiser matrices kept in ProresContext.quants[]. */
#define MAX_STORED_Q 16
/*
 * Scratch data used by one worker thread. encode_init() allocates
 * avctx->thread_count of these and find_quant_thread() picks one with its
 * threadnr argument.
 *   blocks   - DCT coefficients, one buffer per plane
 *   emu_buf  - 16x16 sample buffer handed to get_slice_data()
 *   custom_q - storage for one 64-entry quantiser matrix (its use is not
 *              visible in this listing)
 *   nodes    - trellis nodes read and written by find_slice_quant()
 */
172 typedef struct ProresThreadData {
173 DECLARE_ALIGNED(16, DCTELEM, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
174 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
175 int16_t custom_q[64];
176 struct TrellisNode *nodes;
/*
 * Private encoder state (avctx->priv_data).
 * NOTE(review): only part of the member list is visible in this listing;
 * members such as scantable, mbs_per_slice, num_planes, slices_width,
 * num_slices, bits_per_mb, force_quant, vendor, quant_sel, frame_size,
 * profile and slice_q are used by the functions below but declared on lines
 * that are not shown.
 */
179 typedef struct ProresContext {
/* coefficient and edge buffers used by encode_slice() */
181 DECLARE_ALIGNED(16, DCTELEM, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
182 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
/* quant_mat scaled by each stored quantiser; filled by encode_init() */
183 int16_t quants[MAX_STORED_Q][64];
/* matrix built on demand by encode_slice() for quantisers >= MAX_STORED_Q */
184 int16_t custom_q[64];
/* selected row of prores_quant_matrices[] */
185 const uint8_t *quant_mat;
187 ProresDSPContext dsp;
/* picture size in 16x16 macroblocks (dimensions rounded up to 16) */
190 int mb_width, mb_height;
192 int num_chroma_blocks, chroma_factor;
/* selected entry of prores_profile_info[] */
205 const struct prores_profile *profile_info;
/* per-thread scratch data, avctx->thread_count entries */
209 ProresThreadData *tdata;
212 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
213 int linesize, int x, int y, int w, int h,
214 DCTELEM *blocks, uint16_t *emu_buf,
215 int mbs_per_slice, int blocks_per_mb, int is_chroma)
217 const uint16_t *esrc;
218 const int mb_width = 4 * blocks_per_mb;
222 for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
224 memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
228 if (x + mb_width <= w && y + 16 <= h) {
230 elinesize = linesize;
235 elinesize = 16 * sizeof(*emu_buf);
237 bw = FFMIN(w - x, mb_width);
238 bh = FFMIN(h - y, 16);
240 for (j = 0; j < bh; j++) {
241 memcpy(emu_buf + j * 16,
242 (const uint8_t*)src + j * linesize,
244 pix = emu_buf[j * 16 + bw - 1];
245 for (k = bw; k < mb_width; k++)
246 emu_buf[j * 16 + k] = pix;
249 memcpy(emu_buf + j * 16,
250 emu_buf + (bh - 1) * 16,
251 mb_width * sizeof(*emu_buf));
254 ctx->dsp.fdct(esrc, elinesize, blocks);
256 if (blocks_per_mb > 2) {
257 ctx->dsp.fdct(src + 8, linesize, blocks);
260 ctx->dsp.fdct(src + linesize * 4, linesize, blocks);
262 if (blocks_per_mb > 2) {
263 ctx->dsp.fdct(src + linesize * 4 + 8, linesize, blocks);
267 ctx->dsp.fdct(esrc, elinesize, blocks);
269 ctx->dsp.fdct(src + linesize * 4, linesize, blocks);
271 if (blocks_per_mb > 2) {
272 ctx->dsp.fdct(src + 8, linesize, blocks);
274 ctx->dsp.fdct(src + linesize * 4 + 8, linesize, blocks);
/*
 * encode_vlc_codeword(): write val to pb using the code described by
 * codebook. The codebook byte is unpacked as
 *   bits 0-1 : switch_bits - 1
 *   bits 2-4 : exp-Golomb order
 *   bits 5+  : Rice order
 * Values >= switch_bits << rice_order take the exp-Golomb path (a run of
 * zero bits followed by exponent + 1 bits of the adjusted value); smaller
 * values take the Rice path (val >> rice_order zero bits, then the low
 * rice_order bits via put_sbits()).
 * NOTE(review): some statements of the Rice path and the declaration of
 * exponent are absent from this listing.
 */
284 * Write an unsigned rice/exp golomb codeword.
286 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
288 unsigned int rice_order, exp_order, switch_bits, switch_val;
291 /* number of prefix bits to switch between Rice and expGolomb */
292 switch_bits = (codebook & 3) + 1;
293 rice_order = codebook >> 5; /* rice code order */
294 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
/* first value that is coded with exp-Golomb instead of Rice */
296 switch_val = switch_bits << rice_order;
/* val is converted to unsigned for this comparison */
298 if (val >= switch_val) {
299 val -= switch_val - (1 << exp_order);
300 exponent = av_log2(val);
302 put_bits(pb, exponent - exp_order + switch_bits, 0);
303 put_bits(pb, exponent + 1, val);
305 exponent = val >> rice_order;
308 put_bits(pb, exponent, 0);
311 put_sbits(pb, rice_order, val);
/*
 * GET_SIGN(x)  - x shifted right by 31 bits.
 * MAKE_CODE(x) - (x << 1) XOR GET_SIGN(x).
 * NOTE(review): these rely on a 32-bit int with arithmetic right shift, in
 * which case GET_SIGN yields 0 / -1 and MAKE_CODE maps 0, -1, 1, -2, ... to
 * 0, 1, 2, 3, ...; shifting negative signed values is implementation-defined
 * (>>) or undefined (<<) in ISO C. Both macros evaluate x more than once
 * when combined, so arguments must be free of side effects.
 */
315 #define GET_SIGN(x) ((x) >> 31)
316 #define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
/*
 * Write the DC coefficients of a slice.
 * The first DC is (blocks[0] - 0x4000) / scale, written with FIRST_DC_CB.
 * Every following DC is written as a difference from the previous one, sign
 * folded with "sign", using ff_prores_dc_codebook[codebook]; the codebook
 * index for the next value is derived from the code just written and is
 * capped at 3.
 *   pb               - bit writer
 *   blocks           - coefficients, 64 per block (DC at index 0)
 *   blocks_per_slice - number of blocks
 *   scale            - DC quantiser (the caller passes qmat[0])
 * NOTE(review): the statements that initialise/update sign and prev_dc and
 * that advance blocks before the loop are absent from this listing.
 */
318 static void encode_dcs(PutBitContext *pb, DCTELEM *blocks,
319 int blocks_per_slice, int scale)
322 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
324 prev_dc = (blocks[0] - 0x4000) / scale;
325 encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
330 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
331 dc = (blocks[0] - 0x4000) / scale;
332 delta = dc - prev_dc;
333 new_sign = GET_SIGN(delta);
/* negate delta when sign is -1, leave it unchanged when sign is 0 */
334 delta = (delta ^ sign) - sign;
335 code = MAKE_CODE(delta);
336 encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
/* next codebook index: ceil(code / 2), limited to 3 */
337 codebook = (code + (code & 1)) >> 1;
338 codebook = FFMIN(codebook, 3);
/*
 * Write the AC coefficients of a slice as run/level pairs.
 * Coefficients are visited position by position (scan[1] .. scan[63]) and,
 * for each position, across all blocks of the slice (idx advances by 64).
 * Each coefficient is divided by qmat[scan[i]]; for a coded level the run
 * and the level are written with codebooks chosen from the previous run
 * (capped at 15) and the previous absolute level (capped at 9), followed by
 * one sign bit.
 *   pb                - bit writer
 *   blocks            - coefficients, 64 per block
 *   blocks_per_slice  - number of blocks
 *   plane_size_factor - not used by any visible statement
 *   scan              - scan order table
 *   qmat              - quantiser matrix
 * NOTE(review): the zero-level test, the run counter updates and the level
 * argument of the second encode_vlc_codeword() call are absent from this
 * listing.
 */
344 static void encode_acs(PutBitContext *pb, DCTELEM *blocks,
345 int blocks_per_slice,
346 int plane_size_factor,
347 const uint8_t *scan, const int16_t *qmat)
350 int run, level, run_cb, lev_cb;
351 int max_coeffs, abs_level;
/* total number of coefficients in the slice (64 per block) */
353 max_coeffs = blocks_per_slice << 6;
/* initial codebooks, as if the previous run was 4 and the level 2 */
354 run_cb = ff_prores_run_to_cb_index[4];
355 lev_cb = ff_prores_lev_to_cb_index[2];
358 for (i = 1; i < 64; i++) {
359 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
360 level = blocks[idx] / qmat[scan[i]];
362 abs_level = FFABS(level);
363 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
364 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
366 put_sbits(pb, 1, GET_SIGN(level));
368 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
369 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Write one plane of a slice: DC coefficients via encode_dcs() (scale is
 * qmat[0]) followed by AC coefficients via encode_acs() using the permuted
 * scan table of the context.
 * Returns the number of bits written since entry, shifted right by 3, i.e.
 * the byte count.
 * NOTE(review): the end of the parameter list and at least one statement
 * between encode_acs() and the return are absent from this listing; src and
 * linesize are not used by any visible statement.
 */
378 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
379 const uint16_t *src, int linesize,
380 int mbs_per_slice, DCTELEM *blocks,
381 int blocks_per_mb, int plane_size_factor,
384 int blocks_per_slice, saved_pos;
/* remember the bit position to measure the plane size afterwards */
386 saved_pos = put_bits_count(pb);
387 blocks_per_slice = mbs_per_slice * blocks_per_mb;
389 encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
390 encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
391 ctx->scantable.permutated, qmat);
394 return (put_bits_count(pb) - saved_pos) >> 3;
/*
 * Encode all planes of one slice.
 * Quantiser matrix selection: ctx->quants[0] when a quantiser is forced,
 * ctx->quants[quant] when quant < MAX_STORED_Q, otherwise ctx->custom_q is
 * filled with quant_mat[i] * quant.
 * For every plane the samples are fetched and transformed by
 * get_slice_data() into ctx->blocks[0] (the same buffer is reused for each
 * plane) and written with encode_slice_plane(); the value it returns is
 * stored in sizes[plane] and added to total_size.
 * NOTE(review): parts of the parameter list, the assignments of xp, yp and
 * num_cblocks, and the return statement are absent from this listing.
 */
397 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
399 int sizes[4], int x, int y, int quant,
402 ProresContext *ctx = avctx->priv_data;
406 int slice_width_factor = av_log2(mbs_per_slice);
407 int num_cblocks, pwidth;
408 int plane_factor, is_chroma;
411 if (ctx->force_quant) {
412 qmat = ctx->quants[0];
413 } else if (quant < MAX_STORED_Q) {
414 qmat = ctx->quants[quant];
416 qmat = ctx->custom_q;
417 for (i = 0; i < 64; i++)
418 qmat[i] = ctx->quant_mat[i] * quant;
421 for (i = 0; i < ctx->num_planes; i++) {
/* planes 1 and 2 are treated as chroma */
422 is_chroma = (i == 1 || i == 2);
423 plane_factor = slice_width_factor + 2;
425 plane_factor += ctx->chroma_factor - 3;
/* full-width geometry for non-chroma planes and for 4:4:4 chroma */
426 if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
430 pwidth = avctx->width;
435 pwidth = avctx->width >> 1;
/* linesize is applied as a byte offset, xp as a 16-bit sample offset */
437 src = (const uint16_t*)(pic->data[i] + yp * pic->linesize[i]) + xp;
439 get_slice_data(ctx, src, pic->linesize[i], xp, yp,
440 pwidth, avctx->height, ctx->blocks[0], ctx->emu_buf,
441 mbs_per_slice, num_cblocks, is_chroma);
442 sizes[i] = encode_slice_plane(ctx, pb, src, pic->linesize[i],
443 mbs_per_slice, ctx->blocks[0],
444 num_cblocks, plane_factor,
446 total_size += sizes[i];
/*
 * Return the number of bits used to code val with the given codebook,
 * without writing anything. The codebook byte is unpacked exactly as in
 * encode_vlc_codeword():
 *   exp-Golomb path: exponent * 2 - exp_order + switch_bits + 1 bits
 *   Rice path      : (val >> rice_order) + rice_order + 1 bits
 * NOTE(review): the declaration of exponent is absent from this listing.
 */
451 static inline int estimate_vlc(unsigned codebook, int val)
453 unsigned int rice_order, exp_order, switch_bits, switch_val;
456 /* number of prefix bits to switch between Rice and expGolomb */
457 switch_bits = (codebook & 3) + 1;
458 rice_order = codebook >> 5; /* rice code order */
459 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
/* first value that is coded with exp-Golomb instead of Rice */
461 switch_val = switch_bits << rice_order;
463 if (val >= switch_val) {
464 val -= switch_val - (1 << exp_order);
465 exponent = av_log2(val);
467 return exponent * 2 - exp_order + switch_bits + 1;
469 return (val >> rice_order) + rice_order + 1;
/*
 * Estimate the bit cost of the DC coefficients of a slice, following the
 * same coding steps as encode_dcs() but summing estimate_vlc() results.
 * For every block the quantisation remainder
 * FFABS(blocks[0] - 0x4000) % scale is added to *error.
 * NOTE(review): the last parameter, the sign/prev_dc bookkeeping and the
 * return statement are absent from this listing.
 */
473 static int estimate_dcs(int *error, DCTELEM *blocks, int blocks_per_slice,
477 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
480 prev_dc = (blocks[0] - 0x4000) / scale;
481 bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
485 *error += FFABS(blocks[0] - 0x4000) % scale;
487 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
488 dc = (blocks[0] - 0x4000) / scale;
489 *error += FFABS(blocks[0] - 0x4000) % scale;
490 delta = dc - prev_dc;
491 new_sign = GET_SIGN(delta);
/* negate delta when sign is -1, leave it unchanged when sign is 0 */
492 delta = (delta ^ sign) - sign;
493 code = MAKE_CODE(delta);
494 bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
/* next codebook index: ceil(code / 2), limited to 3 */
495 codebook = (code + (code & 1)) >> 1;
496 codebook = FFMIN(codebook, 3);
/*
 * Estimate the bit cost of the AC coefficients of a slice, visiting the
 * coefficients in the same order as encode_acs() and summing
 * estimate_vlc() results for runs and levels.
 * For every coefficient the quantisation remainder
 * FFABS(blocks[idx]) % qmat[scan[i]] is added to *error.
 * plane_size_factor is not used by any visible statement.
 * NOTE(review): the zero-level test, the run counter updates, the sign-bit
 * accounting and the return statement are absent from this listing.
 */
504 static int estimate_acs(int *error, DCTELEM *blocks, int blocks_per_slice,
505 int plane_size_factor,
506 const uint8_t *scan, const int16_t *qmat)
509 int run, level, run_cb, lev_cb;
510 int max_coeffs, abs_level;
/* total number of coefficients in the slice (64 per block) */
513 max_coeffs = blocks_per_slice << 6;
514 run_cb = ff_prores_run_to_cb_index[4];
515 lev_cb = ff_prores_lev_to_cb_index[2];
518 for (i = 1; i < 64; i++) {
519 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
520 level = blocks[idx] / qmat[scan[i]];
521 *error += FFABS(blocks[idx]) % qmat[scan[i]];
523 abs_level = FFABS(level);
524 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
525 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
528 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
529 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Estimate the size in bits of one plane of a slice for the quantiser
 * matrix qmat, using the coefficients already stored in td->blocks[plane].
 * The DC and AC estimates are summed and the total is rounded up to a
 * multiple of 8 bits. Quantisation remainders are accumulated in *error.
 * src and linesize are not used by any visible statement.
 * NOTE(review): one parameter line and the declaration of bits are absent
 * from this listing.
 */
540 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
541 const uint16_t *src, int linesize,
543 int blocks_per_mb, int plane_size_factor,
544 const int16_t *qmat, ProresThreadData *td)
546 int blocks_per_slice;
549 blocks_per_slice = mbs_per_slice * blocks_per_mb;
551 bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
552 bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
553 plane_size_factor, ctx->scantable.permutated, qmat);
555 return FFALIGN(bits, 8);
/*
 * Evaluate the quantisers for one slice and extend the trellis by one step.
 *
 * Visible steps:
 *  1. Transform every plane of the slice into td->blocks[] with
 *     get_slice_data().
 *  2. Reset the trellis nodes trellis_node + q for q in
 *     [min_quant, max_quant + 1].
 *  3. For each q in [min_quant, max_quant] estimate the slice size and the
 *     quantisation error with estimate_slice_plane() and store them in
 *     slice_bits[q] / slice_score[q].
 *  4. Fill the extra slot max_quant + 1: if max_quant already fits into
 *     bits_per_mb * mbs_per_slice it is a copy of max_quant with the score
 *     increased by one; otherwise larger quantisers (up to 127) are tried
 *     until the estimate fits. The quantiser chosen for that slot is stored
 *     in the node as "overquant".
 *  5. For every node pq of the previous slice and every candidate q, add
 *     the bit counts and scores and keep, per q, the predecessor that gives
 *     the lowest score.
 *  6. Locate the node of this slice with the lowest score (pq).
 *
 * NOTE(review): many short statements are absent from this listing (e.g.
 * the xp/yp/num_cblocks assignments, the resets of bits and error, the
 * actions taken when a limit is exceeded, the qmat selection and the return
 * statement), so the exact handling of over-limit cases cannot be read from
 * here.
 */
558 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
559 int trellis_node, int x, int y, int mbs_per_slice,
560 ProresThreadData *td)
562 ProresContext *ctx = avctx->priv_data;
563 int i, q, pq, xp, yp;
565 int slice_width_factor = av_log2(mbs_per_slice);
566 int num_cblocks[MAX_PLANES], pwidth;
567 int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
568 const int min_quant = ctx->profile_info->min_quant;
569 const int max_quant = ctx->profile_info->max_quant;
570 int error, bits, bits_limit;
571 int mbs, prev, cur, new_score;
/* indexed by quantiser; index max_quant + 1 is the extra "overquant" slot */
572 int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
/* macroblocks of this row covered up to and including this slice */
576 mbs = x + mbs_per_slice;
578 for (i = 0; i < ctx->num_planes; i++) {
579 is_chroma[i] = (i == 1 || i == 2);
580 plane_factor[i] = slice_width_factor + 2;
582 plane_factor[i] += ctx->chroma_factor - 3;
583 if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
587 pwidth = avctx->width;
592 pwidth = avctx->width >> 1;
594 src = (const uint16_t*)(pic->data[i] + yp * pic->linesize[i]) + xp;
596 get_slice_data(ctx, src, pic->linesize[i], xp, yp,
597 pwidth, avctx->height, td->blocks[i], td->emu_buf,
598 mbs_per_slice, num_cblocks[i], is_chroma[i]);
/* mark this slice's nodes as not yet reached */
601 for (q = min_quant; q < max_quant + 2; q++) {
602 td->nodes[trellis_node + q].prev_node = -1;
603 td->nodes[trellis_node + q].quant = q;
606 // todo: maybe perform coarser quantising to fit into frame size when needed
607 for (q = min_quant; q <= max_quant; q++) {
610 for (i = 0; i < ctx->num_planes; i++) {
611 bits += estimate_slice_plane(ctx, &error, i,
612 src, pic->linesize[i],
614 num_cblocks[i], plane_factor[i],
617 if (bits > 65000 * 8) {
621 slice_bits[q] = bits;
622 slice_score[q] = error;
/* extra slot: reuse max_quant if it already fits the per-slice budget */
624 if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
625 slice_bits[max_quant + 1] = slice_bits[max_quant];
626 slice_score[max_quant + 1] = slice_score[max_quant] + 1;
627 overquant = max_quant;
/* otherwise try coarser quantisers until the estimate fits */
629 for (q = max_quant + 1; q < 128; q++) {
632 if (q < MAX_STORED_Q) {
633 qmat = ctx->quants[q];
636 for (i = 0; i < 64; i++)
637 qmat[i] = ctx->quant_mat[i] * q;
639 for (i = 0; i < ctx->num_planes; i++) {
640 bits += estimate_slice_plane(ctx, &error, i,
641 src, pic->linesize[i],
643 num_cblocks[i], plane_factor[i],
646 if (bits <= ctx->bits_per_mb * mbs_per_slice)
650 slice_bits[max_quant + 1] = bits;
651 slice_score[max_quant + 1] = error;
654 td->nodes[trellis_node + max_quant + 1].quant = overquant;
/* cumulative bit budget for the row up to this slice */
656 bits_limit = mbs * ctx->bits_per_mb;
657 for (pq = min_quant; pq < max_quant + 2; pq++) {
/* node of the previous slice that used quantiser slot pq */
658 prev = trellis_node - TRELLIS_WIDTH + pq;
660 for (q = min_quant; q < max_quant + 2; q++) {
661 cur = trellis_node + q;
663 bits = td->nodes[prev].bits + slice_bits[q];
664 error = slice_score[q];
665 if (bits > bits_limit)
/* scores at or above SCORE_LIMIT are not summed */
668 if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
669 new_score = td->nodes[prev].score + error;
671 new_score = SCORE_LIMIT;
/* keep the predecessor with the lowest score (ties replace) */
672 if (td->nodes[cur].prev_node == -1 ||
673 td->nodes[cur].score >= new_score) {
675 td->nodes[cur].bits = bits;
676 td->nodes[cur].score = new_score;
677 td->nodes[cur].prev_node = prev;
/* pick the node of this slice with the lowest score */
682 error = td->nodes[trellis_node + min_quant].score;
683 pq = trellis_node + min_quant;
684 for (q = min_quant + 1; q < max_quant + 2; q++) {
685 if (td->nodes[trellis_node + q].score <= error) {
686 error = td->nodes[trellis_node + q].score;
687 pq = trellis_node + q;
/*
 * Worker entry point (passed to avctx->execute2 by encode_frame()).
 * jobnr is used as the macroblock row y; threadnr selects the
 * ProresThreadData. The row is walked slice by slice, calling
 * find_slice_quant() with trellis offset (mb + 1) * TRELLIS_WIDTH and
 * keeping its result in q. The trellis is then followed backwards through
 * prev_node, storing each node's quant in ctx->slice_q[] for the slices of
 * this row from last to first.
 * NOTE(review): the body of the while loop, the tail of the
 * find_slice_quant() argument list and the return statement are absent
 * from this listing.
 */
694 static int find_quant_thread(AVCodecContext *avctx, void *arg,
695 int jobnr, int threadnr)
697 ProresContext *ctx = avctx->priv_data;
698 ProresThreadData *td = ctx->tdata + threadnr;
699 int mbs_per_slice = ctx->mbs_per_slice;
700 int x, y = jobnr, mb, q = 0;
702 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
/* fewer macroblocks remain in the row than the current slice width */
703 while (ctx->mb_width - x < mbs_per_slice)
705 q = find_slice_quant(avctx, avctx->coded_frame,
706 (mb + 1) * TRELLIS_WIDTH, x, y,
/* backtrack from the last slice of the row to the first */
710 for (x = ctx->slices_width - 1; x >= 0; x--) {
711 ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
712 q = td->nodes[q].prev_node;
/*
 * Encode one picture into pkt.
 *
 * Visible layout of the output, in write order:
 *   - 4 bytes reserved for the frame size, then FRAME_ID
 *   - frame header: 2 bytes reserved for its size, version, vendor, width,
 *     height, flags (chroma_factor << 6), colour description bytes, 0x40,
 *     matrix flags and, when a non-default matrix is selected, the same
 *     64-byte matrix written twice (luma and chroma)
 *   - picture header: size byte 0x40, 4 bytes reserved for the picture
 *     size, slice count, log2(mbs_per_slice) << 4
 *   - seek table with 2 bytes per slice, filled while encoding
 *   - the slices, each with a header holding the quantiser and the sizes of
 *     all planes except the last
 * Unless a quantiser is forced, find_quant_thread() is run through
 * avctx->execute2 first to fill ctx->slice_q[].
 * Finally the frame and picture sizes are written back, and pkt->size and
 * the key-frame flag are set.
 *
 * NOTE(review): a number of short statements (pointer set-up for buf, tmp,
 * slice_hdr and slice_sizes, error returns, the body of the while loop, the
 * final return) are absent from this listing.
 */
718 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
719 const AVFrame *pic, int *got_packet)
721 ProresContext *ctx = avctx->priv_data;
722 uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
723 uint8_t *picture_size_pos;
725 int x, y, i, mb, q = 0;
726 int sizes[4] = { 0 };
/* 2 bytes plus a 16-bit size for every plane except the last */
727 int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
728 int frame_size, picture_size, slice_size;
729 int mbs_per_slice = ctx->mbs_per_slice;
/* the input frame is copied into coded_frame and marked as an intra key frame */
732 *avctx->coded_frame = *pic;
733 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
734 avctx->coded_frame->key_frame = 1;
/* size estimate from encode_init() plus FF_MIN_BUFFER_SIZE */
736 pkt_size = ctx->frame_size + FF_MIN_BUFFER_SIZE;
738 if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
741 orig_buf = pkt->data;
744 orig_buf += 4; // frame size
745 bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
750 buf += 2; // frame header size will be stored here
751 bytestream_put_be16 (&buf, 0); // version 1
752 bytestream_put_buffer(&buf, ctx->vendor, 4);
753 bytestream_put_be16 (&buf, avctx->width);
754 bytestream_put_be16 (&buf, avctx->height);
755 bytestream_put_byte (&buf, ctx->chroma_factor << 6); // frame flags
756 bytestream_put_byte (&buf, 0); // reserved
757 bytestream_put_byte (&buf, avctx->color_primaries);
758 bytestream_put_byte (&buf, avctx->color_trc);
759 bytestream_put_byte (&buf, avctx->colorspace);
760 bytestream_put_byte (&buf, 0x40); // source format and alpha information
761 bytestream_put_byte (&buf, 0); // reserved
762 if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
763 bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
764 // luma quantisation matrix
765 for (i = 0; i < 64; i++)
766 bytestream_put_byte(&buf, ctx->quant_mat[i]);
767 // chroma quantisation matrix
768 for (i = 0; i < 64; i++)
769 bytestream_put_byte(&buf, ctx->quant_mat[i]);
771 bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
773 bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
/* position of the 4-byte picture size, one byte after the header size byte */
776 picture_size_pos = buf + 1;
777 bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
778 buf += 4; // picture data size will be stored here
779 bytestream_put_be16 (&buf, ctx->num_slices); // total number of slices
780 bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
782 // seek table - will be filled during slice encoding
784 buf += ctx->num_slices * 2;
/* rate-controlled mode: choose a quantiser for every slice first */
787 if (!ctx->force_quant) {
788 ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
794 for (y = 0; y < ctx->mb_height; y++) {
795 mbs_per_slice = ctx->mbs_per_slice;
796 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
797 q = ctx->force_quant ? ctx->force_quant
798 : ctx->slice_q[mb + y * ctx->slices_width];
/* fewer macroblocks remain in the row than the current slice width */
800 while (ctx->mb_width - x < mbs_per_slice)
/* slice header size, stored in bits */
803 bytestream_put_byte(&buf, slice_hdr_size << 3);
805 buf += slice_hdr_size - 1;
806 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
807 encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
809 bytestream_put_byte(&slice_hdr, q);
/* the last plane's size is not stored in the header but counts towards the slice size */
810 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
811 for (i = 0; i < ctx->num_planes - 1; i++) {
812 bytestream_put_be16(&slice_hdr, sizes[i]);
813 slice_size += sizes[i];
/* seek table entry for this slice */
815 bytestream_put_be16(&slice_sizes, slice_size);
816 buf += slice_size - slice_hdr_size;
821 frame_size = buf - orig_buf;
822 picture_size = buf - picture_size_pos - 6;
823 bytestream_put_be32(&orig_buf, frame_size);
824 bytestream_put_be32(&picture_size_pos, picture_size);
826 pkt->size = frame_size;
827 pkt->flags |= AV_PKT_FLAG_KEY;
/*
 * Release everything owned by the encoder: the buffer attached to
 * coded_frame (if any) and the frame itself, the trellis node array of
 * every thread, the thread data array and the slice quantiser table.
 * NOTE(review): avctx->coded_frame and ctx->tdata are dereferenced by the
 * visible statements; any guard against them being NULL, the declaration
 * of i and the return statement are absent from this listing.
 */
833 static av_cold int encode_close(AVCodecContext *avctx)
835 ProresContext *ctx = avctx->priv_data;
838 if (avctx->coded_frame->data[0])
839 avctx->release_buffer(avctx, avctx->coded_frame);
841 av_freep(&avctx->coded_frame);
844 for (i = 0; i < avctx->thread_count; i++)
845 av_free(ctx->tdata[i].nodes);
847 av_freep(&ctx->tdata);
848 av_freep(&ctx->slice_q);
/*
 * Initialise the encoder.
 *
 * Visible steps:
 *  - allocate coded_frame, set up the DSP context and the scan table
 *  - reject an mbs_per_slice value that is not a power of two
 *  - derive chroma_factor from the pixel format and select the profile
 *  - compute the picture size in macroblocks and the slice layout: each row
 *    holds mb_width / mps full-width slices plus one extra slice per set
 *    bit of the remaining macroblock count
 *  - select the quantisation matrix and check that the vendor ID has four
 *    characters
 *  - force_quant = global_quality / FF_QP2LAMBDA
 *      * zero: pick bits_per_mb (from the profile table when not given,
 *        minimum 128 when given), precompute quants[min_quant ..
 *        MAX_STORED_Q - 1], allocate slice_q, the per-thread data and the
 *        trellis nodes, and zero the first node group
 *      * non-zero: reject values above 64, build quants[0] and derive
 *        bits_per_mb from the matrix
 *  - estimate the frame size, set the codec tag and log the parameters
 *
 * Returns AVERROR(EINVAL), AVERROR_INVALIDDATA or AVERROR(ENOMEM) on the
 * visible failure paths.
 * NOTE(review): the visible early returns for invalid parameters come after
 * coded_frame has been allocated and do not release it in any visible
 * statement; whether the caller invokes encode_close() on failure cannot be
 * determined from here. Many short statements (declarations, else branches,
 * the loop break, clean-up calls, the final return) are absent from this
 * listing.
 */
853 static av_cold int encode_init(AVCodecContext *avctx)
855 ProresContext *ctx = avctx->priv_data;
858 int min_quant, max_quant;
860 avctx->bits_per_raw_sample = 10;
861 avctx->coded_frame = avcodec_alloc_frame();
862 if (!avctx->coded_frame)
863 return AVERROR(ENOMEM);
865 ff_proresdsp_init(&ctx->dsp, avctx);
866 ff_init_scantable(ctx->dsp.dct_permutation, &ctx->scantable,
867 ff_prores_progressive_scan);
869 mps = ctx->mbs_per_slice;
/* non-zero when mps has more than one bit set */
870 if (mps & (mps - 1)) {
871 av_log(avctx, AV_LOG_ERROR,
872 "there should be an integer power of two MBs per slice\n");
873 return AVERROR(EINVAL);
876 ctx->chroma_factor = avctx->pix_fmt == PIX_FMT_YUV422P10
879 ctx->profile_info = prores_profile_info + ctx->profile;
/* picture dimensions rounded up to whole 16x16 macroblocks */
882 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
883 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
884 ctx->slices_width = ctx->mb_width / mps;
/* one additional slice per set bit of the leftover macroblock count */
885 ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
886 ctx->num_slices = ctx->mb_height * ctx->slices_width;
/* -1 is the "auto" value of the quant_mat option: use the profile's matrix */
888 if (ctx->quant_sel == -1)
889 ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
891 ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
893 if (strlen(ctx->vendor) != 4) {
894 av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
895 return AVERROR_INVALIDDATA;
898 ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
899 if (!ctx->force_quant) {
900 if (!ctx->bits_per_mb) {
/* the last table slot is used when no earlier threshold matches */
901 for (i = 0; i < NUM_MB_LIMITS - 1; i++)
902 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height)
904 ctx->bits_per_mb = ctx->profile_info->br_tab[i];
905 } else if (ctx->bits_per_mb < 128) {
906 av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
907 return AVERROR_INVALIDDATA;
910 min_quant = ctx->profile_info->min_quant;
911 max_quant = ctx->profile_info->max_quant;
/* precompute the scaled matrices for the stored quantisers */
912 for (i = min_quant; i < MAX_STORED_Q; i++) {
913 for (j = 0; j < 64; j++)
914 ctx->quants[i][j] = ctx->quant_mat[j] * i;
917 ctx->slice_q = av_malloc(ctx->num_slices * sizeof(*ctx->slice_q));
920 return AVERROR(ENOMEM);
923 ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
926 return AVERROR(ENOMEM);
929 for (j = 0; j < avctx->thread_count; j++) {
930 ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
932 * sizeof(*ctx->tdata->nodes));
933 if (!ctx->tdata[j].nodes) {
935 return AVERROR(ENOMEM);
/* first node group: no predecessor, zero bits and score */
937 for (i = min_quant; i < max_quant + 2; i++) {
938 ctx->tdata[j].nodes[i].prev_node = -1;
939 ctx->tdata[j].nodes[i].bits = 0;
940 ctx->tdata[j].nodes[i].score = 0;
946 if (ctx->force_quant > 64) {
947 av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
948 return AVERROR_INVALIDDATA;
951 for (j = 0; j < 64; j++) {
952 ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
953 ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
956 ctx->bits_per_mb = ls * 8;
957 if (ctx->chroma_factor == CFACTOR_Y444)
958 ctx->bits_per_mb += ls * 4;
959 if (ctx->num_planes == 4)
960 ctx->bits_per_mb += ls * 4;
/* estimate later used by encode_frame() to size the output packet */
963 ctx->frame_size = ctx->num_slices * (2 + 2 * ctx->num_planes
964 + (2 * mps * ctx->bits_per_mb) / 8)
967 avctx->codec_tag = ctx->profile_info->tag;
969 av_log(avctx, AV_LOG_DEBUG, "profile %d, %d slices, %d bits per MB\n",
970 ctx->profile, ctx->num_slices, ctx->bits_per_mb);
971 av_log(avctx, AV_LOG_DEBUG, "estimated frame size %d\n",
/*
 * Helpers for the option table below.
 *   OFFSET(x) - byte offset of member x inside ProresContext
 *   VE        - flag combination given to every option (video + encoding).
 *               The replacement list is not parenthesised; in this file it
 *               is only used as a stand-alone initialiser element.
 */
977 #define OFFSET(x) offsetof(ProresContext, x)
978 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/*
 * Private options of the encoder.
 *   mbs_per_slice - macroblocks per slice, default 8, range
 *                   1..MAX_MBS_PER_SLICE
 *   profile       - default PRORES_PROFILE_STANDARD, with named constants
 *                   proxy, lt, standard and hq
 *   vendor        - vendor ID string, default "Lavc"
 *   bits_per_mb   - desired bits per macroblock, default 0, range 0..8192
 *   quant_mat     - quantiser matrix selection, default -1 ("auto"), with
 *                   named constants auto, proxy, lt, standard, hq, default
 * NOTE(review): the terminating entry and closing brace of the table are
 * absent from this listing.
 */
980 static const AVOption options[] = {
981 { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
982 AV_OPT_TYPE_INT, { 8 }, 1, MAX_MBS_PER_SLICE, VE },
983 { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
984 { PRORES_PROFILE_STANDARD },
985 PRORES_PROFILE_PROXY, PRORES_PROFILE_HQ, VE, "profile" },
/* named constants of the "profile" unit */
986 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_PROXY },
987 0, 0, VE, "profile" },
988 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_LT },
989 0, 0, VE, "profile" },
990 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_STANDARD },
991 0, 0, VE, "profile" },
992 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_HQ },
993 0, 0, VE, "profile" },
994 { "vendor", "vendor ID", OFFSET(vendor),
995 AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
996 { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
997 AV_OPT_TYPE_INT, { 0 }, 0, 8192, VE },
998 { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
999 { -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
/* named constants of the "quant_mat" unit */
1000 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { -1 },
1001 0, 0, VE, "quant_mat" },
1002 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_PROXY },
1003 0, 0, VE, "quant_mat" },
1004 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_LT },
1005 0, 0, VE, "quant_mat" },
1006 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_STANDARD },
1007 0, 0, VE, "quant_mat" },
1008 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_HQ },
1009 0, 0, VE, "quant_mat" },
1010 { "default", NULL, 0, AV_OPT_TYPE_CONST, { QUANT_MAT_DEFAULT },
1011 0, 0, VE, "quant_mat" },
/*
 * AVClass describing the encoder's private context; referenced by the
 * codec definition through .priv_class.
 * NOTE(review): at least one member initialiser and the closing brace are
 * absent from this listing.
 */
1015 static const AVClass proresenc_class = {
1016 .class_name = "ProRes encoder",
1017 .item_name = av_default_item_name,
1019 .version = LIBAVUTIL_VERSION_INT,
/*
 * Codec registration entry for the "prores_kostya" video encoder: wires the
 * init/close/encode2 callbacks defined in this file, advertises
 * slice-threading capability and lists the two supported pixel formats
 * (10-bit planar 4:2:2 and 4:4:4).
 */
1022 AVCodec ff_prores_kostya_encoder = {
1023 .name = "prores_kostya",
1024 .type = AVMEDIA_TYPE_VIDEO,
1025 .id = CODEC_ID_PRORES,
1026 .priv_data_size = sizeof(ProresContext),
1027 .init = encode_init,
1028 .close = encode_close,
1029 .encode2 = encode_frame,
1030 .capabilities = CODEC_CAP_SLICE_THREADS,
1031 .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
/* list terminated by PIX_FMT_NONE */
1032 .pix_fmts = (const enum PixelFormat[]) {
1033 PIX_FMT_YUV422P10, PIX_FMT_YUV444P10, PIX_FMT_NONE
1035 .priv_class = &proresenc_class,