4 * Copyright (c) 2012 Konstantin Shishkov
6 * This encoder appears to be based on Anatoliy Wasserman's encoder, considering
7 * the similarities in the bugs.
9 * This file is part of Libav.
11 * Libav is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * Libav is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with Libav; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/opt.h"
30 #include "bytestream.h"
32 #include "proresdsp.h"
33 #include "proresdata.h"
/* Chroma format codes: ProresContext.chroma_factor is compared against
 * CFACTOR_Y444 when plane geometry is chosen and is shifted into the
 * frame flags byte by encode_frame(). */
35 #define CFACTOR_Y422 2
36 #define CFACTOR_Y444 3
/* Upper bound of the "mbs_per_slice" option; also sizes the per-plane
 * coefficient buffers (64 * 4 * MAX_MBS_PER_SLICE entries). */
38 #define MAX_MBS_PER_SLICE 8
/* Number of per-plane coefficient buffers and per-plane bookkeeping slots. */
40 #define MAX_PLANES 3 // should be increased to 4 when there's AV_PIX_FMT_YUV444AP10
/* Profile identifiers; used as an index into prores_profile_info by
 * encode_init(). PROXY is pinned to 0.
 * NOTE(review): the enum header and the remaining enumerators (LT and HQ are
 * referenced by the option table) are not visible in this excerpt. */
43 PRORES_PROFILE_PROXY = 0,
45 PRORES_PROFILE_STANDARD,
/* Base quantisation matrices, 64 entries each. encode_init() selects one row
 * either through the profile's .quant field or through the "quant_mat"
 * option, and multiplies it by the quantiser to build the working matrices.
 * NOTE(review): the initialiser lines that label/separate the individual
 * matrices are not visible in this excerpt, so which group of eight rows
 * belongs to which QUANT_MAT_* index cannot be confirmed from here. */
57 static const uint8_t prores_quant_matrices[][64] = {
59 4, 7, 9, 11, 13, 14, 15, 63,
60 7, 7, 11, 12, 14, 15, 63, 63,
61 9, 11, 13, 14, 15, 63, 63, 63,
62 11, 11, 13, 14, 63, 63, 63, 63,
63 11, 13, 14, 63, 63, 63, 63, 63,
64 13, 14, 63, 63, 63, 63, 63, 63,
65 13, 63, 63, 63, 63, 63, 63, 63,
66 63, 63, 63, 63, 63, 63, 63, 63,
69 4, 5, 6, 7, 9, 11, 13, 15,
70 5, 5, 7, 8, 11, 13, 15, 17,
71 6, 7, 9, 11, 13, 15, 15, 17,
72 7, 7, 9, 11, 13, 15, 17, 19,
73 7, 9, 11, 13, 14, 16, 19, 23,
74 9, 11, 13, 14, 16, 19, 23, 29,
75 9, 11, 13, 15, 17, 21, 28, 35,
76 11, 13, 16, 17, 21, 28, 35, 41,
79 4, 4, 5, 5, 6, 7, 7, 9,
80 4, 4, 5, 6, 7, 7, 9, 9,
81 5, 5, 6, 7, 7, 9, 9, 10,
82 5, 5, 6, 7, 7, 9, 9, 10,
83 5, 6, 7, 7, 8, 9, 10, 12,
84 6, 7, 7, 8, 9, 10, 12, 15,
85 6, 7, 7, 9, 10, 11, 14, 17,
86 7, 7, 9, 10, 11, 14, 17, 21,
89 4, 4, 4, 4, 4, 4, 4, 4,
90 4, 4, 4, 4, 4, 4, 4, 4,
91 4, 4, 4, 4, 4, 4, 4, 4,
92 4, 4, 4, 4, 4, 4, 4, 5,
93 4, 4, 4, 4, 4, 4, 5, 5,
94 4, 4, 4, 4, 4, 5, 5, 6,
95 4, 4, 4, 4, 5, 5, 6, 7,
96 4, 4, 4, 4, 5, 6, 7, 7,
99 4, 4, 4, 4, 4, 4, 4, 4,
100 4, 4, 4, 4, 4, 4, 4, 4,
101 4, 4, 4, 4, 4, 4, 4, 4,
102 4, 4, 4, 4, 4, 4, 4, 4,
103 4, 4, 4, 4, 4, 4, 4, 4,
104 4, 4, 4, 4, 4, 4, 4, 4,
105 4, 4, 4, 4, 4, 4, 4, 4,
106 4, 4, 4, 4, 4, 4, 4, 4,
/* Macroblock-count thresholds. When the user leaves bits_per_mb unset,
 * encode_init() compares mb_width * mb_height * pictures_per_frame against
 * these entries to choose which column of a profile's br_tab to use. */
110 #define NUM_MB_LIMITS 4
111 static const int prores_mb_limits[NUM_MB_LIMITS] = {
112 1620, // up to 720x576
113 2700, // up to 960x720
114 6075, // up to 1440x1080
115 9216, // up to 2048x1152
/* Per-profile encoder parameters, indexed by the "profile" option value
 * (see encode_init(): prores_profile_info + ctx->profile).
 *   full_name - human-readable profile name
 *   tag       - fourcc copied to avctx->codec_tag
 *   br_tab    - bits per macroblock, one entry per prores_mb_limits[] class
 *   quant     - index into prores_quant_matrices used when quant_mat is "auto"
 * NOTE(review): min_quant/max_quant are read from this struct elsewhere in the
 * file, but their declarations and initialisers are not visible in this
 * excerpt. */
118 static const struct prores_profile {
119 const char *full_name;
123 int br_tab[NUM_MB_LIMITS];
125 } prores_profile_info[4] = {
127 .full_name = "proxy",
128 .tag = MKTAG('a', 'p', 'c', 'o'),
131 .br_tab = { 300, 242, 220, 194 },
132 .quant = QUANT_MAT_PROXY,
136 .tag = MKTAG('a', 'p', 'c', 's'),
139 .br_tab = { 720, 560, 490, 440 },
140 .quant = QUANT_MAT_LT,
143 .full_name = "standard",
144 .tag = MKTAG('a', 'p', 'c', 'n'),
147 .br_tab = { 1050, 808, 710, 632 },
148 .quant = QUANT_MAT_STANDARD,
151 .full_name = "high quality",
152 .tag = MKTAG('a', 'p', 'c', 'h'),
155 .br_tab = { 1566, 1216, 1070, 950 },
156 .quant = QUANT_MAT_HQ,
158 // for 4444 profile bitrate numbers are { 2350, 1828, 1600, 1425 }
/* Stride, in nodes, between consecutive slice columns of the rate-control
 * trellis; also the size of the per-slice bits/score scratch arrays. */
#define TRELLIS_WIDTH 16
/* Saturation value for accumulated trellis scores. Parenthesised so the
 * macro stays a single operand wherever it is expanded (e.g. next to
 * '%', '*' or a unary minus). */
#define SCORE_LIMIT (INT_MAX / 2)
/* Number of quantisers whose scaled matrices are precomputed in
 * ProresContext.quants; larger quantisers are built on demand. */
#define MAX_STORED_Q 16
/* Per-thread scratch state. encode_init() allocates avctx->thread_count of
 * these (ctx->tdata) and find_quant_thread() picks one by thread number. */
173 typedef struct ProresThreadData {
/* DCT coefficients of the current slice, one buffer per plane */
174 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
/* 16x16 staging area for macroblocks that cross the picture edge */
175 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
176 int16_t custom_q[64];
/* rate-control trellis, (slices_width + 1) columns allocated in encode_init() */
177 struct TrellisNode *nodes;
/* Encoder private context (avctx->priv_data).
 * NOTE(review): several members used elsewhere in the file (e.g. scantable,
 * mbs_per_slice, num_planes, slices_width, bits_per_mb, force_quant,
 * quant_sel, vendor, profile, slice_q, cur_picture_idx) are not visible in
 * this excerpt. */
180 typedef struct ProresContext {
182 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
183 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
/* quants[q][j] = quant_mat[j] * q, filled by encode_init() */
184 int16_t quants[MAX_STORED_Q][64];
/* matrix built on the fly in encode_slice() for quantisers >= MAX_STORED_Q */
185 int16_t custom_q[64];
/* selected row of prores_quant_matrices */
186 const uint8_t *quant_mat;
188 ProresDSPContext dsp;
/* picture size in 16-pixel macroblock units (see encode_init()) */
191 int mb_width, mb_height;
193 int num_chroma_blocks, chroma_factor;
195 int slices_per_picture;
196 int pictures_per_frame; // 1 for progressive, 2 for interlaced
/* packet size bound computed in encode_init(), used to size the output packet */
205 int frame_size_upper_bound;
208 const struct prores_profile *profile_info;
/* per-thread scratch data, one entry per worker thread */
212 ProresThreadData *tdata;
/* Forward-transform the macroblocks of one plane of one slice.
 *
 * Walks mbs_per_slice macroblocks starting at sample position (x, y) of a
 * plane that is w x h samples large, running ctx->dsp.fdct on 8x8 blocks and
 * storing the coefficients in blocks. A macroblock that lies completely
 * inside the plane is presumably transformed straight from src (esrc and
 * elinesize set from the source; the esrc assignment is not visible here);
 * one that crosses the right or bottom edge is first copied into emu_buf
 * with edge replication.
 *
 * linesize is in bytes (rows are addressed through a uint8_t cast);
 * blocks_per_mb sets the macroblock width in samples (4 * blocks_per_mb) and
 * whether the right-hand 8x8 blocks exist (blocks_per_mb > 2).
 *
 * NOTE(review): several original lines (local declarations, branch headers,
 * pointer advances between fdct calls) are not visible in this excerpt; the
 * comments below describe only the visible statements. */
215 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
216 int linesize, int x, int y, int w, int h,
217 int16_t *blocks, uint16_t *emu_buf,
218 int mbs_per_slice, int blocks_per_mb, int is_chroma)
220 const uint16_t *esrc;
221 const int mb_width = 4 * blocks_per_mb;
225 for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
/* zero the coefficients of all remaining macroblocks; presumably taken when
 * the slice runs past the plane width (condition not visible) */
227 memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
/* macroblock fully inside the plane */
231 if (x + mb_width <= w && y + 16 <= h) {
233 elinesize = linesize;
/* partial macroblock: stage it in emu_buf, whose rows are 16 samples wide */
238 elinesize = 16 * sizeof(*emu_buf);
240 bw = FFMIN(w - x, mb_width);
241 bh = FFMIN(h - y, 16);
243 for (j = 0; j < bh; j++) {
244 memcpy(emu_buf + j * 16,
245 (const uint8_t*)src + j * linesize,
/* replicate the last valid sample of the row out to the macroblock width */
247 pix = emu_buf[j * 16 + bw - 1];
248 for (k = bw; k < mb_width; k++)
249 emu_buf[j * 16 + k] = pix;
/* copy the last valid row (bh - 1) into row j; presumably executed for the
 * rows below the picture (enclosing loop not visible) */
252 memcpy(emu_buf + j * 16,
253 emu_buf + (bh - 1) * 16,
254 mb_width * sizeof(*emu_buf));
/* two different 8x8 block orders follow; presumably selected by is_chroma
 * (the branch itself is not visible) */
257 ctx->dsp.fdct(esrc, elinesize, blocks);
259 if (blocks_per_mb > 2) {
260 ctx->dsp.fdct(esrc + 8, elinesize, blocks);
263 ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
265 if (blocks_per_mb > 2) {
266 ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
270 ctx->dsp.fdct(esrc, elinesize, blocks);
272 ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
274 if (blocks_per_mb > 2) {
275 ctx->dsp.fdct(esrc + 8, elinesize, blocks);
277 ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
287 * Write an unsigned Rice/exp-Golomb codeword.
/* Emit val into pb using the hybrid code described by codebook.
 *
 * The codebook descriptor is unpacked as:
 *   bits 0-1: switch_bits - 1
 *   bits 2-4: exp-Golomb order
 *   bits 5+ : Rice order
 * Values at or above switch_bits << rice_order take the exp-Golomb branch,
 * smaller values take the Rice branch.
 *
 * NOTE(review): some lines (declaration of exponent, the else header and a
 * few statements of the Rice branch) are not visible in this excerpt. */
289 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
291 unsigned int rice_order, exp_order, switch_bits, switch_val;
294 /* number of prefix bits to switch between Rice and expGolomb */
295 switch_bits = (codebook & 3) + 1;
296 rice_order = codebook >> 5; /* rice code order */
297 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
299 switch_val = switch_bits << rice_order;
301 if (val >= switch_val) {
/* rebase so that the smallest exp-Golomb value is 1 << exp_order */
302 val -= switch_val - (1 << exp_order);
303 exponent = av_log2(val);
/* zero prefix, then val itself in exponent + 1 bits */
305 put_bits(pb, exponent - exp_order + switch_bits, 0);
306 put_bits(pb, exponent + 1, val);
/* Rice branch: quotient written as a run of zero bits */
308 exponent = val >> rice_order;
311 put_bits(pb, exponent, 0);
/* low rice_order bits of val */
314 put_sbits(pb, rice_order, val);
/* Sign mask of a signed integer: -1 (all bits set) for negative values,
 * 0 otherwise. Written as a comparison so the result does not rely on the
 * implementation-defined right shift of negative values or on int being
 * exactly 32 bits wide. */
#define GET_SIGN(x) (-((x) < 0))
/* Fold a signed value into a non-negative code:
 * 0, -1, 1, -2, 2, ... -> 0, 1, 2, 3, 4, ...
 * Multiplies instead of left-shifting, because left-shifting a negative
 * value is undefined behaviour in C. The argument is evaluated twice. */
#define MAKE_CODE(x) (((x) * 2) ^ GET_SIGN(x))
/* Write the DC coefficients of all blocks of a slice plane.
 *
 * Each DC is taken as (blocks[0] - 0x4000) / scale. The first one is coded
 * directly with FIRST_DC_CB; the following ones are coded as differences to
 * the previous DC, using a codebook chosen from the previous code
 * (index capped at 3).
 *
 * NOTE(review): the updates of sign and prev_dc inside the loop, and any
 * pointer advance before the loop, are not visible in this excerpt. */
321 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
322 int blocks_per_slice, int scale)
325 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
327 prev_dc = (blocks[0] - 0x4000) / scale;
328 encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
333 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
334 dc = (blocks[0] - 0x4000) / scale;
335 delta = dc - prev_dc;
336 new_sign = GET_SIGN(delta);
/* negates delta when sign is -1, leaves it unchanged when sign is 0 */
337 delta = (delta ^ sign) - sign;
338 code = MAKE_CODE(delta);
339 encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
/* next codebook index: ceil(code / 2), limited to 3 */
340 codebook = (code + (code & 1)) >> 1;
341 codebook = FFMIN(codebook, 3);
/* Write the AC coefficients of a slice plane as run/level pairs.
 *
 * Coefficients are visited position by position: for every scan position
 * i in 1..63, that coefficient of every block in the slice is processed
 * (idx advances by 64 per block) before moving on to the next position.
 * Each coefficient is divided by the matching qmat entry; run and level
 * codebooks adapt to the previously coded run (capped at 15) and absolute
 * level (capped at 9).
 *
 * NOTE(review): the zero-level test, the run counter updates and the level
 * argument of the second codeword call are not visible in this excerpt. */
347 static void encode_acs(PutBitContext *pb, int16_t *blocks,
348 int blocks_per_slice,
349 int plane_size_factor,
350 const uint8_t *scan, const int16_t *qmat)
353 int run, level, run_cb, lev_cb;
354 int max_coeffs, abs_level;
/* total number of coefficients in the slice plane (64 per block) */
356 max_coeffs = blocks_per_slice << 6;
/* initial codebooks */
357 run_cb = ff_prores_run_to_cb_index[4];
358 lev_cb = ff_prores_lev_to_cb_index[2];
361 for (i = 1; i < 64; i++) {
362 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
363 level = blocks[idx] / qmat[scan[i]];
365 abs_level = FFABS(level);
366 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
367 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
/* sign of the level as a single bit */
369 put_sbits(pb, 1, GET_SIGN(level));
371 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
372 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/* Encode one plane of a slice: DC coefficients first, then AC coefficients.
 *
 * qmat[0] is used as the DC scale. Returns the number of bits added to pb
 * since entry, shifted right by 3 (i.e. expressed in bytes).
 *
 * NOTE(review): the last parameter line and the statements between the
 * encode_acs() call and the return are not visible in this excerpt. */
381 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
382 const uint16_t *src, int linesize,
383 int mbs_per_slice, int16_t *blocks,
384 int blocks_per_mb, int plane_size_factor,
387 int blocks_per_slice, saved_pos;
389 saved_pos = put_bits_count(pb);
390 blocks_per_slice = mbs_per_slice * blocks_per_mb;
392 encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
393 encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
394 ctx->scantable.permutated, qmat);
397 return (put_bits_count(pb) - saved_pos) >> 3;
/* Encode all planes of one slice into pb.
 *
 * Picks the quantisation matrix for the slice, then for every plane fetches
 * and transforms the source samples (get_slice_data) and entropy-codes them
 * (encode_slice_plane). The per-plane byte counts are stored in sizes[] and
 * accumulated in total_size.
 *
 * NOTE(review): parts of the signature, the xp/yp/num_cblocks assignments
 * and the return statement are not visible in this excerpt. */
400 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
402 int sizes[4], int x, int y, int quant,
405 ProresContext *ctx = avctx->priv_data;
409 int slice_width_factor = av_log2(mbs_per_slice);
410 int num_cblocks, pwidth, linesize, line_add;
411 int plane_factor, is_chroma;
/* line_add selects the starting line of the current field when the frame is
 * coded as two pictures */
414 if (ctx->pictures_per_frame == 1)
417 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
/* forced quantiser: matrix precomputed in quants[0]; otherwise use the
 * stored matrix for small quantisers or build one in custom_q */
419 if (ctx->force_quant) {
420 qmat = ctx->quants[0];
421 } else if (quant < MAX_STORED_Q) {
422 qmat = ctx->quants[quant];
424 qmat = ctx->custom_q;
425 for (i = 0; i < 64; i++)
426 qmat[i] = ctx->quant_mat[i] * quant;
429 for (i = 0; i < ctx->num_planes; i++) {
430 is_chroma = (i == 1 || i == 2);
431 plane_factor = slice_width_factor + 2;
433 plane_factor += ctx->chroma_factor - 3;
/* full-width plane (luma, or chroma in 4:4:4) versus half-width chroma */
434 if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
438 pwidth = avctx->width;
443 pwidth = avctx->width >> 1;
/* with two pictures per frame, consecutive picture lines are two frame
 * lines apart */
446 linesize = pic->linesize[i] * ctx->pictures_per_frame;
447 src = (const uint16_t*)(pic->data[i] + yp * linesize +
448 line_add * pic->linesize[i]) + xp;
/* every plane reuses ctx->blocks[0] as its coefficient buffer */
450 get_slice_data(ctx, src, linesize, xp, yp,
451 pwidth, avctx->height / ctx->pictures_per_frame,
452 ctx->blocks[0], ctx->emu_buf,
453 mbs_per_slice, num_cblocks, is_chroma);
454 sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
455 mbs_per_slice, ctx->blocks[0],
456 num_cblocks, plane_factor,
458 total_size += sizes[i];
/* Return the length in bits of the codeword that would represent val under
 * the given codebook descriptor, without writing anything.
 *
 * The descriptor is unpacked exactly as in encode_vlc_codeword(). For the
 * exp-Golomb branch the result equals the sum of the two field widths that
 * encode_vlc_codeword() writes:
 *   (exponent - exp_order + switch_bits) + (exponent + 1). */
463 static inline int estimate_vlc(unsigned codebook, int val)
465 unsigned int rice_order, exp_order, switch_bits, switch_val;
468 /* number of prefix bits to switch between Rice and expGolomb */
469 switch_bits = (codebook & 3) + 1;
470 rice_order = codebook >> 5; /* rice code order */
471 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
473 switch_val = switch_bits << rice_order;
475 if (val >= switch_val) {
476 val -= switch_val - (1 << exp_order);
477 exponent = av_log2(val);
479 return exponent * 2 - exp_order + switch_bits + 1;
/* Rice branch: quotient bits + remainder bits + one more bit */
481 return (val >> rice_order) + rice_order + 1;
/* Estimate the bit cost of the DC coefficients of a slice plane.
 *
 * Follows the same steps as encode_dcs() but sums estimate_vlc() lengths in
 * bits instead of writing. For every block, the remainder of
 * |DC - 0x4000| divided by scale is added to *error as the quantisation
 * error.
 *
 * NOTE(review): the scale parameter line, the sign/prev_dc updates and the
 * return statement are not visible in this excerpt. */
485 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
489 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
492 prev_dc = (blocks[0] - 0x4000) / scale;
493 bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
497 *error += FFABS(blocks[0] - 0x4000) % scale;
499 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
500 dc = (blocks[0] - 0x4000) / scale;
501 *error += FFABS(blocks[0] - 0x4000) % scale;
502 delta = dc - prev_dc;
503 new_sign = GET_SIGN(delta);
/* negates delta when sign is -1, leaves it unchanged when sign is 0 */
504 delta = (delta ^ sign) - sign;
505 code = MAKE_CODE(delta);
506 bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
/* next codebook index: ceil(code / 2), limited to 3 */
507 codebook = (code + (code & 1)) >> 1;
508 codebook = FFMIN(codebook, 3);
/* Estimate the bit cost of the AC coefficients of a slice plane.
 *
 * Uses the same traversal and codebook adaptation as encode_acs(), summing
 * estimate_vlc() lengths into bits. The remainder of each |coefficient|
 * divided by its quantiser is added to *error.
 *
 * NOTE(review): the zero-level test, the run counter handling, the sign-bit
 * accounting and the return statement are not visible in this excerpt. */
516 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
517 int plane_size_factor,
518 const uint8_t *scan, const int16_t *qmat)
521 int run, level, run_cb, lev_cb;
522 int max_coeffs, abs_level;
/* total number of coefficients in the slice plane (64 per block) */
525 max_coeffs = blocks_per_slice << 6;
526 run_cb = ff_prores_run_to_cb_index[4];
527 lev_cb = ff_prores_lev_to_cb_index[2];
530 for (i = 1; i < 64; i++) {
531 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
532 level = blocks[idx] / qmat[scan[i]];
533 *error += FFABS(blocks[idx]) % qmat[scan[i]];
535 abs_level = FFABS(level);
536 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
537 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
540 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
541 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/* Estimate the coded size of one plane of a slice for a given matrix.
 *
 * Works on the already transformed coefficients in td->blocks[plane]; adds
 * the DC and AC estimates and returns the total passed through
 * FFALIGN(bits, 8), i.e. aligned to a whole number of bytes. The
 * quantisation error is accumulated in *error. qmat[0] is used as the DC
 * scale. */
552 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
553 const uint16_t *src, int linesize,
555 int blocks_per_mb, int plane_size_factor,
556 const int16_t *qmat, ProresThreadData *td)
558 int blocks_per_slice;
561 blocks_per_slice = mbs_per_slice * blocks_per_mb;
563 bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
564 bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
565 plane_size_factor, ctx->scantable.permutated, qmat);
567 return FFALIGN(bits, 8);
/* Rate control for one slice: fill one column of the quantiser trellis.
 *
 * Steps visible in this excerpt:
 *  1. transform every plane of the slice into td->blocks[];
 *  2. reset the trellis nodes of this column (index trellis_node + q);
 *  3. estimate bits and error for every quantiser in
 *     [min_quant, max_quant];
 *  4. fill the extra slot max_quant + 1 ("overquant"): reuse max_quant when
 *     it already fits ctx->bits_per_mb * mbs_per_slice, otherwise try
 *     quantisers up to 127 and stop at the first one that fits;
 *  5. connect every node of the previous column to every node of this one,
 *     keeping for each node the predecessor with the lowest accumulated
 *     score;
 *  6. pick the node of this column with the lowest score (pq).
 *
 * The caller (find_quant_thread) uses the return value as a node index for
 * backtracking; presumably pq is returned.
 *
 * NOTE(review): several declarations, assignments (xp, yp, num_cblocks, the
 * resetting of bits/error, qmat selection) and the return statement are not
 * visible in this excerpt. */
570 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
571 int trellis_node, int x, int y, int mbs_per_slice,
572 ProresThreadData *td)
574 ProresContext *ctx = avctx->priv_data;
575 int i, q, pq, xp, yp;
577 int slice_width_factor = av_log2(mbs_per_slice);
578 int num_cblocks[MAX_PLANES], pwidth;
579 int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
580 const int min_quant = ctx->profile_info->min_quant;
581 const int max_quant = ctx->profile_info->max_quant;
582 int error, bits, bits_limit;
583 int mbs, prev, cur, new_score;
584 int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
587 int linesize[4], line_add;
589 if (ctx->pictures_per_frame == 1)
592 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
/* macroblocks of this row covered up to and including this slice */
593 mbs = x + mbs_per_slice;
595 for (i = 0; i < ctx->num_planes; i++) {
596 is_chroma[i] = (i == 1 || i == 2);
597 plane_factor[i] = slice_width_factor + 2;
599 plane_factor[i] += ctx->chroma_factor - 3;
/* full-width plane (luma, or chroma in 4:4:4) versus half-width chroma */
600 if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
604 pwidth = avctx->width;
609 pwidth = avctx->width >> 1;
612 linesize[i] = pic->linesize[i] * ctx->pictures_per_frame;
613 src = (const uint16_t*)(pic->data[i] + yp * linesize[i] +
614 line_add * pic->linesize[i]) + xp;
616 get_slice_data(ctx, src, linesize[i], xp, yp,
617 pwidth, avctx->height / ctx->pictures_per_frame,
618 td->blocks[i], td->emu_buf,
619 mbs_per_slice, num_cblocks[i], is_chroma[i]);
/* reset this trellis column, including the overquant slot max_quant + 1 */
622 for (q = min_quant; q < max_quant + 2; q++) {
623 td->nodes[trellis_node + q].prev_node = -1;
624 td->nodes[trellis_node + q].quant = q;
627 // todo: maybe perform coarser quantising to fit into frame size when needed
628 for (q = min_quant; q <= max_quant; q++) {
631 for (i = 0; i < ctx->num_planes; i++) {
632 bits += estimate_slice_plane(ctx, &error, i,
635 num_cblocks[i], plane_factor[i],
/* slice too large; the handling inside this branch is not visible here */
638 if (bits > 65000 * 8) {
642 slice_bits[q] = bits;
643 slice_score[q] = error;
/* overquant slot: reuse max_quant if it already fits the slice budget */
645 if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
646 slice_bits[max_quant + 1] = slice_bits[max_quant];
647 slice_score[max_quant + 1] = slice_score[max_quant] + 1;
648 overquant = max_quant;
/* otherwise search coarser quantisers until the slice fits */
650 for (q = max_quant + 1; q < 128; q++) {
653 if (q < MAX_STORED_Q) {
654 qmat = ctx->quants[q];
657 for (i = 0; i < 64; i++)
658 qmat[i] = ctx->quant_mat[i] * q;
660 for (i = 0; i < ctx->num_planes; i++) {
661 bits += estimate_slice_plane(ctx, &error, i,
664 num_cblocks[i], plane_factor[i],
667 if (bits <= ctx->bits_per_mb * mbs_per_slice)
671 slice_bits[max_quant + 1] = bits;
672 slice_score[max_quant + 1] = error;
675 td->nodes[trellis_node + max_quant + 1].quant = overquant;
/* cumulative bit budget for the row up to the end of this slice */
677 bits_limit = mbs * ctx->bits_per_mb;
/* try every (previous node, current node) pair */
678 for (pq = min_quant; pq < max_quant + 2; pq++) {
679 prev = trellis_node - TRELLIS_WIDTH + pq;
681 for (q = min_quant; q < max_quant + 2; q++) {
682 cur = trellis_node + q;
684 bits = td->nodes[prev].bits + slice_bits[q];
685 error = slice_score[q];
/* over budget; the adjustment applied here is not visible in this excerpt */
686 if (bits > bits_limit)
/* accumulate the score, saturating at SCORE_LIMIT */
689 if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
690 new_score = td->nodes[prev].score + error;
692 new_score = SCORE_LIMIT;
/* keep this path if the node is unset or the new path is not worse */
693 if (td->nodes[cur].prev_node == -1 ||
694 td->nodes[cur].score >= new_score) {
696 td->nodes[cur].bits = bits;
697 td->nodes[cur].score = new_score;
698 td->nodes[cur].prev_node = prev;
/* select the node of this column with the lowest score; ties go to the
 * larger quantiser because of the <= comparison */
703 error = td->nodes[trellis_node + min_quant].score;
704 pq = trellis_node + min_quant;
705 for (q = min_quant + 1; q < max_quant + 2; q++) {
706 if (td->nodes[trellis_node + q].score <= error) {
707 error = td->nodes[trellis_node + q].score;
708 pq = trellis_node + q;
/* Worker for avctx->execute2(): choose quantisers for one macroblock row.
 *
 * jobnr is the macroblock row (y); threadnr selects the per-thread scratch
 * data. The row is walked slice by slice, each slice extending the trellis
 * by one column (column index mb + 1). The best path is then followed
 * backwards through prev_node, writing the quantiser of every slice of the
 * row into ctx->slice_q.
 *
 * NOTE(review): the body of the while loop (presumably shrinking
 * mbs_per_slice for the last, narrower slices), the remaining arguments of
 * the find_slice_quant() call and the return statement are not visible in
 * this excerpt. */
715 static int find_quant_thread(AVCodecContext *avctx, void *arg,
716 int jobnr, int threadnr)
718 ProresContext *ctx = avctx->priv_data;
719 ProresThreadData *td = ctx->tdata + threadnr;
720 int mbs_per_slice = ctx->mbs_per_slice;
721 int x, y = jobnr, mb, q = 0;
723 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
724 while (ctx->mb_width - x < mbs_per_slice)
726 q = find_slice_quant(avctx, avctx->coded_frame,
727 (mb + 1) * TRELLIS_WIDTH, x, y,
/* backtrack from the last slice to the first */
731 for (x = ctx->slices_width - 1; x >= 0; x--) {
732 ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
733 q = td->nodes[q].prev_node;
/* Encode one frame into pkt.
 *
 * Output layout as written by the visible statements:
 *   - 4 bytes frame size (back-patched at the end) and the frame ID;
 *   - frame header: 2 bytes header size (back-patched), version, 4-byte
 *     vendor, width, height, flags, colour description, and optionally the
 *     luma and chroma quantisation matrices;
 *   - for each picture (one, or two when interlaced): a picture header with
 *     a back-patched 4-byte picture size, the slice count, the slice
 *     geometry byte, a table of 2-byte slice sizes, and then the slices.
 * Each slice starts with its header size in bits and the quantiser, followed
 * by the 2-byte sizes of all planes except the last one.
 *
 * Unless a fixed quantiser is forced, find_quant_thread() is run through
 * avctx->execute2() before the slices of a picture are written.
 *
 * The packet is marked as a key frame and its size set to the number of
 * bytes actually produced.
 *
 * NOTE(review): a number of original lines (some declarations, pointer
 * set-up for buf/tmp/slice_hdr/slice_sizes, error returns, the final return)
 * are not visible in this excerpt. */
739 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
740 const AVFrame *pic, int *got_packet)
742 ProresContext *ctx = avctx->priv_data;
743 uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
744 uint8_t *picture_size_pos;
746 int x, y, i, mb, q = 0;
747 int sizes[4] = { 0 };
/* 1 byte header size + 1 byte quantiser + 2 bytes per plane except the last */
748 int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
749 int frame_size, picture_size, slice_size;
753 *avctx->coded_frame = *pic;
754 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
755 avctx->coded_frame->key_frame = 1;
757 pkt_size = ctx->frame_size_upper_bound + FF_MIN_BUFFER_SIZE;
759 if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
762 orig_buf = pkt->data;
765 orig_buf += 4; // frame size
766 bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
771 buf += 2; // frame header size will be stored here
772 bytestream_put_be16 (&buf, 0); // version 1
773 bytestream_put_buffer(&buf, ctx->vendor, 4);
774 bytestream_put_be16 (&buf, avctx->width);
775 bytestream_put_be16 (&buf, avctx->height);
/* chroma format in the top two bits, field order flags below */
777 frame_flags = ctx->chroma_factor << 6;
778 if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
779 frame_flags |= pic->top_field_first ? 0x04 : 0x08;
780 bytestream_put_byte (&buf, frame_flags);
782 bytestream_put_byte (&buf, 0); // reserved
783 bytestream_put_byte (&buf, avctx->color_primaries);
784 bytestream_put_byte (&buf, avctx->color_trc);
785 bytestream_put_byte (&buf, avctx->colorspace);
786 bytestream_put_byte (&buf, 0x40); // source format and alpha information
787 bytestream_put_byte (&buf, 0); // reserved
/* the same matrix is written for both luma and chroma */
788 if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
789 bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
790 // luma quantisation matrix
791 for (i = 0; i < 64; i++)
792 bytestream_put_byte(&buf, ctx->quant_mat[i]);
793 // chroma quantisation matrix
794 for (i = 0; i < 64; i++)
795 bytestream_put_byte(&buf, ctx->quant_mat[i]);
797 bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
799 bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
801 for (ctx->cur_picture_idx = 0;
802 ctx->cur_picture_idx < ctx->pictures_per_frame;
803 ctx->cur_picture_idx++) {
/* the picture size field follows the one-byte header size */
805 picture_size_pos = buf + 1;
806 bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
807 buf += 4; // picture data size will be stored here
808 bytestream_put_be16 (&buf, ctx->slices_per_picture);
809 bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
811 // seek table - will be filled during slice encoding
813 buf += ctx->slices_per_picture * 2;
/* rate control pass: fills ctx->slice_q */
816 if (!ctx->force_quant) {
817 ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
823 for (y = 0; y < ctx->mb_height; y++) {
824 int mbs_per_slice = ctx->mbs_per_slice;
825 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
826 q = ctx->force_quant ? ctx->force_quant
827 : ctx->slice_q[mb + y * ctx->slices_width];
829 while (ctx->mb_width - x < mbs_per_slice)
/* slice header size is stored in bits */
832 bytestream_put_byte(&buf, slice_hdr_size << 3);
834 buf += slice_hdr_size - 1;
835 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
836 encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
/* fill in the slice header now that the plane sizes are known */
838 bytestream_put_byte(&slice_hdr, q);
839 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
840 for (i = 0; i < ctx->num_planes - 1; i++) {
841 bytestream_put_be16(&slice_hdr, sizes[i]);
842 slice_size += sizes[i];
/* seek table entry for this slice */
844 bytestream_put_be16(&slice_sizes, slice_size);
845 buf += slice_size - slice_hdr_size;
/* back-patch the picture size, measured from the start of the picture header */
849 picture_size = buf - (picture_size_pos - 1);
850 bytestream_put_be32(&picture_size_pos, picture_size);
/* back-patch the total frame size */
854 frame_size = buf - orig_buf;
855 bytestream_put_be32(&orig_buf, frame_size);
857 pkt->size = frame_size;
858 pkt->flags |= AV_PKT_FLAG_KEY;
/* Release everything allocated by encode_init(): the coded frame, the
 * trellis node array of every thread, the per-thread data array and the
 * per-slice quantiser table.
 * NOTE(review): whether the loop over tdata is guarded against a NULL
 * ctx->tdata cannot be seen in this excerpt (some lines are missing);
 * confirm before relying on calling this after a failed init. */
864 static av_cold int encode_close(AVCodecContext *avctx)
866 ProresContext *ctx = avctx->priv_data;
869 av_freep(&avctx->coded_frame);
872 for (i = 0; i < avctx->thread_count; i++)
873 av_free(ctx->tdata[i].nodes);
875 av_freep(&ctx->tdata);
876 av_freep(&ctx->slice_q);
/* Initialise the encoder from the codec context and private options.
 *
 * Visible steps:
 *  - allocate avctx->coded_frame, set up the DSP context and the scan table
 *    (interlaced or progressive scan depending on
 *    CODEC_FLAG_INTERLACED_DCT);
 *  - reject an mbs_per_slice value that is not a power of two;
 *  - derive macroblock and slice geometry;
 *  - select the quantisation matrix (from the profile when quant_sel is -1);
 *  - reject a vendor string that is not exactly 4 characters long;
 *  - rate-controlled mode (force_quant == 0): pick or validate bits_per_mb,
 *    precompute scaled matrices, allocate the slice quantiser table and the
 *    per-thread trellis data;
 *  - fixed-quantiser mode: reject quantisers above 64, build quants[0] and
 *    derive bits_per_mb from the matrix;
 *  - compute frame_size_upper_bound and set the codec tag.
 *
 * Returns AVERROR(ENOMEM), AVERROR(EINVAL) or AVERROR_INVALIDDATA on the
 * visible failure paths.
 *
 * NOTE(review): a number of original lines (declarations, else branches,
 * allocation checks, part of the upper-bound expression, the final return)
 * are not visible in this excerpt. */
881 static av_cold int encode_init(AVCodecContext *avctx)
883 ProresContext *ctx = avctx->priv_data;
886 int min_quant, max_quant;
887 int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
889 avctx->bits_per_raw_sample = 10;
890 avctx->coded_frame = avcodec_alloc_frame();
891 if (!avctx->coded_frame)
892 return AVERROR(ENOMEM);
894 ff_proresdsp_init(&ctx->dsp, avctx);
895 ff_init_scantable(ctx->dsp.dct_permutation, &ctx->scantable,
896 interlaced ? ff_prores_interlaced_scan
897 : ff_prores_progressive_scan);
899 mps = ctx->mbs_per_slice;
/* a power of two has exactly one bit set, so mps & (mps - 1) is 0 */
900 if (mps & (mps - 1)) {
901 av_log(avctx, AV_LOG_ERROR,
902 "there should be an integer power of two MBs per slice\n");
903 return AVERROR(EINVAL);
906 ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
909 ctx->profile_info = prores_profile_info + ctx->profile;
/* width in macroblocks, rounded up */
912 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
/* height per picture in macroblocks: the first form divides by 32, the
 * second by 16; presumably the first applies to interlaced coding
 * (the condition is not visible) */
915 ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
917 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
/* full-width slices plus one slice per set bit of the remaining width */
919 ctx->slices_width = ctx->mb_width / mps;
920 ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
921 ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
922 ctx->pictures_per_frame = 1 + interlaced;
/* -1 is the "auto" value of the quant_mat option */
924 if (ctx->quant_sel == -1)
925 ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
927 ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
929 if (strlen(ctx->vendor) != 4) {
930 av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
931 return AVERROR_INVALIDDATA;
/* a non-zero global_quality selects fixed-quantiser mode */
934 ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
935 if (!ctx->force_quant) {
936 if (!ctx->bits_per_mb) {
/* choose the bitrate column from the frame size class */
937 for (i = 0; i < NUM_MB_LIMITS - 1; i++)
938 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
939 ctx->pictures_per_frame)
941 ctx->bits_per_mb = ctx->profile_info->br_tab[i];
942 } else if (ctx->bits_per_mb < 128) {
943 av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
944 return AVERROR_INVALIDDATA;
947 min_quant = ctx->profile_info->min_quant;
948 max_quant = ctx->profile_info->max_quant;
/* precompute the scaled matrices for the storable quantisers */
949 for (i = min_quant; i < MAX_STORED_Q; i++) {
950 for (j = 0; j < 64; j++)
951 ctx->quants[i][j] = ctx->quant_mat[j] * i;
954 ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
957 return AVERROR(ENOMEM);
960 ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
963 return AVERROR(ENOMEM);
966 for (j = 0; j < avctx->thread_count; j++) {
967 ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
969 * sizeof(*ctx->tdata->nodes));
970 if (!ctx->tdata[j].nodes) {
972 return AVERROR(ENOMEM);
/* initialise the first trellis column, the start of every path */
974 for (i = min_quant; i < max_quant + 2; i++) {
975 ctx->tdata[j].nodes[i].prev_node = -1;
976 ctx->tdata[j].nodes[i].bits = 0;
977 ctx->tdata[j].nodes[i].score = 0;
/* fixed-quantiser mode */
983 if (ctx->force_quant > 64) {
984 av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
985 return AVERROR_INVALIDDATA;
988 for (j = 0; j < 64; j++) {
989 ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
990 ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
/* scale the per-block estimate ls to a macroblock, with extra allowance for
 * 4:4:4 chroma and for a fourth plane */
993 ctx->bits_per_mb = ls * 8;
994 if (ctx->chroma_factor == CFACTOR_Y444)
995 ctx->bits_per_mb += ls * 4;
996 if (ctx->num_planes == 4)
997 ctx->bits_per_mb += ls * 4;
1000 ctx->frame_size_upper_bound = ctx->pictures_per_frame *
1001 ctx->slices_per_picture *
1002 (2 + 2 * ctx->num_planes +
1003 (mps * ctx->bits_per_mb) / 8)
1006 avctx->codec_tag = ctx->profile_info->tag;
1008 av_log(avctx, AV_LOG_DEBUG,
1009 "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1010 ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1011 interlaced ? "yes" : "no", ctx->bits_per_mb);
1012 av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1013 ctx->frame_size_upper_bound);
/* Byte offset of a ProresContext member, for the AVOption table. */
#define OFFSET(x) offsetof(ProresContext, x)
/* Option flags shared by every entry: video + encoding parameter.
 * Parenthesised so the macro stays a single operand wherever it expands. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
/* Private options of the encoder.
 *   mbs_per_slice - 1..MAX_MBS_PER_SLICE, default 8 (encode_init() further
 *                   requires a power of two)
 *   profile       - proxy / lt / standard / hq, default standard
 *   vendor        - vendor ID string, default "Lavc" (encode_init() requires
 *                   exactly 4 characters)
 *   bits_per_mb   - 0..8192, default 0 (0 lets encode_init() choose from the
 *                   profile's bitrate table)
 *   quant_mat     - auto (-1) / proxy / lt / standard / hq / default;
 *                   auto takes the matrix from the profile */
1021 static const AVOption options[] = {
1022 { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1023 AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1024 { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1025 { .i64 = PRORES_PROFILE_STANDARD },
1026 PRORES_PROFILE_PROXY, PRORES_PROFILE_HQ, VE, "profile" },
1027 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1028 0, 0, VE, "profile" },
1029 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1030 0, 0, VE, "profile" },
1031 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1032 0, 0, VE, "profile" },
1033 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1034 0, 0, VE, "profile" },
1035 { "vendor", "vendor ID", OFFSET(vendor),
1036 AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
1037 { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1038 AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
1039 { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1040 { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1041 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1042 0, 0, VE, "quant_mat" },
1043 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1044 0, 0, VE, "quant_mat" },
1045 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1046 0, 0, VE, "quant_mat" },
1047 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1048 0, 0, VE, "quant_mat" },
1049 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1050 0, 0, VE, "quant_mat" },
1051 { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1052 0, 0, VE, "quant_mat" },
/* AVClass describing the encoder's private context; referenced by the codec
 * definition through .priv_class.
 * NOTE(review): the member that attaches the option table is not visible in
 * this excerpt. */
1056 static const AVClass proresenc_class = {
1057 .class_name = "ProRes encoder",
1058 .item_name = av_default_item_name,
1060 .version = LIBAVUTIL_VERSION_INT,
/* Codec registration entry for the "prores_kostya" encoder: a video encoder
 * for AV_CODEC_ID_PRORES that advertises slice threading and accepts 10-bit
 * planar 4:2:2 and 4:4:4 input. */
1063 AVCodec ff_prores_kostya_encoder = {
1064 .name = "prores_kostya",
1065 .type = AVMEDIA_TYPE_VIDEO,
1066 .id = AV_CODEC_ID_PRORES,
1067 .priv_data_size = sizeof(ProresContext),
1068 .init = encode_init,
1069 .close = encode_close,
1070 .encode2 = encode_frame,
1071 .capabilities = CODEC_CAP_SLICE_THREADS,
1072 .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
/* list terminated by AV_PIX_FMT_NONE */
1073 .pix_fmts = (const enum AVPixelFormat[]) {
1074 AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, AV_PIX_FMT_NONE
1076 .priv_class = &proresenc_class,