4 * Copyright (c) 2012 Konstantin Shishkov
6 * This encoder appears to be based on Anatoliy Wasserman's encoder, considering
7 * similarities in the bugs.
9 * This file is part of Libav.
11 * Libav is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * Libav is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with Libav; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/opt.h"
29 #include "bytestream.h"
31 #include "proresdsp.h"
32 #include "proresdata.h"
// Chroma-format codes kept in ProresContext.chroma_factor; the value is
// compared against CFACTOR_Y444 when choosing plane geometry and is shifted
// into the frame flags byte by encode_frame().
34 #define CFACTOR_Y422 2
35 #define CFACTOR_Y444 3
// Upper bound of the "mbs_per_slice" option; also dimensions the per-slice
// coefficient buffers.
37 #define MAX_MBS_PER_SLICE 8
// Number of planes the coefficient buffers are dimensioned for.
39 #define MAX_PLANES 3 // should be increased to 4 when there's AV_PIX_FMT_YUV444AP10
// Profile identifiers; encode_init() uses the selected value as an index into
// prores_profile_info, and PRORES_PROFILE_STANDARD is the option default.
42 PRORES_PROFILE_PROXY = 0,
44 PRORES_PROFILE_STANDARD,
// Base quantisation matrices, 64 entries each. encode_init() selects one row,
// either through the profile's `quant` field or through the "quant_mat" option,
// and every effective matrix is this row multiplied by the quantiser.
// NOTE(review): rows are presumably ordered like the QUANT_MAT_* constants
// (proxy, LT, standard, HQ, default) - confirm against the enum.
56 static const uint8_t prores_quant_matrices[][64] = {
58 4, 7, 9, 11, 13, 14, 15, 63,
59 7, 7, 11, 12, 14, 15, 63, 63,
60 9, 11, 13, 14, 15, 63, 63, 63,
61 11, 11, 13, 14, 63, 63, 63, 63,
62 11, 13, 14, 63, 63, 63, 63, 63,
63 13, 14, 63, 63, 63, 63, 63, 63,
64 13, 63, 63, 63, 63, 63, 63, 63,
65 63, 63, 63, 63, 63, 63, 63, 63,
68 4, 5, 6, 7, 9, 11, 13, 15,
69 5, 5, 7, 8, 11, 13, 15, 17,
70 6, 7, 9, 11, 13, 15, 15, 17,
71 7, 7, 9, 11, 13, 15, 17, 19,
72 7, 9, 11, 13, 14, 16, 19, 23,
73 9, 11, 13, 14, 16, 19, 23, 29,
74 9, 11, 13, 15, 17, 21, 28, 35,
75 11, 13, 16, 17, 21, 28, 35, 41,
78 4, 4, 5, 5, 6, 7, 7, 9,
79 4, 4, 5, 6, 7, 7, 9, 9,
80 5, 5, 6, 7, 7, 9, 9, 10,
81 5, 5, 6, 7, 7, 9, 9, 10,
82 5, 6, 7, 7, 8, 9, 10, 12,
83 6, 7, 7, 8, 9, 10, 12, 15,
84 6, 7, 7, 9, 10, 11, 14, 17,
85 7, 7, 9, 10, 11, 14, 17, 21,
88 4, 4, 4, 4, 4, 4, 4, 4,
89 4, 4, 4, 4, 4, 4, 4, 4,
90 4, 4, 4, 4, 4, 4, 4, 4,
91 4, 4, 4, 4, 4, 4, 4, 5,
92 4, 4, 4, 4, 4, 4, 5, 5,
93 4, 4, 4, 4, 4, 5, 5, 6,
94 4, 4, 4, 4, 5, 5, 6, 7,
95 4, 4, 4, 4, 5, 6, 7, 7,
98 4, 4, 4, 4, 4, 4, 4, 4,
99 4, 4, 4, 4, 4, 4, 4, 4,
100 4, 4, 4, 4, 4, 4, 4, 4,
101 4, 4, 4, 4, 4, 4, 4, 4,
102 4, 4, 4, 4, 4, 4, 4, 4,
103 4, 4, 4, 4, 4, 4, 4, 4,
104 4, 4, 4, 4, 4, 4, 4, 4,
105 4, 4, 4, 4, 4, 4, 4, 4,
// Macroblock-count thresholds. When the user gave no bits_per_mb, encode_init()
// walks this table against mb_width * mb_height * pictures_per_frame and uses
// the resulting index to pick a column of the profile's br_tab.
109 #define NUM_MB_LIMITS 4
110 static const int prores_mb_limits[NUM_MB_LIMITS] = {
111 1620, // up to 720x576
112 2700, // up to 960x720
113 6075, // up to 1440x1080
114 9216, // up to 2048x1152
// Per-profile parameters, indexed by the "profile" option value.
//   tag    - copied to avctx->codec_tag by encode_init()
//   br_tab - default bits per macroblock, one entry per prores_mb_limits slot
//   quant  - index of the default row in prores_quant_matrices
// The min_quant/max_quant fields read elsewhere in this file bound the
// quantiser search in find_slice_quant().
117 static const struct prores_profile {
118 const char *full_name;
122 int br_tab[NUM_MB_LIMITS];
124 } prores_profile_info[4] = {
126 .full_name = "proxy",
127 .tag = MKTAG('a', 'p', 'c', 'o'),
130 .br_tab = { 300, 242, 220, 194 },
131 .quant = QUANT_MAT_PROXY,
135 .tag = MKTAG('a', 'p', 'c', 's'),
138 .br_tab = { 720, 560, 490, 440 },
139 .quant = QUANT_MAT_LT,
142 .full_name = "standard",
143 .tag = MKTAG('a', 'p', 'c', 'n'),
146 .br_tab = { 1050, 808, 710, 632 },
147 .quant = QUANT_MAT_STANDARD,
150 .full_name = "high quality",
151 .tag = MKTAG('a', 'p', 'c', 'h'),
154 .br_tab = { 1566, 1216, 1070, 950 },
155 .quant = QUANT_MAT_HQ,
157 // for 4444 profile bitrate numbers are { 2350, 1828, 1600, 1425 }
// Number of trellis nodes reserved per slice column: node indices are formed
// as column * TRELLIS_WIDTH + quantiser, and the per-quantiser bit/score
// arrays in find_slice_quant() have this many entries.
#define TRELLIS_WIDTH 16
// Saturation value for trellis scores. Parenthesised so the macro always
// expands as a single operand, whatever operator surrounds it.
#define SCORE_LIMIT   (INT_MAX / 2)
// Number of quantiser values with a precomputed matrix in ProresContext.quants;
// quantisers at or above this value get their matrix built on the fly.
170 #define MAX_STORED_Q 16
// Scratch state owned by one worker thread during the quantiser search;
// find_quant_thread() selects the instance with ctx->tdata + threadnr.
172 typedef struct ProresThreadData {
// DCT coefficients of the current slice, one buffer per plane.
173 DECLARE_ALIGNED(16, DCTELEM, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
// 16x16 sample scratch used by get_slice_data() for macroblocks that cross
// the plane edge.
174 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
// 64-entry scratch quantisation matrix.
175 int16_t custom_q[64];
// Trellis nodes, allocated in encode_init() and freed in encode_close().
176 struct TrellisNode *nodes;
// Encoder private context (AVCodecContext.priv_data).
179 typedef struct ProresContext {
// Coefficient and edge-emulation scratch used by encode_slice() while writing
// the bitstream.
181 DECLARE_ALIGNED(16, DCTELEM, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
182 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
// Precomputed matrices: quants[q][j] = quant_mat[j] * q. Row 0 holds the
// forced-quantiser matrix when global_quality is set.
183 int16_t quants[MAX_STORED_Q][64];
// Matrix built on demand for quantisers >= MAX_STORED_Q.
184 int16_t custom_q[64];
// Selected row of prores_quant_matrices.
185 const uint8_t *quant_mat;
187 ProresDSPContext dsp;
// Picture size in 16x16 macroblocks.
190 int mb_width, mb_height;
192 int num_chroma_blocks, chroma_factor;
// mb_height * slices_width, computed in encode_init().
194 int slices_per_picture;
195 int pictures_per_frame; // 1 for progressive, 2 for interlaced
// Worst-case coded frame size; encode_frame() sizes the output packet from it.
204 int frame_size_upper_bound;
// Entry of prores_profile_info chosen by the "profile" option.
207 const struct prores_profile *profile_info;
// One ProresThreadData per thread (avctx->thread_count entries).
211 ProresThreadData *tdata;
// Read the samples of one slice of a single plane and forward-DCT them into
// `blocks`.
//   src, linesize - first sample of the slice and the row stride in bytes
//   x, y, w, h    - slice position and plane size, used for the edge checks
//   blocks        - output coefficients
//   emu_buf       - 16-sample-wide scratch for macroblocks crossing the edge
//   blocks_per_mb - 8x8 blocks per macroblock; a macroblock spans
//                   4 * blocks_per_mb samples horizontally
214 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
215 int linesize, int x, int y, int w, int h,
216 DCTELEM *blocks, uint16_t *emu_buf,
217 int mbs_per_slice, int blocks_per_mb, int is_chroma)
219 const uint16_t *esrc;
220 const int mb_width = 4 * blocks_per_mb;
// One iteration per macroblock; src advances by one macroblock width.
224 for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
// Zero the coefficients of this and all remaining macroblocks of the slice.
// NOTE(review): the guarding condition is not visible here - presumably taken
// when the macroblock lies beyond the right edge of the plane.
226 memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
// Macroblock fully inside the plane: keep the caller's stride.
230 if (x + mb_width <= w && y + 16 <= h) {
232 elinesize = linesize;
// Otherwise build a padded copy in emu_buf, whose rows are 16 samples wide.
237 elinesize = 16 * sizeof(*emu_buf);
// Size of the part of the macroblock that is actually inside the plane.
239 bw = FFMIN(w - x, mb_width);
240 bh = FFMIN(h - y, 16);
// Copy the available rows and repeat the last available sample to the right.
242 for (j = 0; j < bh; j++) {
243 memcpy(emu_buf + j * 16,
244 (const uint8_t*)src + j * linesize,
246 pix = emu_buf[j * 16 + bw - 1];
247 for (k = bw; k < mb_width; k++)
248 emu_buf[j * 16 + k] = pix;
// Fill a missing row with a copy of the last available row.
251 memcpy(emu_buf + j * 16,
252 emu_buf + (bh - 1) * 16,
253 mb_width * sizeof(*emu_buf));
// esrc points to 16-bit samples while elinesize is in bytes, so
// esrc + elinesize * 4 is eight rows down; esrc + 8 is eight samples right.
// The two groups of calls below visit the 8x8 blocks in different orders.
// NOTE(review): presumably selected by is_chroma - the branch line is not
// visible here.
256 ctx->dsp.fdct(esrc, elinesize, blocks);
258 if (blocks_per_mb > 2) {
259 ctx->dsp.fdct(esrc + 8, elinesize, blocks);
262 ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
264 if (blocks_per_mb > 2) {
265 ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
269 ctx->dsp.fdct(esrc, elinesize, blocks);
271 ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
273 if (blocks_per_mb > 2) {
274 ctx->dsp.fdct(esrc + 8, elinesize, blocks);
276 ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
286 * Write an unsigned rice/exp golomb codeword.
// `codebook` packs three fields: bits 0-1 hold switch_bits - 1, bits 2-4 the
// exp-Golomb order and the bits from 5 upwards the Rice order. Values below
// switch_bits << rice_order use the Rice form, all others the exp-Golomb form.
288 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
290 unsigned int rice_order, exp_order, switch_bits, switch_val;
293 /* number of prefix bits to switch between Rice and expGolomb */
294 switch_bits = (codebook & 3) + 1;
295 rice_order = codebook >> 5; /* rice code order */
296 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
298 switch_val = switch_bits << rice_order;
// exp-Golomb form: rebase the value, then write a run of zero bits followed by
// the value itself in exponent + 1 bits.
300 if (val >= switch_val) {
301 val -= switch_val - (1 << exp_order);
302 exponent = av_log2(val);
304 put_bits(pb, exponent - exp_order + switch_bits, 0);
305 put_bits(pb, exponent + 1, val);
// Rice form: the quotient is written as a run of zero bits, the remainder in
// rice_order bits.
307 exponent = val >> rice_order;
310 put_bits(pb, exponent, 0);
313 put_sbits(pb, rice_order, val);
// GET_SIGN(x) is -1 for negative x and 0 otherwise. It is written as a
// comparison so the result does not rely on the implementation-defined
// behaviour of right-shifting a negative value.
#define GET_SIGN(x)  (-((x) < 0))
// Fold a signed value into a non-negative code: 0, -1, 1, -2, ... map to
// 0, 1, 2, 3, ... The doubling uses a multiplication because left-shifting a
// negative value is undefined behaviour. Note that x is evaluated twice.
#define MAKE_CODE(x) (((x) * 2) ^ GET_SIGN(x))
// Write the DC coefficients of all blocks of a slice plane.
//   blocks           - coefficients, 64 per block; element 0 of each is the DC
//   blocks_per_slice - number of blocks to code
//   scale            - DC quantiser (the caller passes qmat[0])
320 static void encode_dcs(PutBitContext *pb, DCTELEM *blocks,
321 int blocks_per_slice, int scale)
324 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
// The first DC has the 0x4000 offset removed, is quantised and coded with the
// dedicated FIRST_DC_CB codebook.
326 prev_dc = (blocks[0] - 0x4000) / scale;
327 encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
// Every following DC is coded as a difference to the previous one.
332 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
333 dc = (blocks[0] - 0x4000) / scale;
334 delta = dc - prev_dc;
335 new_sign = GET_SIGN(delta);
// Negate the difference when `sign` is -1, leave it unchanged when it is 0.
336 delta = (delta ^ sign) - sign;
337 code = MAKE_CODE(delta);
338 encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
// The next codebook index is derived from the code just written, capped at 3.
339 codebook = (code + (code & 1)) >> 1;
340 codebook = FFMIN(codebook, 3);
// Write the AC coefficients of a slice plane as (run, level, sign) triples.
//   scan - scan order table; scan[i] is the coefficient position for step i
//   qmat - quantisation matrix, indexed by coefficient position
// NOTE(review): plane_size_factor is not referenced in the lines visible here.
346 static void encode_acs(PutBitContext *pb, DCTELEM *blocks,
347 int blocks_per_slice,
348 int plane_size_factor,
349 const uint8_t *scan, const int16_t *qmat)
352 int run, level, run_cb, lev_cb;
353 int max_coeffs, abs_level;
355 max_coeffs = blocks_per_slice << 6;
// Initial codebook selectors for the run and the level.
356 run_cb = ff_prores_run_to_cb_index[4];
357 lev_cb = ff_prores_lev_to_cb_index[2];
// The outer loop walks scan positions 1..63 and the inner loop visits that
// position in every block of the slice (blocks are 64 coefficients apart).
360 for (i = 1; i < 64; i++) {
361 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
362 level = blocks[idx] / qmat[scan[i]];
364 abs_level = FFABS(level);
365 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
366 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
// The sign is sent as one bit.
368 put_sbits(pb, 1, GET_SIGN(level));
// Codebooks for the next symbol depend on the values just coded.
370 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
371 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
// Code one plane of a slice (DC coefficients, then AC coefficients) from
// already transformed `blocks`.
// Returns the number of whole bytes by which the bit writer position advanced.
380 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
381 const uint16_t *src, int linesize,
382 int mbs_per_slice, DCTELEM *blocks,
383 int blocks_per_mb, int plane_size_factor,
386 int blocks_per_slice, saved_pos;
388 saved_pos = put_bits_count(pb);
389 blocks_per_slice = mbs_per_slice * blocks_per_mb;
// qmat[0] is the DC quantiser.
391 encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
392 encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
393 ctx->scantable.permutated, qmat);
396 return (put_bits_count(pb) - saved_pos) >> 3;
// Encode every plane of one slice into `pb`.
//   sizes - receives the coded size of each plane, as returned by
//           encode_slice_plane()
//   x, y  - slice position
//   quant - quantiser chosen for the slice
399 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
401 int sizes[4], int x, int y, int quant,
404 ProresContext *ctx = avctx->priv_data;
408 int slice_width_factor = av_log2(mbs_per_slice);
409 int num_cblocks, pwidth, linesize, line_add;
410 int plane_factor, is_chroma;
// line_add is the row offset of the picture being coded: with two pictures per
// frame it is derived from the picture index and the field order.
413 if (ctx->pictures_per_frame == 1)
416 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
// Matrix selection: row 0 in forced-quantiser mode, a precomputed row for
// small quantisers, otherwise a matrix built on the spot.
418 if (ctx->force_quant) {
419 qmat = ctx->quants[0];
420 } else if (quant < MAX_STORED_Q) {
421 qmat = ctx->quants[quant];
423 qmat = ctx->custom_q;
424 for (i = 0; i < 64; i++)
425 qmat[i] = ctx->quant_mat[i] * quant;
428 for (i = 0; i < ctx->num_planes; i++) {
429 is_chroma = (i == 1 || i == 2);
430 plane_factor = slice_width_factor + 2;
// NOTE(review): presumably applied to chroma planes only - the guard is not
// visible here.
432 plane_factor += ctx->chroma_factor - 3;
// Full-width geometry for luma and for 4:4:4 chroma, half width otherwise.
433 if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
437 pwidth = avctx->width;
442 pwidth = avctx->width >> 1;
// The stride is multiplied by pictures_per_frame so consecutive rows of one
// picture are addressed.
445 linesize = pic->linesize[i] * ctx->pictures_per_frame;
446 src = (const uint16_t*)(pic->data[i] + yp * linesize +
447 line_add * pic->linesize[i]) + xp;
// Transform into the shared ctx buffers, then entropy-code them.
449 get_slice_data(ctx, src, linesize, xp, yp,
450 pwidth, avctx->height / ctx->pictures_per_frame,
451 ctx->blocks[0], ctx->emu_buf,
452 mbs_per_slice, num_cblocks, is_chroma);
453 sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
454 mbs_per_slice, ctx->blocks[0],
455 num_cblocks, plane_factor,
457 total_size += sizes[i];
// Return the length in bits of the codeword for `val` in `codebook`, without
// writing anything. The codebook fields are decoded exactly as in
// encode_vlc_codeword().
462 static inline int estimate_vlc(unsigned codebook, int val)
464 unsigned int rice_order, exp_order, switch_bits, switch_val;
467 /* number of prefix bits to switch between Rice and expGolomb */
468 switch_bits = (codebook & 3) + 1;
469 rice_order = codebook >> 5; /* rice code order */
470 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
472 switch_val = switch_bits << rice_order;
// exp-Golomb form: prefix of exponent - exp_order + switch_bits bits plus
// exponent + 1 value bits.
474 if (val >= switch_val) {
475 val -= switch_val - (1 << exp_order);
476 exponent = av_log2(val);
478 return exponent * 2 - exp_order + switch_bits + 1;
// Rice form: quotient bits, one extra bit and rice_order remainder bits.
480 return (val >> rice_order) + rice_order + 1;
// Bit-cost counterpart of encode_dcs(): walks the DC coefficients the same way
// but sums codeword lengths instead of writing them. The quantisation
// remainder of every DC is added to *error.
484 static int estimate_dcs(int *error, DCTELEM *blocks, int blocks_per_slice,
488 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
491 prev_dc = (blocks[0] - 0x4000) / scale;
492 bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
496 *error += FFABS(blocks[0] - 0x4000) % scale;
498 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
499 dc = (blocks[0] - 0x4000) / scale;
500 *error += FFABS(blocks[0] - 0x4000) % scale;
501 delta = dc - prev_dc;
502 new_sign = GET_SIGN(delta);
// Negate the difference when `sign` is -1, leave it unchanged when it is 0.
503 delta = (delta ^ sign) - sign;
504 code = MAKE_CODE(delta);
505 bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
// Same codebook adaptation as in encode_dcs().
506 codebook = (code + (code & 1)) >> 1;
507 codebook = FFMIN(codebook, 3);
// Bit-cost counterpart of encode_acs(): same traversal and codebook
// adaptation, but codeword lengths are summed and the quantisation remainder
// of every coefficient is added to *error.
// NOTE(review): plane_size_factor is not referenced in the lines visible here.
515 static int estimate_acs(int *error, DCTELEM *blocks, int blocks_per_slice,
516 int plane_size_factor,
517 const uint8_t *scan, const int16_t *qmat)
520 int run, level, run_cb, lev_cb;
521 int max_coeffs, abs_level;
524 max_coeffs = blocks_per_slice << 6;
525 run_cb = ff_prores_run_to_cb_index[4];
526 lev_cb = ff_prores_lev_to_cb_index[2];
// Scan positions 1..63; each position is visited in every block of the slice.
529 for (i = 1; i < 64; i++) {
530 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
531 level = blocks[idx] / qmat[scan[i]];
532 *error += FFABS(blocks[idx]) % qmat[scan[i]];
534 abs_level = FFABS(level);
535 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
536 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
539 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
540 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
// Estimate the coded size of one plane of a slice from the coefficients
// already stored in td->blocks[plane].
// Returns the bit count rounded up to a multiple of 8; quantisation error is
// accumulated into *error.
551 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
552 const uint16_t *src, int linesize,
554 int blocks_per_mb, int plane_size_factor,
555 const int16_t *qmat, ProresThreadData *td)
557 int blocks_per_slice;
560 blocks_per_slice = mbs_per_slice * blocks_per_mb;
562 bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
563 bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
564 plane_size_factor, ctx->scantable.permutated, qmat);
566 return FFALIGN(bits, 8);
// Evaluate every candidate quantiser for one slice and extend the trellis by
// one column.
//   trellis_node - index of the first node of this slice's column; the
//                  previous column starts TRELLIS_WIDTH nodes earlier
//   x, y         - slice position
//   td           - per-thread scratch (coefficients, trellis nodes)
569 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
570 int trellis_node, int x, int y, int mbs_per_slice,
571 ProresThreadData *td)
573 ProresContext *ctx = avctx->priv_data;
574 int i, q, pq, xp, yp;
576 int slice_width_factor = av_log2(mbs_per_slice);
577 int num_cblocks[MAX_PLANES], pwidth;
578 int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
579 const int min_quant = ctx->profile_info->min_quant;
580 const int max_quant = ctx->profile_info->max_quant;
581 int error, bits, bits_limit;
582 int mbs, prev, cur, new_score;
583 int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
586 int linesize[4], line_add;
// Row offset of the picture being coded when there are two pictures per frame.
588 if (ctx->pictures_per_frame == 1)
591 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
// Macroblocks of this row covered up to and including this slice; it scales
// the cumulative bit budget further down.
592 mbs = x + mbs_per_slice;
// Transform every plane once; the results are reused for all quantisers.
594 for (i = 0; i < ctx->num_planes; i++) {
595 is_chroma[i] = (i == 1 || i == 2);
596 plane_factor[i] = slice_width_factor + 2;
// NOTE(review): presumably applied to chroma planes only - the guard is not
// visible here.
598 plane_factor[i] += ctx->chroma_factor - 3;
599 if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
603 pwidth = avctx->width;
608 pwidth = avctx->width >> 1;
611 linesize[i] = pic->linesize[i] * ctx->pictures_per_frame;
612 src = (const uint16_t*)(pic->data[i] + yp * linesize[i] +
613 line_add * pic->linesize[i]) + xp;
615 get_slice_data(ctx, src, linesize[i], xp, yp,
616 pwidth, avctx->height / ctx->pictures_per_frame,
617 td->blocks[i], td->emu_buf,
618 mbs_per_slice, num_cblocks[i], is_chroma[i]);
// Reset this column's nodes; slot max_quant + 1 is the extra "overquant" node.
621 for (q = min_quant; q < max_quant + 2; q++) {
622 td->nodes[trellis_node + q].prev_node = -1;
623 td->nodes[trellis_node + q].quant = q;
626 // todo: maybe perform coarser quantising to fit into frame size when needed
// Cost (bits) and distortion (error) of the slice for each regular quantiser.
627 for (q = min_quant; q <= max_quant; q++) {
630 for (i = 0; i < ctx->num_planes; i++) {
631 bits += estimate_slice_plane(ctx, &error, i,
634 num_cblocks[i], plane_factor[i],
// NOTE(review): the handling of an oversized slice is not visible here.
637 if (bits > 65000 * 8) {
641 slice_bits[q] = bits;
642 slice_score[q] = error;
// Overquant node: if max_quant already fits the per-slice budget it is reused
// with a slightly worse score, otherwise larger quantisers up to 127 are tried
// until one fits.
644 if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
645 slice_bits[max_quant + 1] = slice_bits[max_quant];
646 slice_score[max_quant + 1] = slice_score[max_quant] + 1;
647 overquant = max_quant;
649 for (q = max_quant + 1; q < 128; q++) {
652 if (q < MAX_STORED_Q) {
653 qmat = ctx->quants[q];
656 for (i = 0; i < 64; i++)
657 qmat[i] = ctx->quant_mat[i] * q;
659 for (i = 0; i < ctx->num_planes; i++) {
660 bits += estimate_slice_plane(ctx, &error, i,
663 num_cblocks[i], plane_factor[i],
666 if (bits <= ctx->bits_per_mb * mbs_per_slice)
670 slice_bits[max_quant + 1] = bits;
671 slice_score[max_quant + 1] = error;
674 td->nodes[trellis_node + max_quant + 1].quant = overquant;
// Trellis step: combine every node of the previous column (pq) with every
// quantiser of this column (q) and keep the best-scoring predecessor per node.
676 bits_limit = mbs * ctx->bits_per_mb;
677 for (pq = min_quant; pq < max_quant + 2; pq++) {
678 prev = trellis_node - TRELLIS_WIDTH + pq;
680 for (q = min_quant; q < max_quant + 2; q++) {
681 cur = trellis_node + q;
683 bits = td->nodes[prev].bits + slice_bits[q];
684 error = slice_score[q];
// NOTE(review): the action for an exceeded cumulative budget is not visible.
685 if (bits > bits_limit)
// Scores saturate at SCORE_LIMIT instead of being summed past it.
688 if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
689 new_score = td->nodes[prev].score + error;
691 new_score = SCORE_LIMIT;
692 if (td->nodes[cur].prev_node == -1 ||
693 td->nodes[cur].score >= new_score) {
695 td->nodes[cur].bits = bits;
696 td->nodes[cur].score = new_score;
697 td->nodes[cur].prev_node = prev;
// Find the node of this column with the lowest score; ties go to the larger
// quantiser because of the <= comparison.
702 error = td->nodes[trellis_node + min_quant].score;
703 pq = trellis_node + min_quant;
704 for (q = min_quant + 1; q < max_quant + 2; q++) {
705 if (td->nodes[trellis_node + q].score <= error) {
706 error = td->nodes[trellis_node + q].score;
707 pq = trellis_node + q;
// Worker run through avctx->execute2(): chooses the quantisers for one
// macroblock row.
//   jobnr    - macroblock row handled by this job
//   threadnr - selects the ProresThreadData instance to use
714 static int find_quant_thread(AVCodecContext *avctx, void *arg,
715 int jobnr, int threadnr)
717 ProresContext *ctx = avctx->priv_data;
718 ProresThreadData *td = ctx->tdata + threadnr;
719 int mbs_per_slice = ctx->mbs_per_slice;
720 int x, y = jobnr, mb, q = 0;
// Forward pass: one trellis column per slice, starting at column 1.
722 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
// NOTE(review): the loop body is not visible - presumably it shrinks
// mbs_per_slice until the slice fits the remaining width.
723 while (ctx->mb_width - x < mbs_per_slice)
725 q = find_slice_quant(avctx, avctx->coded_frame,
726 (mb + 1) * TRELLIS_WIDTH, x, y,
// Backward pass: follow prev_node links from the last chosen node and store
// the quantiser of every slice of the row.
730 for (x = ctx->slices_width - 1; x >= 0; x--) {
731 ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
732 q = td->nodes[q].prev_node;
// encode2 callback: build one ProRes frame (frame header, then for every
// picture a picture header, a slice size table and the slices) in `pkt`.
738 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
739 const AVFrame *pic, int *got_packet)
741 ProresContext *ctx = avctx->priv_data;
742 uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
743 uint8_t *picture_size_pos;
745 int x, y, i, mb, q = 0;
746 int sizes[4] = { 0 };
// One byte for the quantiser plus a 16-bit size for every plane but the last.
747 int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
748 int frame_size, picture_size, slice_size;
// Every frame is marked as an intra key frame.
752 *avctx->coded_frame = *pic;
753 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
754 avctx->coded_frame->key_frame = 1;
// Packet capacity: the worst-case size computed in encode_init() plus margin.
756 pkt_size = ctx->frame_size_upper_bound + FF_MIN_BUFFER_SIZE;
758 if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
761 orig_buf = pkt->data;
// Frame container: the 4-byte size is filled in at the very end.
764 orig_buf += 4; // frame size
765 bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
// Frame header.
770 buf += 2; // frame header size will be stored here
771 bytestream_put_be16 (&buf, 0); // version 1
772 bytestream_put_buffer(&buf, ctx->vendor, 4);
773 bytestream_put_be16 (&buf, avctx->width);
774 bytestream_put_be16 (&buf, avctx->height);
// Flags byte: chroma format in the top bits, field order bits when interlaced.
776 frame_flags = ctx->chroma_factor << 6;
777 if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
778 frame_flags |= pic->top_field_first ? 0x04 : 0x08;
779 bytestream_put_byte (&buf, frame_flags);
781 bytestream_put_byte (&buf, 0); // reserved
782 bytestream_put_byte (&buf, avctx->color_primaries);
783 bytestream_put_byte (&buf, avctx->color_trc);
784 bytestream_put_byte (&buf, avctx->colorspace);
785 bytestream_put_byte (&buf, 0x40); // source format and alpha information
786 bytestream_put_byte (&buf, 0); // reserved
// Unless the default matrix is selected, the matrix is stored in the header;
// the same table is written for luma and for chroma.
787 if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
788 bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
789 // luma quantisation matrix
790 for (i = 0; i < 64; i++)
791 bytestream_put_byte(&buf, ctx->quant_mat[i]);
792 // chroma quantisation matrix
793 for (i = 0; i < 64; i++)
794 bytestream_put_byte(&buf, ctx->quant_mat[i]);
796 bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
798 bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
// One pass per picture: pictures_per_frame is 1 + interlaced.
800 for (ctx->cur_picture_idx = 0;
801 ctx->cur_picture_idx < ctx->pictures_per_frame;
802 ctx->cur_picture_idx++) {
// Remember where the 32-bit picture size goes (right after the size byte).
804 picture_size_pos = buf + 1;
805 bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
806 buf += 4; // picture data size will be stored here
807 bytestream_put_be16 (&buf, ctx->slices_per_picture);
808 bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
810 // seek table - will be filled during slice encoding
812 buf += ctx->slices_per_picture * 2;
// Rate-controlled mode: run the quantiser search before coding the slices.
815 if (!ctx->force_quant) {
816 ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
// Code the slices row by row.
822 for (y = 0; y < ctx->mb_height; y++) {
823 int mbs_per_slice = ctx->mbs_per_slice;
824 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
825 q = ctx->force_quant ? ctx->force_quant
826 : ctx->slice_q[mb + y * ctx->slices_width];
// NOTE(review): the loop body is not visible - presumably it shrinks
// mbs_per_slice for the slices at the right edge.
828 while (ctx->mb_width - x < mbs_per_slice)
// Slice header: its size in bits first; the remaining fields are written via
// slice_hdr once the plane sizes are known.
831 bytestream_put_byte(&buf, slice_hdr_size << 3);
833 buf += slice_hdr_size - 1;
834 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
835 encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
// Fill in the quantiser and the size of every plane except the last; the last
// plane's size only contributes to the total slice size.
837 bytestream_put_byte(&slice_hdr, q);
838 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
839 for (i = 0; i < ctx->num_planes - 1; i++) {
840 bytestream_put_be16(&slice_hdr, sizes[i]);
841 slice_size += sizes[i];
// Record the total slice size in the table reserved after the picture header.
843 bytestream_put_be16(&slice_sizes, slice_size);
844 buf += slice_size - slice_hdr_size;
// The picture size is counted from the start of the picture header.
848 picture_size = buf - (picture_size_pos - 1);
849 bytestream_put_be32(&picture_size_pos, picture_size);
// Patch the frame size and finalise the packet.
853 frame_size = buf - orig_buf;
854 bytestream_put_be32(&orig_buf, frame_size);
856 pkt->size = frame_size;
857 pkt->flags |= AV_PKT_FLAG_KEY;
// close callback: release everything allocated by encode_init().
863 static av_cold int encode_close(AVCodecContext *avctx)
865 ProresContext *ctx = avctx->priv_data;
868 av_freep(&avctx->coded_frame);
// Free each thread's trellis node array before the per-thread array itself.
871 for (i = 0; i < avctx->thread_count; i++)
872 av_free(ctx->tdata[i].nodes);
874 av_freep(&ctx->tdata);
875 av_freep(&ctx->slice_q);
// init callback: validate the options, derive the picture/slice geometry and
// set up either rate-controlled mode (trellis search over quantisers) or
// forced-quantiser mode (global_quality set).
// Returns a negative AVERROR code for an invalid option or a failed allocation.
880 static av_cold int encode_init(AVCodecContext *avctx)
882 ProresContext *ctx = avctx->priv_data;
885 int min_quant, max_quant;
886 int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
888 avctx->bits_per_raw_sample = 10;
889 avctx->coded_frame = avcodec_alloc_frame();
890 if (!avctx->coded_frame)
891 return AVERROR(ENOMEM);
// The scan order depends on whether interlaced coding was requested.
893 ff_proresdsp_init(&ctx->dsp, avctx);
894 ff_init_scantable(ctx->dsp.dct_permutation, &ctx->scantable,
895 interlaced ? ff_prores_interlaced_scan
896 : ff_prores_progressive_scan);
// mbs_per_slice must be a power of two (a value with a single bit set).
898 mps = ctx->mbs_per_slice;
899 if (mps & (mps - 1)) {
900 av_log(avctx, AV_LOG_ERROR,
901 "there should be an integer power of two MBs per slice\n");
902 return AVERROR(EINVAL);
905 ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
908 ctx->profile_info = prores_profile_info + ctx->profile;
// Picture size in macroblocks, rounded up.
// NOTE(review): the 32-line rounding is presumably the interlaced case - the
// selecting condition is not visible here.
911 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
914 ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
916 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
// Slices per row: the full-width slices plus one slice for every set bit of
// the leftover macroblock count.
918 ctx->slices_width = ctx->mb_width / mps;
919 ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
920 ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
921 ctx->pictures_per_frame = 1 + interlaced;
// quant_sel == -1 ("auto") takes the matrix of the selected profile.
923 if (ctx->quant_sel == -1)
924 ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
926 ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
// encode_frame() copies exactly four vendor bytes into the frame header.
928 if (strlen(ctx->vendor) != 4) {
929 av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
930 return AVERROR_INVALIDDATA;
// A non-zero global_quality selects forced-quantiser mode.
933 ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
934 if (!ctx->force_quant) {
// Rate-controlled mode: take the bit budget from the profile table unless the
// user supplied one.
935 if (!ctx->bits_per_mb) {
936 for (i = 0; i < NUM_MB_LIMITS - 1; i++)
937 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
938 ctx->pictures_per_frame)
940 ctx->bits_per_mb = ctx->profile_info->br_tab[i];
941 } else if (ctx->bits_per_mb < 128) {
942 av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
943 return AVERROR_INVALIDDATA;
// Precompute the scaled matrices for all quantisers below MAX_STORED_Q.
946 min_quant = ctx->profile_info->min_quant;
947 max_quant = ctx->profile_info->max_quant;
948 for (i = min_quant; i < MAX_STORED_Q; i++) {
949 for (j = 0; j < 64; j++)
950 ctx->quants[i][j] = ctx->quant_mat[j] * i;
// One chosen quantiser per slice of a picture.
953 ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
956 return AVERROR(ENOMEM);
959 ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
962 return AVERROR(ENOMEM);
// Per-thread trellis: slices_width + 1 columns, the extra one being the start
// column that is initialised below.
965 for (j = 0; j < avctx->thread_count; j++) {
966 ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
968 * sizeof(*ctx->tdata->nodes));
969 if (!ctx->tdata[j].nodes) {
971 return AVERROR(ENOMEM);
973 for (i = min_quant; i < max_quant + 2; i++) {
974 ctx->tdata[j].nodes[i].prev_node = -1;
975 ctx->tdata[j].nodes[i].bits = 0;
976 ctx->tdata[j].nodes[i].score = 0;
// Forced-quantiser mode.
982 if (ctx->force_quant > 64) {
983 av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
984 return AVERROR_INVALIDDATA;
// Build the single matrix in row 0 and derive a per-block size figure `ls`
// from it, which sets bits_per_mb below.
987 for (j = 0; j < 64; j++) {
988 ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
989 ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
992 ctx->bits_per_mb = ls * 8;
993 if (ctx->chroma_factor == CFACTOR_Y444)
994 ctx->bits_per_mb += ls * 4;
995 if (ctx->num_planes == 4)
996 ctx->bits_per_mb += ls * 4;
// Worst-case frame size; encode_frame() sizes its packet from this value.
999 ctx->frame_size_upper_bound = ctx->pictures_per_frame *
1000 ctx->slices_per_picture *
1001 (2 + 2 * ctx->num_planes +
1002 (mps * ctx->bits_per_mb) / 8)
1005 avctx->codec_tag = ctx->profile_info->tag;
1007 av_log(avctx, AV_LOG_DEBUG,
1008 "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1009 ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1010 interlaced ? "yes" : "no", ctx->bits_per_mb);
1011 av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1012 ctx->frame_size_upper_bound);
// Byte offset of a ProresContext field, used by the option table entries.
#define OFFSET(x) offsetof(ProresContext, x)
// Flag set shared by every option of this encoder. Parenthesised so the macro
// always expands as a single operand, whatever operator surrounds it.
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
// Private encoder options. Each entry maps an option name onto a
// ProresContext field; the AV_OPT_TYPE_CONST entries are named values for the
// unit ("profile" or "quant_mat") given in their last field.
1020 static const AVOption options[] = {
// Validated again in encode_init(), which requires a power of two.
1021 { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1022 AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1023 { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1024 { .i64 = PRORES_PROFILE_STANDARD },
1025 PRORES_PROFILE_PROXY, PRORES_PROFILE_HQ, VE, "profile" },
1026 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1027 0, 0, VE, "profile" },
1028 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1029 0, 0, VE, "profile" },
1030 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1031 0, 0, VE, "profile" },
1032 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1033 0, 0, VE, "profile" },
// encode_init() rejects any vendor string that is not exactly 4 characters.
1034 { "vendor", "vendor ID", OFFSET(vendor),
1035 AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
// 0 means "derive from the profile"; encode_init() rejects other values
// below 128.
1036 { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1037 AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
// -1 ("auto") uses the matrix belonging to the selected profile.
1038 { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1039 { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1040 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1041 0, 0, VE, "quant_mat" },
1042 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1043 0, 0, VE, "quant_mat" },
1044 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1045 0, 0, VE, "quant_mat" },
1046 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1047 0, 0, VE, "quant_mat" },
1048 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1049 0, 0, VE, "quant_mat" },
1050 { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1051 0, 0, VE, "quant_mat" },
// AVClass of the encoder's private context; referenced by the codec's
// priv_class field.
1055 static const AVClass proresenc_class = {
1056 .class_name = "ProRes encoder",
1057 .item_name = av_default_item_name,
1059 .version = LIBAVUTIL_VERSION_INT,
// Codec registration entry: wires the init/encode/close callbacks, declares
// slice threading and lists the two accepted 10-bit planar pixel formats.
1062 AVCodec ff_prores_kostya_encoder = {
1063 .name = "prores_kostya",
1064 .type = AVMEDIA_TYPE_VIDEO,
1065 .id = AV_CODEC_ID_PRORES,
1066 .priv_data_size = sizeof(ProresContext),
1067 .init = encode_init,
1068 .close = encode_close,
1069 .encode2 = encode_frame,
1070 .capabilities = CODEC_CAP_SLICE_THREADS,
1071 .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1072 .pix_fmts = (const enum AVPixelFormat[]) {
1073 AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, AV_PIX_FMT_NONE
1075 .priv_class = &proresenc_class,