/*
* VC3/DNxHD encoder
* Copyright (c) 2007 Baptiste Coudurier <baptiste dot coudurier at smartjog dot com>
+ * Copyright (c) 2011 MirriAd Ltd
*
* VC-3 encoder funded by the British Broadcasting Corporation
+ * 10 bit support added by MirriAd Ltd, Joseph Artsimovich <joseph@mirriad.com>
*
- * This file is part of FFmpeg.
+ * This file is part of Libav.
*
- * FFmpeg is free software; you can redistribute it and/or
+ * Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * FFmpeg is distributed in the hope that it will be useful,
+ * Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
+ * License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
//#define DEBUG
#define RC_VARIANCE 1 // use variance or ssd for fast rc
+#include "libavutil/opt.h"
#include "avcodec.h"
#include "dsputil.h"
+#include "internal.h"
#include "mpegvideo.h"
+#include "mpegvideo_common.h"
#include "dnxhdenc.h"
-int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
+// Flags shared by the private options of this encoder. Parenthesized so the
+// expansion stays a single operand wherever the macro is used.
+#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
+#define DNX10BIT_QMAT_SHIFT 18 // The largest value that will not lead to overflow for 10bit samples.
+
+// Private encoder options, published through the AVClass below.
+static const AVOption options[] = {
+    // nitris_compat: integer switch limited to 0..1, off by default.
+    // NOTE(review): the default is stored through .dbl; confirm this matches
+    // the AVOption default_val convention of the libavutil version in use.
+    {"nitris_compat", "encode with Avid Nitris compatibility", offsetof(DNXHDEncContext, nitris_compat), AV_OPT_TYPE_INT, {.dbl = 0}, 0, 1, VE},
+    {NULL}
+};
+
+// Option/logging class attached to the encoder's private context.
+static const AVClass class = {
+    .class_name = "dnxhd",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
#define LAMBDA_FRAC_BITS 10
-static av_always_inline void dnxhd_get_pixels_8x4(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
+static void dnxhd_8bit_get_pixels_8x4_sym(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
{
int i;
for (i = 0; i < 4; i++) {
pixels += line_size;
block += 8;
}
- memcpy(block , block- 8, sizeof(*block)*8);
- memcpy(block+ 8, block-16, sizeof(*block)*8);
- memcpy(block+16, block-24, sizeof(*block)*8);
- memcpy(block+24, block-32, sizeof(*block)*8);
+ memcpy(block, block - 8, sizeof(*block) * 8);
+ memcpy(block + 8, block - 16, sizeof(*block) * 8);
+ memcpy(block + 16, block - 24, sizeof(*block) * 8);
+ memcpy(block + 24, block - 32, sizeof(*block) * 8);
+}
+
+/**
+ * Fetch a strip of 4 rows by 8 samples (each sample sizeof(*block) bytes wide)
+ * and extend it to a full 8x8 block by vertical mirroring.
+ *
+ * Rows 0..3 of block receive source rows 0..3 in order; rows 4..7 receive the
+ * same rows reversed (3, 2, 1, 0). The mirrored half matches the memcpy
+ * sequence at the end of dnxhd_8bit_get_pixels_8x4_sym(). Storing the source
+ * rows reversed in the upper half instead would flip the valid lines
+ * vertically.
+ *
+ * @param block     destination, 64 coefficients
+ * @param pixels    first source row
+ * @param line_size distance in bytes between consecutive source rows
+ */
+static av_always_inline void dnxhd_10bit_get_pixels_8x4_sym(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
+{
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        const uint8_t *src = pixels + i * line_size;
+
+        memcpy(block + i * 8,       src, 8 * sizeof(*block)); // rows 0..3 in source order
+        memcpy(block + (7 - i) * 8, src, 8 * sizeof(*block)); // rows 7..4 mirror them
+    }
+}
+
+/**
+ * Forward-transform and quantize one 8x8 block for the 10-bit path.
+ *
+ * The DC term is only rescaled (rounded divide by 4). Each AC term has its
+ * magnitude multiplied by the matching entry of ctx->q_intra_matrix[qscale],
+ * shifted right by DNX10BIT_QMAT_SHIFT, and then gets its sign back.
+ *
+ * @param ctx      supplies the fdct routine, the intra scan table and the
+ *                 quantizer matrices
+ * @param block    64 coefficients, transformed and quantized in place
+ * @param n        block index; not used here
+ * @param qscale   selects the row of ctx->q_intra_matrix
+ * @param overflow not written by this implementation
+ * @return scan-order index of the last nonzero AC coefficient, 0 if none
+ */
+static int dnxhd_10bit_dct_quantize(MpegEncContext *ctx, DCTELEM *block,
+                                    int n, int qscale, int *overflow)
+{
+    const uint8_t *scan = ctx->intra_scantable.scantable;
+    const int *quant    = ctx->q_intra_matrix[qscale];
+    int last = 0;
+    int pos;
+
+    ctx->dsp.fdct(block);
+
+    // Divide by 4 with rounding, to compensate scaling of DCT coefficients
+    block[0] = (block[0] + 2) >> 2;
+
+    for (pos = 1; pos < 64; pos++) {
+        const int idx      = scan[pos];
+        const int coef     = block[idx];
+        const int negative = coef < 0;
+        int mag            = negative ? -coef : coef;
+
+        // Quantize the magnitude, then put the sign back.
+        mag = mag * quant[idx] >> DNX10BIT_QMAT_SHIFT;
+        block[idx] = negative ? -mag : mag;
+
+        if (mag != 0)
+            last = pos;
+    }
+
+    return last;
}
static int dnxhd_init_vlc(DNXHDEncContext *ctx)
int max_level = 1<<(ctx->cid_table->bit_depth+2);
FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->vlc_codes, max_level*4*sizeof(*ctx->vlc_codes), fail);
- FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->vlc_bits , max_level*4*sizeof(*ctx->vlc_bits ), fail);
- FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->run_codes, 63*2 , fail);
- FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->run_bits , 63 , fail);
+ FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->vlc_bits, max_level*4*sizeof(*ctx->vlc_bits) , fail);
+ FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->run_codes, 63*2, fail);
+ FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->run_bits, 63, fail);
ctx->vlc_codes += max_level*2;
ctx->vlc_bits += max_level*2;
// init first elem to 1 to avoid div by 0 in convert_matrix
uint16_t weight_matrix[64] = {1,}; // convert_matrix needs uint16_t*
int qscale, i;
+ const uint8_t *luma_weight_table = ctx->cid_table->luma_weight;
+ const uint8_t *chroma_weight_table = ctx->cid_table->chroma_weight;
- FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_l, (ctx->m.avctx->qmax+1) * 64 * sizeof(int) , fail);
- FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_c, (ctx->m.avctx->qmax+1) * 64 * sizeof(int) , fail);
+ FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_l, (ctx->m.avctx->qmax+1) * 64 * sizeof(int), fail);
+ FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_c, (ctx->m.avctx->qmax+1) * 64 * sizeof(int), fail);
FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_l16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t), fail);
FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_c16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t), fail);
- for (i = 1; i < 64; i++) {
- int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
- weight_matrix[j] = ctx->cid_table->luma_weight[i];
- }
- ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_l, ctx->qmatrix_l16, weight_matrix,
- ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
- for (i = 1; i < 64; i++) {
- int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
- weight_matrix[j] = ctx->cid_table->chroma_weight[i];
- }
- ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_c, ctx->qmatrix_c16, weight_matrix,
- ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
- for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
- for (i = 0; i < 64; i++) {
- ctx->qmatrix_l [qscale] [i] <<= 2; ctx->qmatrix_c [qscale] [i] <<= 2;
- ctx->qmatrix_l16[qscale][0][i] <<= 2; ctx->qmatrix_l16[qscale][1][i] <<= 2;
- ctx->qmatrix_c16[qscale][0][i] <<= 2; ctx->qmatrix_c16[qscale][1][i] <<= 2;
+ if (ctx->cid_table->bit_depth == 8) {
+ for (i = 1; i < 64; i++) {
+ int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
+ weight_matrix[j] = ctx->cid_table->luma_weight[i];
+ }
+ ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_l, ctx->qmatrix_l16, weight_matrix,
+ ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
+ for (i = 1; i < 64; i++) {
+ int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
+ weight_matrix[j] = ctx->cid_table->chroma_weight[i];
+ }
+ ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_c, ctx->qmatrix_c16, weight_matrix,
+ ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
+
+ for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
+ for (i = 0; i < 64; i++) {
+ ctx->qmatrix_l [qscale] [i] <<= 2; ctx->qmatrix_c [qscale] [i] <<= 2;
+ ctx->qmatrix_l16[qscale][0][i] <<= 2; ctx->qmatrix_l16[qscale][1][i] <<= 2;
+ ctx->qmatrix_c16[qscale][0][i] <<= 2; ctx->qmatrix_c16[qscale][1][i] <<= 2;
+ }
+ }
+ } else {
+ // 10-bit
+ for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
+ for (i = 1; i < 64; i++) {
+ int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
+
+ // The quantization formula from the VC-3 standard is:
+ // quantized = sign(block[i]) * floor(abs(block[i]/s) * p / (qscale * weight_table[i]))
+ // Where p is 32 for 8-bit samples and 8 for 10-bit ones.
+ // The s factor compensates scaling of DCT coefficients done by the DCT routines,
+ // and therefore is not present in the standard. It's 8 for 8-bit samples and 4 for 10-bit ones.
+ // We want values of ctx->qmatrix_l and ctx->qmatrix_c to be:
+ // ((1 << DNX10BIT_QMAT_SHIFT) * (p / s)) / (qscale * weight_table[i])
+ // For 10-bit samples, p / s == 2
+ ctx->qmatrix_l[qscale][j] = (1 << (DNX10BIT_QMAT_SHIFT + 1)) / (qscale * luma_weight_table[i]);
+ ctx->qmatrix_c[qscale][j] = (1 << (DNX10BIT_QMAT_SHIFT + 1)) / (qscale * chroma_weight_table[i]);
+ }
}
}
+
return 0;
fail:
return -1;
if (ctx->m.avctx->mb_decision != FF_MB_DECISION_RD)
FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_cmp, ctx->m.mb_num*sizeof(RCCMPEntry), fail);
- ctx->frame_bits = (ctx->cid_table->coding_unit_size - 640 - 4) * 8;
+ ctx->frame_bits = (ctx->cid_table->coding_unit_size - 640 - 4 - ctx->min_padding) * 8;
ctx->qscale = 1;
ctx->lambda = 2<<LAMBDA_FRAC_BITS; // qscale 2
return 0;
static int dnxhd_encode_init(AVCodecContext *avctx)
{
DNXHDEncContext *ctx = avctx->priv_data;
- int i, index;
+ int i, index, bit_depth;
+
+ switch (avctx->pix_fmt) {
+ case PIX_FMT_YUV422P:
+ bit_depth = 8;
+ break;
+ case PIX_FMT_YUV422P10:
+ bit_depth = 10;
+ break;
+ default:
+ av_log(avctx, AV_LOG_ERROR, "pixel format is incompatible with DNxHD\n");
+ return -1;
+ }
- ctx->cid = ff_dnxhd_find_cid(avctx);
- if (!ctx->cid || avctx->pix_fmt != PIX_FMT_YUV422P) {
+ ctx->cid = ff_dnxhd_find_cid(avctx, bit_depth);
+ if (!ctx->cid) {
av_log(avctx, AV_LOG_ERROR, "video parameters incompatible with DNxHD\n");
return -1;
}
ctx->m.mb_intra = 1;
ctx->m.h263_aic = 1;
- ctx->get_pixels_8x4_sym = dnxhd_get_pixels_8x4;
+ avctx->bits_per_raw_sample = ctx->cid_table->bit_depth;
- dsputil_init(&ctx->m.dsp, avctx);
+ ff_dsputil_init(&ctx->m.dsp, avctx);
ff_dct_common_init(&ctx->m);
+ if (!ctx->m.dct_quantize)
+ ctx->m.dct_quantize = ff_dct_quantize_c;
+
+ if (ctx->cid_table->bit_depth == 10) {
+ ctx->m.dct_quantize = dnxhd_10bit_dct_quantize;
+ ctx->get_pixels_8x4_sym = dnxhd_10bit_get_pixels_8x4_sym;
+ ctx->block_width_l2 = 4;
+ } else {
+ ctx->get_pixels_8x4_sym = dnxhd_8bit_get_pixels_8x4_sym;
+ ctx->block_width_l2 = 3;
+ }
+
#if HAVE_MMX
ff_dnxhd_init_mmx(ctx);
#endif
- if (!ctx->m.dct_quantize)
- ctx->m.dct_quantize = dct_quantize_c;
ctx->m.mb_height = (avctx->height + 15) / 16;
ctx->m.mb_width = (avctx->width + 15) / 16;
if (dnxhd_init_qmat(ctx, ctx->m.intra_quant_bias, 0) < 0) // XXX tune lbias/cbias
return -1;
+ // Avid Nitris hardware decoder requires a minimum amount of padding in the coding unit payload
+ if (ctx->nitris_compat)
+ ctx->min_padding = 1600;
+
if (dnxhd_init_vlc(ctx) < 0)
return -1;
if (dnxhd_init_rc(ctx) < 0)
FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->slice_size, ctx->m.mb_height*sizeof(uint32_t), fail);
FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->slice_offs, ctx->m.mb_height*sizeof(uint32_t), fail);
FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_bits, ctx->m.mb_num *sizeof(uint16_t), fail);
- FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_qscale, ctx->m.mb_num *sizeof(uint8_t) , fail);
+ FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_qscale, ctx->m.mb_num *sizeof(uint8_t), fail);
ctx->frame.key_frame = 1;
- ctx->frame.pict_type = FF_I_TYPE;
+ ctx->frame.pict_type = AV_PICTURE_TYPE_I;
ctx->m.avctx->coded_frame = &ctx->frame;
if (avctx->thread_count > MAX_THREADS) {
buf[5] = ctx->interlaced ? ctx->cur_field+2 : 0x01;
buf[6] = 0x80; // crc flag off
buf[7] = 0xa0; // reserved
- AV_WB16(buf + 0x18, avctx->height); // ALPF
+ AV_WB16(buf + 0x18, avctx->height>>ctx->interlaced); // ALPF
AV_WB16(buf + 0x1a, avctx->width); // SPL
- AV_WB16(buf + 0x1d, avctx->height); // NAL
+ AV_WB16(buf + 0x1d, avctx->height>>ctx->interlaced); // NAL
- buf[0x21] = 0x38; // FIXME 8 bit per comp
- buf[0x22] = 0x88 + (ctx->frame.interlaced_frame<<2);
+ buf[0x21] = ctx->cid_table->bit_depth == 10 ? 0x58 : 0x38;
+ buf[0x22] = 0x88 + (ctx->interlaced<<2);
AV_WB32(buf + 0x28, ctx->cid); // CID
buf[0x2c] = ctx->interlaced ? 0 : 0x80;
if (level) {
if (level < 0) {
level = (1-2*level) * qscale * weight_matrix[i];
- if (weight_matrix[i] != 32)
- level += 32;
- level >>= 6;
+ if (ctx->cid_table->bit_depth == 10) {
+ if (weight_matrix[i] != 8)
+ level += 8;
+ level >>= 4;
+ } else {
+ if (weight_matrix[i] != 32)
+ level += 32;
+ level >>= 6;
+ }
level = -level;
} else {
level = (2*level+1) * qscale * weight_matrix[i];
- if (weight_matrix[i] != 32)
- level += 32;
- level >>= 6;
+ if (ctx->cid_table->bit_depth == 10) {
+ if (weight_matrix[i] != 8)
+ level += 8;
+ level >>= 4;
+ } else {
+ if (weight_matrix[i] != 32)
+ level += 32;
+ level >>= 6;
+ }
}
block[j] = level;
}
int score = 0;
int i;
for (i = 0; i < 64; i++)
- score += (block[i]-qblock[i])*(block[i]-qblock[i]);
+ score += (block[i] - qblock[i]) * (block[i] - qblock[i]);
return score;
}
static av_always_inline void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y)
{
- const uint8_t *ptr_y = ctx->thread[0]->src[0] + ((mb_y << 4) * ctx->m.linesize) + (mb_x << 4);
- const uint8_t *ptr_u = ctx->thread[0]->src[1] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << 3);
- const uint8_t *ptr_v = ctx->thread[0]->src[2] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << 3);
+ const int bs = ctx->block_width_l2; // per-depth shift set in dnxhd_encode_init: 3 for 8-bit, 4 for 10-bit; used below to fill ctx->blocks[0..7] for macroblock (mb_x, mb_y)
+ const int bw = 1 << bs; // byte offset added to reach the second luma block of a pair
+ const uint8_t *ptr_y = ctx->thread[0]->src[0] + ((mb_y << 4) * ctx->m.linesize) + (mb_x << bs+1); // '+' binds tighter than '<<', so this is mb_x << (bs + 1)
+ const uint8_t *ptr_u = ctx->thread[0]->src[1] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs); // chroma advances mb_x << bs bytes per macroblock column
+ const uint8_t *ptr_v = ctx->thread[0]->src[2] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs);
DSPContext *dsp = &ctx->m.dsp;
- dsp->get_pixels(ctx->blocks[0], ptr_y , ctx->m.linesize);
- dsp->get_pixels(ctx->blocks[1], ptr_y + 8, ctx->m.linesize);
- dsp->get_pixels(ctx->blocks[2], ptr_u , ctx->m.uvlinesize);
- dsp->get_pixels(ctx->blocks[3], ptr_v , ctx->m.uvlinesize);
+ dsp->get_pixels(ctx->blocks[0], ptr_y, ctx->m.linesize); // blocks 0..3: the two luma blocks, then U, then V
+ dsp->get_pixels(ctx->blocks[1], ptr_y + bw, ctx->m.linesize);
+ dsp->get_pixels(ctx->blocks[2], ptr_u, ctx->m.uvlinesize);
+ dsp->get_pixels(ctx->blocks[3], ptr_v, ctx->m.uvlinesize);
if (mb_y+1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) {
if (ctx->interlaced) {
- ctx->get_pixels_8x4_sym(ctx->blocks[4], ptr_y + ctx->dct_y_offset , ctx->m.linesize);
- ctx->get_pixels_8x4_sym(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize);
- ctx->get_pixels_8x4_sym(ctx->blocks[6], ptr_u + ctx->dct_uv_offset , ctx->m.uvlinesize);
- ctx->get_pixels_8x4_sym(ctx->blocks[7], ptr_v + ctx->dct_uv_offset , ctx->m.uvlinesize);
+ ctx->get_pixels_8x4_sym(ctx->blocks[4], ptr_y + ctx->dct_y_offset, ctx->m.linesize); // last macroblock row, height 1080, interlaced: blocks 4..7 come from the 8x4 symmetric fetch
+ ctx->get_pixels_8x4_sym(ctx->blocks[5], ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize);
+ ctx->get_pixels_8x4_sym(ctx->blocks[6], ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize);
+ ctx->get_pixels_8x4_sym(ctx->blocks[7], ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize);
} else {
- dsp->clear_block(ctx->blocks[4]); dsp->clear_block(ctx->blocks[5]);
- dsp->clear_block(ctx->blocks[6]); dsp->clear_block(ctx->blocks[7]);
+ dsp->clear_block(ctx->blocks[4]); // last macroblock row, height 1080, not interlaced: blocks 4..7 are cleared instead of read
+ dsp->clear_block(ctx->blocks[5]);
+ dsp->clear_block(ctx->blocks[6]);
+ dsp->clear_block(ctx->blocks[7]);
}
} else {
- dsp->get_pixels(ctx->blocks[4], ptr_y + ctx->dct_y_offset , ctx->m.linesize);
- dsp->get_pixels(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize);
- dsp->get_pixels(ctx->blocks[6], ptr_u + ctx->dct_uv_offset , ctx->m.uvlinesize);
- dsp->get_pixels(ctx->blocks[7], ptr_v + ctx->dct_uv_offset , ctx->m.uvlinesize);
+ dsp->get_pixels(ctx->blocks[4], ptr_y + ctx->dct_y_offset, ctx->m.linesize); // every other macroblock: blocks 4..7 are full 8x8 reads at dct_y_offset / dct_uv_offset
+ dsp->get_pixels(ctx->blocks[5], ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize);
+ dsp->get_pixels(ctx->blocks[6], ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize);
+ dsp->get_pixels(ctx->blocks[7], ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize);
}
}
DNXHDEncContext *ctx = avctx->priv_data;
int mb_y = jobnr, mb_x;
int qscale = ctx->qscale;
+ LOCAL_ALIGNED_16(DCTELEM, block, [64]);
ctx = ctx->thread[threadnr];
ctx->m.last_dc[0] =
ctx->m.last_dc[1] =
- ctx->m.last_dc[2] = 1024;
+ ctx->m.last_dc[2] = 1 << (ctx->cid_table->bit_depth + 2);
for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
unsigned mb = mb_y * ctx->m.mb_width + mb_x;
dnxhd_get_blocks(ctx, mb_x, mb_y);
for (i = 0; i < 8; i++) {
- DECLARE_ALIGNED_16(DCTELEM, block[64]);
DCTELEM *src_block = ctx->blocks[i];
int overflow, nbits, diff, last_index;
int n = dnxhd_switch_matrix(ctx, i);
- memcpy(block, src_block, sizeof(block));
- last_index = ctx->m.dct_quantize((MpegEncContext*)ctx, block, i, qscale, &overflow);
+ memcpy(block, src_block, 64*sizeof(*block));
+ last_index = ctx->m.dct_quantize(&ctx->m, block, i, qscale, &overflow);
ac_bits += dnxhd_calc_ac_bits(ctx, block, last_index);
diff = block[0] - ctx->m.last_dc[n];
if (diff < 0) nbits = av_log2_16bit(-2*diff);
else nbits = av_log2_16bit( 2*diff);
+
+ assert(nbits < ctx->cid_table->bit_depth + 4);
dc_bits += ctx->cid_table->dc_bits[nbits] + nbits;
ctx->m.last_dc[n] = block[0];
ctx->m.last_dc[0] =
ctx->m.last_dc[1] =
- ctx->m.last_dc[2] = 1024;
+ ctx->m.last_dc[2] = 1 << (ctx->cid_table->bit_depth + 2);
for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
unsigned mb = mb_y * ctx->m.mb_width + mb_x;
int qscale = ctx->mb_qscale[mb];
for (i = 0; i < 8; i++) {
DCTELEM *block = ctx->blocks[i];
- int last_index, overflow;
- int n = dnxhd_switch_matrix(ctx, i);
- last_index = ctx->m.dct_quantize((MpegEncContext*)ctx, block, i, qscale, &overflow);
+ int overflow, n = dnxhd_switch_matrix(ctx, i);
+ int last_index = ctx->m.dct_quantize(&ctx->m, block, i,
+ qscale, &overflow);
//START_TIMER;
dnxhd_encode_block(ctx, block, last_index, n);
//STOP_TIMER("encode_block");
for (mb_y = 0; mb_y < ctx->m.mb_height; mb_y++) {
int thread_size;
ctx->slice_offs[mb_y] = offset;
- ctx->slice_size[mb_y] = 0;
- for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
- unsigned mb = mb_y * ctx->m.mb_width + mb_x;
- ctx->slice_size[mb_y] += ctx->mb_bits[mb];
- }
- ctx->slice_size[mb_y] = (ctx->slice_size[mb_y]+31)&~31;
- ctx->slice_size[mb_y] >>= 3;
- thread_size = ctx->slice_size[mb_y];
+ ctx->slice_size[mb_y] = 0;
+ for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
+ unsigned mb = mb_y * ctx->m.mb_width + mb_x;
+ ctx->slice_size[mb_y] += ctx->mb_bits[mb];
+ }
+ ctx->slice_size[mb_y] = (ctx->slice_size[mb_y]+31)&~31;
+ ctx->slice_size[mb_y] >>= 3;
+ thread_size = ctx->slice_size[mb_y];
offset += thread_size;
}
}
DNXHDEncContext *ctx = avctx->priv_data;
int mb_y = jobnr, mb_x;
ctx = ctx->thread[threadnr];
- for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
- unsigned mb = mb_y * ctx->m.mb_width + mb_x;
- uint8_t *pix = ctx->thread[0]->src[0] + ((mb_y<<4) * ctx->m.linesize) + (mb_x<<4);
- int sum = ctx->m.dsp.pix_sum(pix, ctx->m.linesize);
- int varc = (ctx->m.dsp.pix_norm1(pix, ctx->m.linesize) - (((unsigned)(sum*sum))>>8)+128)>>8;
- ctx->mb_cmp[mb].value = varc;
- ctx->mb_cmp[mb].mb = mb;
+ if (ctx->cid_table->bit_depth == 8) {
+ uint8_t *pix = ctx->thread[0]->src[0] + ((mb_y<<4) * ctx->m.linesize);
+ for (mb_x = 0; mb_x < ctx->m.mb_width; ++mb_x, pix += 16) {
+ unsigned mb = mb_y * ctx->m.mb_width + mb_x;
+ int sum = ctx->m.dsp.pix_sum(pix, ctx->m.linesize);
+ int varc = (ctx->m.dsp.pix_norm1(pix, ctx->m.linesize) - (((unsigned)sum*sum)>>8)+128)>>8;
+ ctx->mb_cmp[mb].value = varc;
+ ctx->mb_cmp[mb].mb = mb;
+ }
+ } else { // 10-bit
+ int const linesize = ctx->m.linesize >> 1;
+ for (mb_x = 0; mb_x < ctx->m.mb_width; ++mb_x) {
+ uint16_t *pix = (uint16_t*)ctx->thread[0]->src[0] + ((mb_y << 4) * linesize) + (mb_x << 4);
+ unsigned mb = mb_y * ctx->m.mb_width + mb_x;
+ int sum = 0;
+ int sqsum = 0;
+ int mean, sqmean;
+ int i, j;
+ // Macroblocks are 16x16 pixels, unlike DCT blocks which are 8x8.
+ for (i = 0; i < 16; ++i) {
+ for (j = 0; j < 16; ++j) {
+ // Turn 16-bit pixels into 10-bit ones.
+ int const sample = (unsigned)pix[j] >> 6;
+ sum += sample;
+ sqsum += sample * sample;
+ // 2^10 * 2^10 * 16 * 16 = 2^28, which is less than INT_MAX
+ }
+ pix += linesize;
+ }
+ mean = sum >> 8; // 16*16 == 2^8
+ sqmean = sqsum >> 8;
+ ctx->mb_cmp[mb].value = sqmean - mean * mean;
+ ctx->mb_cmp[mb].mb = mb;
+ }
}
return 0;
}
int qscale = 1;
int mb = y*ctx->m.mb_width+x;
for (q = 1; q < avctx->qmax; q++) {
- unsigned score = ctx->mb_rc[q][mb].bits*lambda+(ctx->mb_rc[q][mb].ssd<<LAMBDA_FRAC_BITS);
+ unsigned score = ctx->mb_rc[q][mb].bits*lambda+
+ ((unsigned)ctx->mb_rc[q][mb].ssd<<LAMBDA_FRAC_BITS);
if (score < min) {
min = score;
qscale = q;
if (bits > ctx->frame_bits)
break;
}
- //dprintf(ctx->m.avctx, "lambda %d, up %u, down %u, bits %d, frame %d\n",
+ //av_dlog(ctx->m.avctx, "lambda %d, up %u, down %u, bits %d, frame %d\n",
// lambda, last_higher, last_lower, bits, ctx->frame_bits);
if (end) {
if (bits > ctx->frame_bits)
lambda = (lambda+last_higher)>>1;
else
lambda -= down_step;
- down_step *= 5; // XXX tune ?
+ down_step = FFMIN((int64_t)down_step*5, INT_MAX);
up_step = 1<<LAMBDA_FRAC_BITS;
lambda = FFMAX(1, lambda);
if (lambda == last_lower)
last_higher = FFMAX(lambda, last_higher);
if (last_lower != INT_MAX)
lambda = (lambda+last_lower)>>1;
+ else if ((int64_t)lambda + up_step > INT_MAX)
+ return -1;
else
lambda += up_step;
- up_step *= 5;
+ up_step = FFMIN((int64_t)up_step*5, INT_MAX);
down_step = 1<<LAMBDA_FRAC_BITS;
}
}
- //dprintf(ctx->m.avctx, "out lambda %d\n", lambda);
+ //av_dlog(ctx->m.avctx, "out lambda %d\n", lambda);
ctx->lambda = lambda;
return 0;
}
if (bits > ctx->frame_bits)
break;
}
- //dprintf(ctx->m.avctx, "%d, qscale %d, bits %d, frame %d, higher %d, lower %d\n",
+ //av_dlog(ctx->m.avctx, "%d, qscale %d, bits %d, frame %d, higher %d, lower %d\n",
// ctx->m.avctx->frame_number, qscale, bits, ctx->frame_bits, last_higher, last_lower);
if (bits < ctx->frame_bits) {
if (qscale == 1)
return -1;
}
}
- //dprintf(ctx->m.avctx, "out qscale %d\n", qscale);
+ //av_dlog(ctx->m.avctx, "out qscale %d\n", qscale);
ctx->qscale = qscale;
return 0;
}
ctx->cur_field = frame->interlaced_frame && !frame->top_field_first;
}
-static int dnxhd_encode_picture(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data)
+static int dnxhd_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
+ const AVFrame *frame, int *got_packet)
{
DNXHDEncContext *ctx = avctx->priv_data;
int first_field = 1;
int offset, i, ret;
+ uint8_t *buf;
- if (buf_size < ctx->cid_table->frame_size) {
+ if ((ret = ff_alloc_packet(pkt, ctx->cid_table->frame_size)) < 0) {
av_log(avctx, AV_LOG_ERROR, "output buffer is too small to compress picture\n");
- return -1;
+ return ret;
}
+ buf = pkt->data;
- dnxhd_load_picture(ctx, data);
+ dnxhd_load_picture(ctx, frame);
encode_coding_unit:
for (i = 0; i < 3; i++) {
first_field = 0;
ctx->cur_field ^= 1;
buf += ctx->cid_table->coding_unit_size;
- buf_size -= ctx->cid_table->coding_unit_size;
goto encode_coding_unit;
}
ctx->frame.quality = ctx->qscale*FF_QP2LAMBDA;
- return ctx->cid_table->frame_size;
+ pkt->flags |= AV_PKT_FLAG_KEY;
+ *got_packet = 1;
+ return 0;
}
static int dnxhd_encode_end(AVCodecContext *avctx)
return 0;
}
-AVCodec dnxhd_encoder = {
- "dnxhd",
- CODEC_TYPE_VIDEO,
- CODEC_ID_DNXHD,
- sizeof(DNXHDEncContext),
- dnxhd_encode_init,
- dnxhd_encode_picture,
- dnxhd_encode_end,
- .pix_fmts = (const enum PixelFormat[]){PIX_FMT_YUV422P, PIX_FMT_NONE},
- .long_name = NULL_IF_CONFIG_SMALL("VC3/DNxHD"),
+AVCodec ff_dnxhd_encoder = {
+ .name = "dnxhd",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .id = CODEC_ID_DNXHD,
+ .priv_data_size = sizeof(DNXHDEncContext),
+ .init = dnxhd_encode_init,
+ .encode2 = dnxhd_encode_picture, // takes an AVPacket and reports through *got_packet
+ .close = dnxhd_encode_end,
+ .capabilities = CODEC_CAP_SLICE_THREADS,
+ .pix_fmts = (const enum PixelFormat[]){ PIX_FMT_YUV422P, // handled as bit depth 8 in dnxhd_encode_init
+ PIX_FMT_YUV422P10, // handled as bit depth 10 in dnxhd_encode_init
+ PIX_FMT_NONE },
+ .long_name = NULL_IF_CONFIG_SMALL("VC3/DNxHD"),
+ .priv_class = &class, // AVClass carrying the private options table (nitris_compat)
};