X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;ds=sidebyside;f=libavcodec%2Fvc2enc.c;h=7bd2e4c2abac3ec29177497cb5d3db98785b2032;hb=5663301560d77486c7f7c03c1aa5f542fab23c24;hp=0a8c3633ef84204efadd58c10b59ab0f0b651167;hpb=e55781954fdc23de1d17dbdca73e3b7006460c80;p=ffmpeg diff --git a/libavcodec/vc2enc.c b/libavcodec/vc2enc.c index 0a8c3633ef8..7bd2e4c2aba 100644 --- a/libavcodec/vc2enc.c +++ b/libavcodec/vc2enc.c @@ -29,10 +29,6 @@ #include "vc2enc_dwt.h" #include "diractab.h" -/* Total range is -COEF_LUT_TAB to +COEFF_LUT_TAB, but total tab size is half - * (COEF_LUT_TAB*DIRAC_MAX_QUANT_INDEX), as the sign is appended during encoding */ -#define COEF_LUT_TAB 2048 - /* The limited size resolution of each slice forces us to do this */ #define SSIZE_ROUND(b) (FFALIGN((b), s->size_scaler) + 4 + s->prefix_bytes) @@ -152,9 +148,8 @@ typedef struct VC2EncContext { uint8_t quant[MAX_DWT_LEVELS][4]; int custom_quant_matrix; - /* Coefficient LUT */ - uint32_t *coef_lut_val; - uint8_t *coef_lut_len; + /* Division LUT */ + uint32_t qmagic_lut[116][2]; int num_x; /* #slices horizontally */ int num_y; /* #slices vertically */ @@ -164,6 +159,7 @@ typedef struct VC2EncContext { int chroma_y_shift; /* Rate control stuff */ + int frame_max_bytes; int slice_max_bytes; int slice_min_bytes; int q_ceil; @@ -228,48 +224,17 @@ static av_always_inline int count_vc2_ue_uint(uint32_t val) return ff_log2(topbit)*2 + 1; } -static av_always_inline void get_vc2_ue_uint(int val, uint8_t *nbits, - uint32_t *eval) -{ - int i; - int pbits = 0, bits = 0, topbit = 1, maxval = 1; - - if (!val++) { - *nbits = 1; - *eval = 1; - return; - } - - while (val > maxval) { - topbit <<= 1; - maxval <<= 1; - maxval |= 1; - } - - bits = ff_log2(topbit); - - for (i = 0; i < bits; i++) { - topbit >>= 1; - pbits <<= 2; - if (val & topbit) - pbits |= 0x1; - } - - *nbits = bits*2 + 1; - *eval = (pbits << 1) | 1; -} - /* VC-2 10.4 - parse_info() */ static void encode_parse_info(VC2EncContext *s, enum DiracParseCodes pcode) { uint32_t cur_pos, dist; - avpriv_align_put_bits(&s->pb); + align_put_bits(&s->pb); cur_pos = put_bits_count(&s->pb) >> 3; /* Magic string */ - avpriv_put_string(&s->pb, "BBCD", 0); + ff_put_string(&s->pb, "BBCD", 0); /* Parse code */ put_bits(&s->pb, 8, pcode); @@ -434,7 +399,7 @@ static void encode_source_params(VC2EncContext *s) /* VC-2 11 - sequence_header() */ static void encode_seq_header(VC2EncContext *s) { - avpriv_align_put_bits(&s->pb); + align_put_bits(&s->pb); encode_parse_params(s); put_vc2_ue_uint(&s->pb, s->base_vf); encode_source_params(s); @@ -444,7 +409,7 @@ static void encode_seq_header(VC2EncContext *s) /* VC-2 12.1 - picture_header() */ static void encode_picture_header(VC2EncContext *s) { - avpriv_align_put_bits(&s->pb); + align_put_bits(&s->pb); put_bits32(&s->pb, s->picture_number++); } @@ -458,7 +423,7 @@ static void encode_slice_params(VC2EncContext *s) } /* 1st idx = LL, second - vertical, third - horizontal, fourth - total */ -const uint8_t vc2_qm_col_tab[][4] = { +static const uint8_t vc2_qm_col_tab[][4] = { {20, 9, 15, 4}, { 0, 6, 6, 4}, { 0, 3, 3, 5}, @@ -466,7 +431,7 @@ const uint8_t vc2_qm_col_tab[][4] = { { 0, 11, 10, 11} }; -const uint8_t vc2_qm_flat_tab[][4] = { +static const uint8_t vc2_qm_flat_tab[][4] = { { 0, 0, 0, 0}, { 0, 0, 0, 0}, { 0, 0, 0, 0}, @@ -544,19 +509,19 @@ static void encode_transform_params(VC2EncContext *s) static void encode_wavelet_transform(VC2EncContext *s) { encode_transform_params(s); - avpriv_align_put_bits(&s->pb); + align_put_bits(&s->pb); } /* VC-2 12 - picture_parse() */ static void encode_picture_start(VC2EncContext *s) { - avpriv_align_put_bits(&s->pb); + align_put_bits(&s->pb); encode_picture_header(s); - avpriv_align_put_bits(&s->pb); + align_put_bits(&s->pb); encode_wavelet_transform(s); } -#define QUANT(c, qf) (((c) << 2)/(qf)) +#define QUANT(c, mul, add, shift) (((mul) * (c) + (add)) >> (shift)) /* VC-2 13.5.5.2 - slice_band() */ static void encode_subband(VC2EncContext *s, PutBitContext *pb, int sx, int sy, @@ -569,24 +534,17 @@ static void encode_subband(VC2EncContext *s, PutBitContext *pb, int sx, int sy, const int top = b->height * (sy+0) / s->num_y; const int bottom = b->height * (sy+1) / s->num_y; - const int qfactor = ff_dirac_qscale_tab[quant]; - const uint8_t *len_lut = &s->coef_lut_len[quant*COEF_LUT_TAB]; - const uint32_t *val_lut = &s->coef_lut_val[quant*COEF_LUT_TAB]; - dwtcoef *coeff = b->buf + top * b->stride; + const uint64_t q_m = ((uint64_t)(s->qmagic_lut[quant][0])) << 2; + const uint64_t q_a = s->qmagic_lut[quant][1]; + const int q_s = av_log2(ff_dirac_qscale_tab[quant]) + 32; for (y = top; y < bottom; y++) { for (x = left; x < right; x++) { - const int neg = coeff[x] < 0; - uint32_t c_abs = FFABS(coeff[x]); - if (c_abs < COEF_LUT_TAB) { - put_bits(pb, len_lut[c_abs], val_lut[c_abs] | neg); - } else { - c_abs = QUANT(c_abs, qfactor); - put_vc2_ue_uint(pb, c_abs); - if (c_abs) - put_bits(pb, 1, neg); - } + uint32_t c_abs = QUANT(FFABS(coeff[x]), q_m, q_a, q_s); + put_vc2_ue_uint(pb, c_abs); + if (c_abs) + put_bits(pb, 1, coeff[x] < 0); } coeff += b->stride; } @@ -618,8 +576,9 @@ static int count_hq_slice(SliceArgs *slice, int quant_idx) SubBand *b = &s->plane[p].band[level][orientation]; const int q_idx = quants[level][orientation]; - const uint8_t *len_lut = &s->coef_lut_len[q_idx*COEF_LUT_TAB]; - const int qfactor = ff_dirac_qscale_tab[q_idx]; + const uint64_t q_m = ((uint64_t)s->qmagic_lut[q_idx][0]) << 2; + const uint64_t q_a = s->qmagic_lut[q_idx][1]; + const int q_s = av_log2(ff_dirac_qscale_tab[q_idx]) + 32; const int left = b->width * slice->x / s->num_x; const int right = b->width *(slice->x+1) / s->num_x; @@ -630,14 +589,9 @@ static int count_hq_slice(SliceArgs *slice, int quant_idx) for (y = top; y < bottom; y++) { for (x = left; x < right; x++) { - uint32_t c_abs = FFABS(buf[x]); - if (c_abs < COEF_LUT_TAB) { - bits += len_lut[c_abs]; - } else { - c_abs = QUANT(c_abs, qfactor); - bits += count_vc2_ue_uint(c_abs); - bits += !!c_abs; - } + uint32_t c_abs = QUANT(FFABS(buf[x]), q_m, q_a, q_s); + bits += count_vc2_ue_uint(c_abs); + bits += !!c_abs; } buf += b->stride; } @@ -715,7 +669,7 @@ static int calc_slice_sizes(VC2EncContext *s) for (i = 0; i < s->num_x*s->num_y; i++) { SliceArgs *args = &enc_args[i]; - bytes_left += s->slice_max_bytes - args->bytes; + bytes_left += args->bytes; for (j = 0; j < slice_redist_range; j++) { if (args->bytes > bytes_top[j]) { bytes_top[j] = args->bytes; @@ -725,8 +679,10 @@ static int calc_slice_sizes(VC2EncContext *s) } } + bytes_left = s->frame_max_bytes - bytes_left; + /* Second pass - distribute leftover bytes */ - while (1) { + while (bytes_left > 0) { int distributed = 0; for (i = 0; i < slice_redist_range; i++) { SliceArgs *args; @@ -797,7 +753,7 @@ static int encode_hq_slice(AVCodecContext *avctx, void *arg) quants[level][orientation]); } } - avpriv_align_put_bits(pb); + align_put_bits(pb); bytes_len = (put_bits_count(pb) >> 3) - bytes_start - 1; if (p == 2) { int len_diff = slice_bytes_max - (put_bits_count(pb) >> 3); @@ -824,7 +780,6 @@ static int encode_slices(VC2EncContext *s) int slice_x, slice_y, skip = 0; SliceArgs *enc_args = s->slice_args; - avpriv_align_put_bits(&s->pb); flush_put_bits(&s->pb); buf = put_bits_ptr(&s->pb); @@ -911,6 +866,7 @@ static int dwt_plane(AVCodecContext *avctx, void *arg) for (x = 0; x < p->width; x++) { buf[x] = pix[x] - s->diff_offset; } + memset(&buf[x], 0, (p->coef_stride - p->width)*sizeof(dwtcoef)); buf += p->coef_stride; pix += pix_stride; } @@ -920,6 +876,7 @@ static int dwt_plane(AVCodecContext *avctx, void *arg) for (x = 0; x < p->width; x++) { buf[x] = pix[x] - s->diff_offset; } + memset(&buf[x], 0, (p->coef_stride - p->width)*sizeof(dwtcoef)); buf += p->coef_stride; pix += pix_stride; } @@ -974,7 +931,7 @@ static int encode_frame(VC2EncContext *s, AVPacket *avpkt, const AVFrame *frame, /* Encoder version */ if (aux_data) { encode_parse_info(s, DIRAC_PCODE_AUX); - avpriv_put_string(&s->pb, aux_data, 1); + ff_put_string(&s->pb, aux_data, 1); } /* Picture header */ @@ -994,13 +951,13 @@ static av_cold int vc2_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet) { int ret = 0; - int sig_size = 256; + int slice_ceil, sig_size = 256; VC2EncContext *s = avctx->priv_data; const int bitexact = avctx->flags & AV_CODEC_FLAG_BITEXACT; const char *aux_data = bitexact ? "Lavc" : LIBAVCODEC_IDENT; const int aux_data_size = bitexact ? sizeof("Lavc") : sizeof(LIBAVCODEC_IDENT); const int header_size = 100 + aux_data_size; - int64_t max_frame_bytes, r_bitrate = avctx->bit_rate >> (s->interlaced); + int64_t r_bitrate = avctx->bit_rate >> (s->interlaced); s->avctx = avctx; s->size_scaler = 2; @@ -1009,18 +966,21 @@ static av_cold int vc2_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, s->next_parse_offset = 0; /* Rate control */ - max_frame_bytes = (av_rescale(r_bitrate, s->avctx->time_base.num, - s->avctx->time_base.den) >> 3) - header_size; - s->slice_max_bytes = av_rescale(max_frame_bytes, 1, s->num_x*s->num_y); + s->frame_max_bytes = (av_rescale(r_bitrate, s->avctx->time_base.num, + s->avctx->time_base.den) >> 3) - header_size; + s->slice_max_bytes = slice_ceil = av_rescale(s->frame_max_bytes, 1, s->num_x*s->num_y); /* Find an appropriate size scaler */ while (sig_size > 255) { int r_size = SSIZE_ROUND(s->slice_max_bytes); + if (r_size > slice_ceil) { + s->slice_max_bytes -= r_size - slice_ceil; + r_size = SSIZE_ROUND(s->slice_max_bytes); + } sig_size = r_size/s->size_scaler; /* Signalled slize size */ s->size_scaler <<= 1; } - s->slice_max_bytes = SSIZE_ROUND(s->slice_max_bytes); s->slice_min_bytes = s->slice_max_bytes - s->slice_max_bytes*(s->tolerance/100.0f); ret = encode_frame(s, avpkt, frame, aux_data, header_size, s->interlaced); @@ -1053,8 +1013,6 @@ static av_cold int vc2_encode_end(AVCodecContext *avctx) } av_freep(&s->slice_args); - av_freep(&s->coef_lut_len); - av_freep(&s->coef_lut_val); return 0; } @@ -1063,7 +1021,7 @@ static av_cold int vc2_encode_init(AVCodecContext *avctx) { Plane *p; SubBand *b; - int i, j, level, o, shift, ret; + int i, level, o, shift, ret; const AVPixFmtDescriptor *fmt = av_pix_fmt_desc_get(avctx->pix_fmt); const int depth = fmt->comp[0].depth; VC2EncContext *s = avctx->priv_data; @@ -1173,7 +1131,7 @@ static av_cold int vc2_encode_init(AVCodecContext *avctx) p->dwt_width = w = FFALIGN(p->width, (1 << s->wavelet_depth)); p->dwt_height = h = FFALIGN(p->height, (1 << s->wavelet_depth)); p->coef_stride = FFALIGN(p->dwt_width, 32); - p->coef_buf = av_malloc(p->coef_stride*p->dwt_height*sizeof(dwtcoef)); + p->coef_buf = av_mallocz(p->coef_stride*p->dwt_height*sizeof(dwtcoef)); if (!p->coef_buf) goto alloc_fail; for (level = s->wavelet_depth-1; level >= 0; level--) { @@ -1205,27 +1163,20 @@ static av_cold int vc2_encode_init(AVCodecContext *avctx) if (!s->slice_args) goto alloc_fail; - /* Lookup tables */ - s->coef_lut_len = av_malloc(COEF_LUT_TAB*(s->q_ceil+1)*sizeof(*s->coef_lut_len)); - if (!s->coef_lut_len) - goto alloc_fail; - - s->coef_lut_val = av_malloc(COEF_LUT_TAB*(s->q_ceil+1)*sizeof(*s->coef_lut_val)); - if (!s->coef_lut_val) - goto alloc_fail; - - for (i = 0; i < s->q_ceil; i++) { - uint8_t *len_lut = &s->coef_lut_len[i*COEF_LUT_TAB]; - uint32_t *val_lut = &s->coef_lut_val[i*COEF_LUT_TAB]; - for (j = 0; j < COEF_LUT_TAB; j++) { - get_vc2_ue_uint(QUANT(j, ff_dirac_qscale_tab[i]), - &len_lut[j], &val_lut[j]); - if (len_lut[j] != 1) { - len_lut[j] += 1; - val_lut[j] <<= 1; - } else { - val_lut[j] = 1; - } + for (i = 0; i < 116; i++) { + const uint64_t qf = ff_dirac_qscale_tab[i]; + const uint32_t m = av_log2(qf); + const uint32_t t = (1ULL << (m + 32)) / qf; + const uint32_t r = (t*qf + qf) & UINT32_MAX; + if (!(qf & (qf - 1))) { + s->qmagic_lut[i][0] = 0xFFFFFFFF; + s->qmagic_lut[i][1] = 0xFFFFFFFF; + } else if (r <= 1 << m) { + s->qmagic_lut[i][0] = t + 1; + s->qmagic_lut[i][1] = 0; + } else { + s->qmagic_lut[i][0] = t; + s->qmagic_lut[i][1] = t; } }