/**
* FLAC audio encoder
- * Copyright (c) 2006 Justin Ruggles <jruggle@earthlink.net>
+ * Copyright (c) 2006 Justin Ruggles <justin.ruggles@gmail.com>
*
* This file is part of FFmpeg.
*
*/
#include "libavutil/crc.h"
-#include "libavutil/lls.h"
+#include "libavutil/md5.h"
#include "avcodec.h"
-#include "bitstream.h"
+#include "get_bits.h"
#include "dsputil.h"
#include "golomb.h"
-
-#define FLAC_MAX_CH 8
-#define FLAC_MIN_BLOCKSIZE 16
-#define FLAC_MAX_BLOCKSIZE 65535
+#include "lpc.h"
+#include "flac.h"
+#include "flacdata.h"
#define FLAC_SUBFRAME_CONSTANT 0
#define FLAC_SUBFRAME_VERBATIM 1
#define FLAC_SUBFRAME_FIXED 8
#define FLAC_SUBFRAME_LPC 32
-#define FLAC_CHMODE_NOT_STEREO 0
-#define FLAC_CHMODE_LEFT_RIGHT 1
-#define FLAC_CHMODE_LEFT_SIDE 8
-#define FLAC_CHMODE_RIGHT_SIDE 9
-#define FLAC_CHMODE_MID_SIDE 10
-
-#define ORDER_METHOD_EST 0
-#define ORDER_METHOD_2LEVEL 1
-#define ORDER_METHOD_4LEVEL 2
-#define ORDER_METHOD_8LEVEL 3
-#define ORDER_METHOD_SEARCH 4
-#define ORDER_METHOD_LOG 5
-
-#define FLAC_STREAMINFO_SIZE 34
-
-#define MIN_LPC_ORDER 1
-#define MAX_LPC_ORDER 32
#define MAX_FIXED_ORDER 4
#define MAX_PARTITION_ORDER 8
#define MAX_PARTITIONS (1 << MAX_PARTITION_ORDER)
} FlacSubframe;
typedef struct FlacFrame {
- FlacSubframe subframes[FLAC_MAX_CH];
+ FlacSubframe subframes[FLAC_MAX_CHANNELS];
int blocksize;
int bs_code[2];
uint8_t crc8;
typedef struct FlacEncodeContext {
PutBitContext pb;
int channels;
- int ch_code;
int samplerate;
int sr_code[2];
+ int max_blocksize; /* copied from avctx->frame_size at init; written to both 16-bit blocksize fields of streaminfo */
+ int min_framesize; /* starts at max_framesize and is lowered to the smallest encoded frame size seen */
int max_framesize;
+ int max_encoded_framesize; /* largest encoded frame size seen; copied into max_framesize when the stream is finalized */
uint32_t frame_count;
+ uint64_t sample_count; /* grows by avctx->frame_size per encoded frame; written as a 36-bit field in streaminfo */
+ uint8_t md5sum[16]; /* filled by av_md5_final() at finalization and copied into the streaminfo block */
FlacFrame frame;
CompressionOptions options;
AVCodecContext *avctx;
DSPContext dsp;
+ struct AVMD5 *md5ctx; /* allocated with av_malloc(av_md5_size) at init, released in flac_encode_close() */
} FlacEncodeContext;
-static const int flac_samplerates[16] = {
- 0, 0, 0, 0,
- 8000, 16000, 22050, 24000, 32000, 44100, 48000, 96000,
- 0, 0, 0, 0
-};
-
-static const int flac_blocksizes[16] = {
- 0,
- 192,
- 576, 1152, 2304, 4608,
- 0, 0,
- 256, 512, 1024, 2048, 4096, 8192, 16384, 32768
-};
-
/**
- * Writes streaminfo metadata block to byte array
+ * Write streaminfo metadata block to byte array
+ *
+ * Block size, frame sizes, sample rate, channel count, sample count and
+ * MD5 sum are all read from the encoder context s. The bit writer is set up
+ * over FLAC_STREAMINFO_SIZE bytes of header.
*/
static void write_streaminfo(FlacEncodeContext *s, uint8_t *header)
{
init_put_bits(&pb, header, FLAC_STREAMINFO_SIZE);
/* streaminfo metadata block */
- put_bits(&pb, 16, s->avctx->frame_size);
- put_bits(&pb, 16, s->avctx->frame_size);
- put_bits(&pb, 24, 0);
+ put_bits(&pb, 16, s->max_blocksize); /* first blocksize field (minimum, per the FLAC STREAMINFO layout) */
+ put_bits(&pb, 16, s->max_blocksize); /* second blocksize field (maximum); same value as the first */
+ put_bits(&pb, 24, s->min_framesize); /* previously hard-coded to 0 */
put_bits(&pb, 24, s->max_framesize);
put_bits(&pb, 20, s->samplerate);
put_bits(&pb, 3, s->channels-1);
put_bits(&pb, 5, 15); /* bits per sample - 1 */
+ /* write 36-bit sample count in 2 put_bits() calls */
+ put_bits(&pb, 24, (s->sample_count & 0xFFFFFF000LL) >> 12); /* upper 24 of the 36 bits */
+ put_bits(&pb, 12, s->sample_count & 0x000000FFFLL); /* lower 12 of the 36 bits */
flush_put_bits(&pb);
- /* total samples = 0 */
- /* MD5 signature = 0 */
+ memcpy(&header[18], s->md5sum, 16); /* the put_bits() calls above total 144 bits = 18 bytes, so the 16-byte MD5 sum starts at offset 18 */
}
/**
- * Sets blocksize based on samplerate
- * Chooses the closest predefined blocksize >= BLOCK_TIME_MS milliseconds
+ * Set blocksize based on samplerate
+ * Choose the largest predefined blocksize that does not exceed
+ * block_time_ms milliseconds of audio at the given samplerate; the result
+ * is never smaller than entry 1 of the blocksize table.
*/
static int select_blocksize(int samplerate, int block_time_ms)
{
int blocksize;
assert(samplerate > 0);
- blocksize = flac_blocksizes[1];
+ blocksize = ff_flac_blocksize_table[1]; /* fallback when no larger table entry fits the target */
target = (samplerate * block_time_ms) / 1000;
for(i=0; i<16; i++) {
- if(target >= flac_blocksizes[i] && flac_blocksizes[i] > blocksize) {
- blocksize = flac_blocksizes[i];
+ if(target >= ff_flac_blocksize_table[i] && ff_flac_blocksize_table[i] > blocksize) { /* entry fits within target and beats the current pick */
+ blocksize = ff_flac_blocksize_table[i];
}
}
return blocksize;
return -1;
}
- if(channels < 1 || channels > FLAC_MAX_CH) {
+ if(channels < 1 || channels > FLAC_MAX_CHANNELS) {
return -1;
}
s->channels = channels;
- s->ch_code = s->channels-1;
/* find samplerate in table */
if(freq < 1)
return -1;
for(i=4; i<12; i++) {
- if(freq == flac_samplerates[i]) {
- s->samplerate = flac_samplerates[i];
+ if(freq == ff_flac_sample_rate_table[i]) {
+ s->samplerate = ff_flac_sample_rate_table[i];
s->sr_code[0] = i;
s->sr_code[1] = 0;
break;
} else {
s->avctx->frame_size = select_blocksize(s->samplerate, s->options.block_time_ms);
}
+ s->max_blocksize = s->avctx->frame_size;
av_log(avctx, AV_LOG_DEBUG, " block size: %d\n", s->avctx->frame_size);
/* set LPC precision */
s->options.lpc_coeff_precision);
/* set maximum encoded frame size in verbatim mode */
- if(s->channels == 2) {
- s->max_framesize = 14 + ((s->avctx->frame_size * 33 + 7) >> 3);
- } else {
- s->max_framesize = 14 + (s->avctx->frame_size * s->channels * 2);
- }
+ s->max_framesize = ff_flac_get_max_frame_size(s->avctx->frame_size,
+ s->channels, 16);
+
+ /* initialize MD5 context */
+ s->md5ctx = av_malloc(av_md5_size);
+ if(!s->md5ctx)
+ return AVERROR(ENOMEM);
+ av_md5_init(s->md5ctx);
streaminfo = av_malloc(FLAC_STREAMINFO_SIZE);
write_streaminfo(s, streaminfo);
avctx->extradata_size = FLAC_STREAMINFO_SIZE;
s->frame_count = 0;
+ s->min_framesize = s->max_framesize;
avctx->coded_frame = avcodec_alloc_frame();
avctx->coded_frame->key_frame = 1;
frame = &s->frame;
for(i=0; i<16; i++) {
- if(s->avctx->frame_size == flac_blocksizes[i]) {
- frame->blocksize = flac_blocksizes[i];
+ if(s->avctx->frame_size == ff_flac_blocksize_table[i]) {
+ frame->blocksize = ff_flac_blocksize_table[i];
frame->bs_code[0] = i;
frame->bs_code[1] = 0;
break;
return bits;
}
-/**
- * Apply Welch window function to audio block
- */
-static void apply_welch_window(const int32_t *data, int len, double *w_data)
-{
- int i, n2;
- double w;
- double c;
-
- assert(!(len&1)); //the optimization in r11881 does not support odd len
- //if someone wants odd len extend the change in r11881
-
- n2 = (len >> 1);
- c = 2.0 / (len - 1.0);
-
- w_data+=n2;
- data+=n2;
- for(i=0; i<n2; i++) {
- w = c - n2 + i;
- w = 1.0 - (w * w);
- w_data[-i-1] = data[-i-1] * w;
- w_data[+i ] = data[+i ] * w;
- }
-}
-
-/**
- * Calculates autocorrelation data from audio samples
- * A Welch window function is applied before calculation.
- */
-void ff_flac_compute_autocorr(const int32_t *data, int len, int lag,
- double *autoc)
-{
- int i, j;
- double tmp[len + lag + 1];
- double *data1= tmp + lag;
-
- apply_welch_window(data, len, data1);
-
- for(j=0; j<lag; j++)
- data1[j-lag]= 0.0;
- data1[len] = 0.0;
-
- for(j=0; j<lag; j+=2){
- double sum0 = 1.0, sum1 = 1.0;
- for(i=0; i<len; i++){
- sum0 += data1[i] * data1[i-j];
- sum1 += data1[i] * data1[i-j-1];
- }
- autoc[j ] = sum0;
- autoc[j+1] = sum1;
- }
-
- if(j==lag){
- double sum = 1.0;
- for(i=0; i<len; i+=2){
- sum += data1[i ] * data1[i-j ]
- + data1[i+1] * data1[i-j+1];
- }
- autoc[j] = sum;
- }
-}
-
-/**
- * Levinson-Durbin recursion.
- * Produces LPC coefficients from autocorrelation data.
- */
-static void compute_lpc_coefs(const double *autoc, int max_order,
- double lpc[][MAX_LPC_ORDER], double *ref)
-{
- int i, j, i2;
- double r, err, tmp;
- double lpc_tmp[MAX_LPC_ORDER];
-
- for(i=0; i<max_order; i++) lpc_tmp[i] = 0;
- err = autoc[0];
-
- for(i=0; i<max_order; i++) {
- r = -autoc[i+1];
- for(j=0; j<i; j++) {
- r -= lpc_tmp[j] * autoc[i-j];
- }
- r /= err;
- ref[i] = fabs(r);
-
- err *= 1.0 - (r * r);
-
- i2 = (i >> 1);
- lpc_tmp[i] = r;
- for(j=0; j<i2; j++) {
- tmp = lpc_tmp[j];
- lpc_tmp[j] += r * lpc_tmp[i-1-j];
- lpc_tmp[i-1-j] += r * tmp;
- }
- if(i & 1) {
- lpc_tmp[j] += lpc_tmp[j] * r;
- }
-
- for(j=0; j<=i; j++) {
- lpc[i][j] = -lpc_tmp[j];
- }
- }
-}
-
-/**
- * Quantize LPC coefficients
- */
-static void quantize_lpc_coefs(double *lpc_in, int order, int precision,
- int32_t *lpc_out, int *shift, int max_shift, int zero_shift)
-{
- int i;
- double cmax, error;
- int32_t qmax;
- int sh;
-
- /* define maximum levels */
- qmax = (1 << (precision - 1)) - 1;
-
- /* find maximum coefficient value */
- cmax = 0.0;
- for(i=0; i<order; i++) {
- cmax= FFMAX(cmax, fabs(lpc_in[i]));
- }
-
- /* if maximum value quantizes to zero, return all zeros */
- if(cmax * (1 << max_shift) < 1.0) {
- *shift = zero_shift;
- memset(lpc_out, 0, sizeof(int32_t) * order);
- return;
- }
-
- /* calculate level shift which scales max coeff to available bits */
- sh = max_shift;
- while((cmax * (1 << sh) > qmax) && (sh > 0)) {
- sh--;
- }
-
- /* since negative shift values are unsupported in decoder, scale down
- coefficients instead */
- if(sh == 0 && cmax > qmax) {
- double scale = ((double)qmax) / cmax;
- for(i=0; i<order; i++) {
- lpc_in[i] *= scale;
- }
- }
-
- /* output quantized coefficients and level shift */
- error=0;
- for(i=0; i<order; i++) {
- error += lpc_in[i] * (1 << sh);
- lpc_out[i] = av_clip(lrintf(error), -qmax, qmax);
- error -= lpc_out[i];
- }
- *shift = sh;
-}
-
-static int estimate_best_order(double *ref, int max_order)
-{
- int i, est;
-
- est = 1;
- for(i=max_order-1; i>=0; i--) {
- if(ref[i] > 0.10) {
- est = i+1;
- break;
- }
- }
- return est;
-}
-
-/**
- * Calculate LPC coefficients for multiple orders
- */
-static int lpc_calc_coefs(DSPContext *s,
- const int32_t *samples, int blocksize, int max_order,
- int precision, int32_t coefs[][MAX_LPC_ORDER],
- int *shift, int use_lpc, int omethod, int max_shift, int zero_shift)
-{
- double autoc[MAX_LPC_ORDER+1];
- double ref[MAX_LPC_ORDER];
- double lpc[MAX_LPC_ORDER][MAX_LPC_ORDER];
- int i, j, pass;
- int opt_order;
-
- assert(max_order >= MIN_LPC_ORDER && max_order <= MAX_LPC_ORDER);
-
- if(use_lpc == 1){
- s->flac_compute_autocorr(samples, blocksize, max_order, autoc);
-
- compute_lpc_coefs(autoc, max_order, lpc, ref);
- }else{
- LLSModel m[2];
- double var[MAX_LPC_ORDER+1], weight;
-
- for(pass=0; pass<use_lpc-1; pass++){
- av_init_lls(&m[pass&1], max_order);
-
- weight=0;
- for(i=max_order; i<blocksize; i++){
- for(j=0; j<=max_order; j++)
- var[j]= samples[i-j];
-
- if(pass){
- double eval, inv, rinv;
- eval= av_evaluate_lls(&m[(pass-1)&1], var+1, max_order-1);
- eval= (512>>pass) + fabs(eval - var[0]);
- inv = 1/eval;
- rinv = sqrt(inv);
- for(j=0; j<=max_order; j++)
- var[j] *= rinv;
- weight += inv;
- }else
- weight++;
-
- av_update_lls(&m[pass&1], var, 1.0);
- }
- av_solve_lls(&m[pass&1], 0.001, 0);
- }
-
- for(i=0; i<max_order; i++){
- for(j=0; j<max_order; j++)
- lpc[i][j]= m[(pass-1)&1].coeff[i][j];
- ref[i]= sqrt(m[(pass-1)&1].variance[i] / weight) * (blocksize - max_order) / 4000;
- }
- for(i=max_order-1; i>0; i--)
- ref[i] = ref[i-1] - ref[i];
- }
- opt_order = max_order;
-
- if(omethod == ORDER_METHOD_EST) {
- opt_order = estimate_best_order(ref, max_order);
- i = opt_order-1;
- quantize_lpc_coefs(lpc[i], i+1, precision, coefs[i], &shift[i], max_shift, zero_shift);
- } else {
- for(i=0; i<max_order; i++) {
- quantize_lpc_coefs(lpc[i], i+1, precision, coefs[i], &shift[i], max_shift, zero_shift);
- }
- }
-
- return opt_order;
-}
-
-
static void encode_residual_verbatim(int32_t *res, int32_t *smp, int n)
{
assert(n > 0);
for(i=0; i<order; i++) {
res[i] = smp[i];
}
-#ifdef CONFIG_SMALL
+#if CONFIG_SMALL
for(i=order; i<n; i+=2) {
int j;
int s = smp[i];
}
/* LPC */
- opt_order = lpc_calc_coefs(&ctx->dsp, smp, n, max_order, precision, coefs,
- shift, ctx->options.use_lpc, omethod, MAX_LPC_SHIFT, 0);
+ opt_order = ff_lpc_calc_coefs(&ctx->dsp, smp, n, min_order, max_order,
+ precision, coefs, shift, ctx->options.use_lpc,
+ omethod, MAX_LPC_SHIFT, 0);
if(omethod == ORDER_METHOD_2LEVEL ||
omethod == ORDER_METHOD_4LEVEL ||
omethod == ORDER_METHOD_8LEVEL) {
int levels = 1 << omethod;
- uint32_t bits[levels];
+ uint32_t bits[1 << ORDER_METHOD_8LEVEL];
int order;
int opt_index = levels-1;
opt_order = max_order-1;
}
}
if(best == 0) {
- return FLAC_CHMODE_LEFT_RIGHT;
+ return FLAC_CHMODE_INDEPENDENT;
} else if(best == 1) {
return FLAC_CHMODE_LEFT_SIDE;
} else if(best == 2) {
right = frame->subframes[1].samples;
if(ctx->channels != 2) {
- frame->ch_mode = FLAC_CHMODE_NOT_STEREO;
+ frame->ch_mode = FLAC_CHMODE_INDEPENDENT;
return;
}
frame->ch_mode = estimate_stereo_mode(left, right, n);
/* perform decorrelation and adjust bits-per-sample */
- if(frame->ch_mode == FLAC_CHMODE_LEFT_RIGHT) {
+ if(frame->ch_mode == FLAC_CHMODE_INDEPENDENT) {
return;
}
if(frame->ch_mode == FLAC_CHMODE_MID_SIDE) {
put_bits(&s->pb, 16, 0xFFF8);
put_bits(&s->pb, 4, frame->bs_code[0]);
put_bits(&s->pb, 4, s->sr_code[0]);
- if(frame->ch_mode == FLAC_CHMODE_NOT_STEREO) {
- put_bits(&s->pb, 4, s->ch_code);
+ if(frame->ch_mode == FLAC_CHMODE_INDEPENDENT) {
+ put_bits(&s->pb, 4, s->channels-1);
} else {
put_bits(&s->pb, 4, frame->ch_mode);
}
flush_put_bits(&s->pb);
}
+/**
+ * Feed the input samples of the current frame into the running MD5 context.
+ * frame.blocksize * channels 16-bit samples are hashed; on big-endian hosts
+ * each sample is passed through le2me_16() first.
+ */
+static void update_md5_sum(FlacEncodeContext *s, int16_t *samples)
+{
+    const int nb_samples = s->frame.blocksize * s->channels;
+#if HAVE_BIGENDIAN
+    /* convert and hash one 2-byte sample at a time */
+    int n = 0;
+    while (n < nb_samples) {
+        int16_t converted = le2me_16(samples[n]);
+        av_md5_update(s->md5ctx, (uint8_t *)&converted, 2);
+        n++;
+    }
+#else
+    /* no per-sample conversion needed: hash the whole buffer in one call */
+    av_md5_update(s->md5ctx, (uint8_t *)samples, nb_samples * 2);
+#endif
+}
+
static int flac_encode_frame(AVCodecContext *avctx, uint8_t *frame,
int buf_size, void *data)
{
FlacEncodeContext *s;
int16_t *samples = data;
int out_bytes;
+ int reencoded=0; /* becomes 1 once the frame has been re-encoded in verbatim mode */
s = avctx->priv_data;
+ if(buf_size < s->max_framesize*2) { /* reject output buffers smaller than twice max_framesize */
+ av_log(avctx, AV_LOG_ERROR, "output buffer too small\n");
+ return 0; /* NOTE(review): returns 0 rather than a negative value after logging an error -- confirm callers treat this as a failure */
+ }
+
+ /* when the last block is reached, update the header in extradata */
+ if (!data) { /* NOTE(review): this path is only reached when the buffer-size check above passed */
+ s->max_framesize = s->max_encoded_framesize; /* report the largest frame actually produced */
+ av_md5_final(s->md5ctx, s->md5sum);
+ write_streaminfo(s, avctx->extradata); /* rewrite streaminfo with the final frame sizes, sample count and MD5 sum */
+ return 0; /* no frame bytes are produced by this call */
+ }
+
init_frame(s);
copy_samples(s, samples);
for(ch=0; ch<s->channels; ch++) {
encode_residual(s, ch);
}
+
+write_frame: /* re-entered at most once, after switching every channel to verbatim encoding */
init_put_bits(&s->pb, frame, buf_size);
output_frame_header(s);
output_subframes(s);
output_frame_footer(s);
out_bytes = put_bits_count(&s->pb) >> 3;
- if(out_bytes > s->max_framesize || out_bytes >= buf_size) {
- /* frame too large. use verbatim mode */
- for(ch=0; ch<s->channels; ch++) {
- encode_residual_v(s, ch);
- }
- init_put_bits(&s->pb, frame, buf_size);
- output_frame_header(s);
- output_subframes(s);
- output_frame_footer(s);
- out_bytes = put_bits_count(&s->pb) >> 3;
-
- if(out_bytes > s->max_framesize || out_bytes >= buf_size) {
+ if(out_bytes > s->max_framesize) { /* the former out_bytes >= buf_size test is dropped; buf_size was checked on entry */
+ if(reencoded) {
/* still too large. must be an error. */
av_log(avctx, AV_LOG_ERROR, "error encoding frame\n");
return -1;
}
+
+ /* frame too large. use verbatim mode */
+ for(ch=0; ch<s->channels; ch++) {
+ encode_residual_v(s, ch);
+ }
+ reencoded = 1;
+ goto write_frame; /* write the frame again using the verbatim residuals */
}
s->frame_count++;
+ s->sample_count += avctx->frame_size; /* running total later written to streaminfo */
+ update_md5_sum(s, samples); /* hash the input samples of this frame */
+ if (out_bytes > s->max_encoded_framesize) /* track the largest encoded frame */
+ s->max_encoded_framesize = out_bytes;
+ if (out_bytes < s->min_framesize) /* track the smallest encoded frame */
+ s->min_framesize = out_bytes;
+
return out_bytes;
}
static av_cold int flac_encode_close(AVCodecContext *avctx)
{
+ if (avctx->priv_data) {
+ FlacEncodeContext *s = avctx->priv_data;
+ av_freep(&s->md5ctx);
+ }
av_freep(&avctx->extradata);
avctx->extradata_size = 0;
av_freep(&avctx->coded_frame);
AVCodec flac_encoder = {
"flac",
- CODEC_TYPE_AUDIO,
+ AVMEDIA_TYPE_AUDIO, /* replaces the CODEC_TYPE_AUDIO name removed above */
CODEC_ID_FLAC,
sizeof(FlacEncodeContext),
flac_encode_init,
flac_encode_frame,
flac_encode_close,
NULL,
- .capabilities = CODEC_CAP_SMALL_LAST_FRAME,
- .sample_fmts = (enum SampleFormat[]){SAMPLE_FMT_S16,SAMPLE_FMT_NONE},
+ .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY, /* NOTE(review): presumably CODEC_CAP_DELAY is what makes the caller issue the final NULL-data call that flac_encode_frame uses to rewrite streaminfo -- confirm */
+ .sample_fmts = (const enum SampleFormat[]){SAMPLE_FMT_S16,SAMPLE_FMT_NONE}, /* only SAMPLE_FMT_S16 is listed; SAMPLE_FMT_NONE presumably terminates the list */
.long_name = NULL_IF_CONFIG_SMALL("FLAC (Free Lossless Audio Codec)"),
};