3 * Copyright (c) 2008 Jaikrishnan Menon <realityman@gmx.net>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 #include "libavutil/opt.h"
29 #include "alac_data.h"
/* Number of samples per channel in a full-size frame; also the second
 * dimension of sample_buf / predictor_buf in AlacEncodeContext. */
31 #define DEFAULT_FRAME_SIZE 4096
/* Size in bytes of the extradata blob allocated and filled in alac_encode_init(). */
32 #define ALAC_EXTRADATA_SIZE 36
/* NOTE(review): the next two constants are not referenced in the visible part
 * of this file. 55 equals the 23 + 32 header bits and 3 the trailing bits
 * budgeted in get_max_frame_size() -- presumably bit counts, confirm. */
33 #define ALAC_FRAME_HEADER_SIZE 55
34 #define ALAC_FRAME_FOOTER_SIZE 3
/* 9-bit marker written by encode_scalar() before a value that is stored raw. */
36 #define ALAC_ESCAPE_CODE 0x1FF
/* Upper bound of the prediction-order options; lpc_coeff holds one more entry than this. */
37 #define ALAC_MAX_LPC_ORDER 30
/* Default values of the max_prediction_order / min_prediction_order options. */
38 #define DEFAULT_MAX_PRED_ORDER 6
39 #define DEFAULT_MIN_PRED_ORDER 4
/* Precision and shift limits handed to ff_lpc_calc_coefs() in calc_predictor_params(). */
40 #define ALAC_MAX_LPC_PRECISION 9
41 #define ALAC_MIN_LPC_SHIFT 0
42 #define ALAC_MAX_LPC_SHIFT 9
/* Stereo decorrelation modes dispatched on in alac_stereo_decorrelation(). */
44 #define ALAC_CHMODE_LEFT_RIGHT 0
45 #define ALAC_CHMODE_LEFT_SIDE 1
46 #define ALAC_CHMODE_RIGHT_SIDE 2
47 #define ALAC_CHMODE_MID_SIDE 3
/* Entropy-coder parameters, reached through AlacEncodeContext as s->rc.
 * NOTE(review): the member list is not visible in this excerpt; the rest of
 * the file reads history_mult, initial_history, k_modifier and rice_modifier. */
49 typedef struct RiceContext {
/* Per-channel linear-prediction state filled by calc_predictor_params().
 * NOTE(review): only lpc_coeff is visible here; the file also accesses
 * lpc_order and lpc_quant on this struct. */
56 typedef struct AlacLPCContext {
/* Quantized predictor coefficients; sized ALAC_MAX_LPC_ORDER + 1. */
58 int lpc_coeff[ALAC_MAX_LPC_ORDER+1];
/* Private encoder state (the codec's priv_data).
 * NOTE(review): some members used elsewhere in the file (extra_bits, pbctx,
 * rc, lpc_ctx) are not visible in this excerpt. */
62 typedef struct AlacEncodeContext {
64 AVCodecContext *avctx;
65 int frame_size; /**< current frame size */
66 int verbatim; /**< current frame verbatim mode flag */
/* Clipped to 0..2 in alac_encode_init(); 0 selects verbatim frames and
 * 1 selects the fixed predictor in calc_predictor_params(). */
67 int compression_level;
/* Backed by the AVOptions of the same names; min must not exceed max. */
68 int min_prediction_order;
69 int max_prediction_order;
/* Byte bound for a full-size frame, computed once in alac_encode_init(). */
70 int max_coded_frame_size;
/* Bit width passed to encode_scalar() and sign_extend() for the current element. */
71 int write_sample_size;
/* Working copy of the input samples, one row per channel of the element. */
73 int32_t sample_buf[2][DEFAULT_FRAME_SIZE];
/* Holds the extracted extra bits first and the prediction residuals later
 * (see write_element()). */
74 int32_t predictor_buf[2][DEFAULT_FRAME_SIZE];
/* Stereo decorrelation parameters, written as 8 bits each in write_element(). */
75 int interlacing_shift;
76 int interlacing_leftweight;
/* Predictor parameters for up to two channels of one element. */
79 AlacLPCContext lpc[2];
/**
 * Copy the planar input samples of one element into s->sample_buf.
 *
 * Each sample is right-shifted by the difference between the width of the
 * input sample format and avctx->bits_per_raw_sample. AV_SAMPLE_FMT_S32P
 * input is read as int32_t; the int16_t copy covers the remaining format
 * (NOTE(review): the `else` joining the two copies is not visible here).
 *
 * @param s        encoder context; reads frame_size, writes sample_buf
 * @param channels number of channel planes to copy
 * @param samples  per-channel input planes
 */
84 static void init_sample_buffers(AlacEncodeContext *s, int channels,
85 const uint8_t *samples[2])
88 int shift = av_get_bytes_per_sample(s->avctx->sample_fmt) * 8 -
89 s->avctx->bits_per_raw_sample;
91 #define COPY_SAMPLES(type) do { \
92 for (ch = 0; ch < channels; ch++) { \
93 int32_t *bptr = s->sample_buf[ch]; \
94 const type *sptr = (const type *)samples[ch]; \
95 for (i = 0; i < s->frame_size; i++) \
96 bptr[i] = sptr[i] >> shift; \
100 if (s->avctx->sample_fmt == AV_SAMPLE_FMT_S32P)
101 COPY_SAMPLES(int32_t);
103 COPY_SAMPLES(int16_t);
/**
 * Encode one value x into s->pbctx.
 *
 * @param s                 encoder context
 * @param x                 value to encode
 * @param k                 requested code parameter; clamped to s->rc.k_modifier
 * @param write_sample_size bit width used when x is stored raw after the escape code
 *
 * NOTE(review): the computation of q and r and the conditions that select
 * each branch are not visible in this excerpt.
 */
106 static void encode_scalar(AlacEncodeContext *s, int x,
107 int k, int write_sample_size)
111 k = FFMIN(k, s->rc.k_modifier);
/* divisor is 2^k - 1 */
112 divisor = (1<<k) - 1;
117 // write escape code and sample value directly
118 put_bits(&s->pbctx, 9, ALAC_ESCAPE_CODE);
119 put_bits(&s->pbctx, write_sample_size, x);
/* q one-bits followed by a single zero bit */
122 put_bits(&s->pbctx, q, (1<<q) - 1);
123 put_bits(&s->pbctx, 1, 0);
/* r + 1 in k bits, or alternatively k - 1 zero bits */
127 put_bits(&s->pbctx, k, r+1);
129 put_bits(&s->pbctx, k-1, 0);
/**
 * Write the header of one element to s->pbctx.
 *
 * Writes 23 bits of fixed fields, optionally followed by the 32-bit sample
 * count.
 *
 * @param s       encoder context; reads frame_size, extra_bits and verbatim
 * @param element element type, stored in 3 bits
 *
 * NOTE(review): the `instance` parameter declaration, the assignment of
 * encode_fs and the guard around the final put_bits32() are not visible here;
 * presumably the sample count is only written when encode_fs is set.
 */
134 static void write_element_header(AlacEncodeContext *s,
135 enum AlacRawDataBlockType element,
/* frames shorter than the default size are treated specially */
140 if (s->frame_size < DEFAULT_FRAME_SIZE)
143 put_bits(&s->pbctx, 3, element); // element type
144 put_bits(&s->pbctx, 4, instance); // element instance
145 put_bits(&s->pbctx, 12, 0); // unused header bits
146 put_bits(&s->pbctx, 1, encode_fs); // Sample count is in the header
147 put_bits(&s->pbctx, 2, s->extra_bits >> 3); // Extra bytes (for 24-bit)
148 put_bits(&s->pbctx, 1, s->verbatim); // Audio block is verbatim
150 put_bits32(&s->pbctx, s->frame_size); // No. of samples in the frame
/**
 * Choose the predictor order, quantization shift and coefficients for one
 * channel and store them in s->lpc[ch].
 *
 * @param s  encoder context
 * @param ch channel index into s->lpc and s->sample_buf
 */
153 static void calc_predictor_params(AlacEncodeContext *s, int ch)
155 int32_t coefs[MAX_LPC_ORDER][MAX_LPC_ORDER];
156 int shift[MAX_LPC_ORDER];
/* compression level 1: fixed 6th-order predictor with hard-coded coefficients */
159 if (s->compression_level == 1) {
160 s->lpc[ch].lpc_order = 6;
161 s->lpc[ch].lpc_quant = 6;
162 s->lpc[ch].lpc_coeff[0] = 160;
163 s->lpc[ch].lpc_coeff[1] = -190;
164 s->lpc[ch].lpc_coeff[2] = 170;
165 s->lpc[ch].lpc_coeff[3] = -130;
166 s->lpc[ch].lpc_coeff[4] = 80;
167 s->lpc[ch].lpc_coeff[5] = -25;
/* otherwise (presumably the else branch -- not visible here): let the LPC
 * helper pick an order within [min_prediction_order, max_prediction_order] */
169 opt_order = ff_lpc_calc_coefs(&s->lpc_ctx, s->sample_buf[ch],
171 s->min_prediction_order,
172 s->max_prediction_order,
173 ALAC_MAX_LPC_PRECISION, coefs, shift,
174 FF_LPC_TYPE_LEVINSON, 0,
175 ORDER_METHOD_EST, ALAC_MIN_LPC_SHIFT,
176 ALAC_MAX_LPC_SHIFT, 1);
/* coefs/shift are indexed by order - 1; copy the row of the chosen order */
178 s->lpc[ch].lpc_order = opt_order;
179 s->lpc[ch].lpc_quant = shift[opt_order-1];
180 memcpy(s->lpc[ch].lpc_coeff, coefs[opt_order-1], opt_order*sizeof(int));
/**
 * Estimate which stereo decorrelation mode gives the smallest residual.
 *
 * The result is used by alac_stereo_decorrelation() as an ALAC_CHMODE_*
 * value, so score[] is indexed by those constants.
 *
 * @param left_ch  left channel samples
 * @param right_ch right channel samples
 * @param n        number of samples per channel
 *
 * NOTE(review): the local declarations, the accumulation into sum[0] and
 * sum[1], and the final return are not visible in this excerpt.
 */
184 static int estimate_stereo_mode(int32_t *left_ch, int32_t *right_ch, int n)
191 /* calculate sum of 2nd order residual for each channel */
192 sum[0] = sum[1] = sum[2] = sum[3] = 0;
/* starts at i = 2 because the 2nd-order difference reads i - 1 and i - 2 */
193 for (i = 2; i < n; i++) {
194 lt = left_ch[i] - 2 * left_ch[i - 1] + left_ch[i - 2];
195 rt = right_ch[i] - 2 * right_ch[i - 1] + right_ch[i - 2];
/* sum[2]: halved sum of both residuals, sum[3]: their difference */
196 sum[2] += FFABS((lt + rt) >> 1);
197 sum[3] += FFABS(lt - rt);
202 /* calculate score for each mode */
203 score[0] = sum[0] + sum[1];
204 score[1] = sum[0] + sum[3];
205 score[2] = sum[1] + sum[3];
206 score[3] = sum[2] + sum[3];
208 /* return mode with lowest score */
/* strict '<' keeps the earlier index when two scores are equal */
210 for (i = 1; i < 4; i++) {
211 if (score[i] < score[best])
/**
 * Decorrelate the two channels in s->sample_buf in place, using the mode
 * chosen by estimate_stereo_mode(), and record the matching
 * interlacing_shift / interlacing_leftweight in the context.
 *
 * @param s encoder context
 *
 * NOTE(review): the switch statement, break lines, the tmp assignments and
 * the label of the last case are not visible in this excerpt.
 */
217 static void alac_stereo_decorrelation(AlacEncodeContext *s)
219 int32_t *left = s->sample_buf[0], *right = s->sample_buf[1];
220 int i, mode, n = s->frame_size;
223 mode = estimate_stereo_mode(left, right, n);
/* left/right: samples stay untouched */
226 case ALAC_CHMODE_LEFT_RIGHT:
227 s->interlacing_leftweight = 0;
228 s->interlacing_shift = 0;
/* left/side: second channel becomes left - right */
230 case ALAC_CHMODE_LEFT_SIDE:
231 for (i = 0; i < n; i++)
232 right[i] = left[i] - right[i];
233 s->interlacing_leftweight = 1;
234 s->interlacing_shift = 0;
/* right/side: second channel becomes left - right; first channel is tmp plus
 * the sign of that difference (right[i] >> 31) */
236 case ALAC_CHMODE_RIGHT_SIDE:
237 for (i = 0; i < n; i++) {
239 right[i] = left[i] - right[i];
240 left[i] = tmp + (right[i] >> 31);
242 s->interlacing_leftweight = 1;
243 s->interlacing_shift = 31;
/* remaining case (presumably mid/side): first channel gets the halved sum,
 * second channel the difference */
246 for (i = 0; i < n; i++) {
248 left[i] = (tmp + right[i]) >> 1;
249 right[i] = tmp - right[i];
251 s->interlacing_leftweight = 1;
252 s->interlacing_shift = 1;
/**
 * Compute the prediction residual of channel ch from s->sample_buf[ch] into
 * s->predictor_buf[ch].
 *
 * @param s  encoder context
 * @param ch channel index
 *
 * NOTE(review): several lines are not visible in this excerpt (the coefficient
 * factor of the sum, the index updates of the adaptation loop, and the
 * advance of the samples pointer that the samples[0]-relative indexing
 * implies).
 */
257 static void alac_linear_predictor(AlacEncodeContext *s, int ch)
/* lpc is a by-value copy: the coefficient updates below do not modify s->lpc[ch] */
260 AlacLPCContext lpc = s->lpc[ch];
261 int32_t *residual = s->predictor_buf[ch];
/* order 31 selects a plain first-order difference */
263 if (lpc.lpc_order == 31) {
264 residual[0] = s->sample_buf[ch][0];
266 for (i = 1; i < s->frame_size; i++) {
267 residual[i] = s->sample_buf[ch][i ] -
268 s->sample_buf[ch][i - 1];
274 // generalised linear predictor
276 if (lpc.lpc_order > 0) {
277 int32_t *samples = s->sample_buf[ch];
279 // generate warm-up samples
/* first sample is stored as is, the next lpc_order ones as first-order differences */
280 residual[0] = samples[0];
281 for (i = 1; i <= lpc.lpc_order; i++)
282 residual[i] = sign_extend(samples[i] - samples[i-1], s->write_sample_size);
284 // perform lpc on remaining samples
285 for (i = lpc.lpc_order + 1; i < s->frame_size; i++) {
/* sum starts at half of 1 << lpc_quant so the shift below rounds */
286 int sum = 1 << (lpc.lpc_quant - 1), res_val, j;
/* history samples are taken relative to samples[0] */
288 for (j = 0; j < lpc.lpc_order; j++) {
289 sum += (samples[lpc.lpc_order-j] - samples[0]) *
293 sum >>= lpc.lpc_quant;
/* residual = actual sample minus prediction, wrapped to write_sample_size bits */
295 residual[i] = sign_extend(samples[lpc.lpc_order+1] - sum,
296 s->write_sample_size);
297 res_val = residual[i];
/* coefficient adaptation: runs while res_val keeps the sign it had on entry */
300 int index = lpc.lpc_order - 1;
301 int neg = (res_val < 0);
303 while (index >= 0 && (neg ? (res_val < 0) : (res_val > 0))) {
304 int val = samples[0] - samples[lpc.lpc_order - index];
305 int sign = (val ? FFSIGN(val) : 0);
310 lpc.lpc_coeff[index] -= sign;
312 res_val -= (val >> lpc.lpc_quant) * (lpc.lpc_order - index);
/**
 * Entropy-code the residuals of channel ch (s->predictor_buf[ch]) into the
 * bitstream using encode_scalar() with an adaptive parameter k.
 *
 * @param s  encoder context
 * @param ch channel index
 *
 * NOTE(review): the loop-counter/pointer advances and the mapping of
 * non-negative samples to x are not visible in this excerpt.
 */
321 static void alac_entropy_coder(AlacEncodeContext *s, int ch)
/* adaptation state starts from the configured initial history */
323 unsigned int history = s->rc.initial_history;
324 int sign_modifier = 0, i, k;
325 int32_t *samples = s->predictor_buf[ch];
/* no increment in the for header: i is advanced inside the body */
327 for (i = 0; i < s->frame_size;) {
/* k is derived from the running history */
330 k = av_log2((history >> 9) + 3);
332 x = -2 * (*samples) -1;
338 encode_scalar(s, x - sign_modifier, k, s->write_sample_size);
/* update history with the value just coded */
340 history += x * s->rc.history_mult -
341 ((history * s->rc.history_mult) >> 9);
/* low history and samples remaining: code a run of zero samples as one count */
347 if (history < 128 && i < s->frame_size) {
348 unsigned int block_size = 0;
350 k = 7 - av_log2(history) + ((history + 16) >> 6);
352 while (*samples == 0 && i < s->frame_size) {
/* the run length is coded with a 16-bit raw width */
357 encode_scalar(s, block_size, k, 16);
/* sign_modifier is 1 unless the run length exceeded 0xFFFF */
358 sign_modifier = (block_size <= 0xFFFF);
/**
 * Write one element (single channel or channel pair) to the bitstream,
 * either verbatim or predicted and entropy-coded.
 *
 * @param s        encoder context
 * @param element  element type; TYPE_CPE means two channels, anything else one
 * @param instance element instance number, forwarded to the header
 * @param samples0 first channel plane
 * @param samples1 second channel plane (write_frame() passes NULL for single-channel elements)
 *
 * NOTE(review): the conditions choosing between the verbatim and compressed
 * paths, and several guards (extra_bits, channel count) are not visible in
 * this excerpt.
 */
365 static void write_element(AlacEncodeContext *s,
366 enum AlacRawDataBlockType element, int instance,
367 const uint8_t *samples0, const uint8_t *samples1)
369 const uint8_t *samples[2] = { samples0, samples1 };
/* stays 0 in the visible code, so the prediction_type == 15 pass below is unused */
371 int prediction_type = 0;
372 PutBitContext *pb = &s->pbctx;
374 channels = element == TYPE_CPE ? 2 : 1;
377 write_element_header(s, element, instance);
378 /* samples are channel-interleaved in verbatim mode */
379 if (s->avctx->sample_fmt == AV_SAMPLE_FMT_S32P) {
/* drop the unused low bits of the 32-bit container */
380 int shift = 32 - s->avctx->bits_per_raw_sample;
381 const int32_t *samples_s32[2] = { (const int32_t *)samples0,
382 (const int32_t *)samples1 };
383 for (i = 0; i < s->frame_size; i++)
384 for (j = 0; j < channels; j++)
385 put_sbits(pb, s->avctx->bits_per_raw_sample,
386 samples_s32[j][i] >> shift);
388 const int16_t *samples_s16[2] = { (const int16_t *)samples0,
389 (const int16_t *)samples1 };
390 for (i = 0; i < s->frame_size; i++)
391 for (j = 0; j < channels; j++)
392 put_sbits(pb, s->avctx->bits_per_raw_sample,
/* compressed path: width used for escaped values and residual wrapping */
396 s->write_sample_size = s->avctx->bits_per_raw_sample - s->extra_bits +
399 init_sample_buffers(s, channels, samples);
400 write_element_header(s, element, instance);
402 // extract extra bits if needed
/* low extra_bits bits are parked in predictor_buf; sample_buf keeps the rest */
404 uint32_t mask = (1 << s->extra_bits) - 1;
405 for (j = 0; j < channels; j++) {
406 int32_t *extra = s->predictor_buf[j];
407 int32_t *smp = s->sample_buf[j];
408 for (i = 0; i < s->frame_size; i++) {
409 extra[i] = smp[i] & mask;
410 smp[i] >>= s->extra_bits;
/* stereo decorrelation, or both parameters zeroed
 * (the selecting condition is not visible here) */
416 alac_stereo_decorrelation(s);
418 s->interlacing_shift = s->interlacing_leftweight = 0;
419 put_bits(pb, 8, s->interlacing_shift);
420 put_bits(pb, 8, s->interlacing_leftweight);
/* per-channel predictor description */
422 for (i = 0; i < channels; i++) {
423 calc_predictor_params(s, i);
425 put_bits(pb, 4, prediction_type);
426 put_bits(pb, 4, s->lpc[i].lpc_quant);
428 put_bits(pb, 3, s->rc.rice_modifier);
429 put_bits(pb, 5, s->lpc[i].lpc_order);
430 // predictor coeff. table
431 for (j = 0; j < s->lpc[i].lpc_order; j++)
432 put_sbits(pb, 16, s->lpc[i].lpc_coeff[j]);
435 // write extra bits if needed
/* must happen before the predictor runs: alac_linear_predictor() writes
 * its residuals into the same predictor_buf */
437 for (i = 0; i < s->frame_size; i++) {
438 for (j = 0; j < channels; j++) {
439 put_bits(pb, s->extra_bits, s->predictor_buf[j][i]);
444 // apply lpc and entropy coding to audio samples
445 for (i = 0; i < channels; i++) {
446 alac_linear_predictor(s, i);
448 // TODO: determine when this will actually help. for now it's not used.
449 if (prediction_type == 15) {
450 // 2nd pass 1st order filter
/* iterate backwards so each difference uses the unmodified predecessor */
451 int32_t *residual = s->predictor_buf[i];
452 for (j = s->frame_size - 1; j > 0; j--)
453 residual[j] -= residual[j - 1];
455 alac_entropy_coder(s, i);
/**
 * Encode all channels of one frame into avpkt->data.
 *
 * @param s       encoder context
 * @param avpkt   output packet; its data/size are used as the bit buffer
 * @param samples per-channel input planes
 * @return value of put_bytes_output() for the bit writer
 *
 * NOTE(review): the increments of ch/element/sce/cpe and any flush of the
 * bit writer are not visible in this excerpt.
 */
460 static int write_frame(AlacEncodeContext *s, AVPacket *avpkt,
461 uint8_t * const *samples)
463 PutBitContext *pb = &s->pbctx;
/* element sequence and channel order tables, selected by channel count */
464 const enum AlacRawDataBlockType *ch_elements = ff_alac_channel_elements[s->avctx->channels - 1];
465 const uint8_t *ch_map = ff_alac_channel_layout_offsets[s->avctx->channels - 1];
466 int ch, element, sce, cpe;
468 init_put_bits(pb, avpkt->data, avpkt->size);
470 ch = element = sce = cpe = 0;
471 while (ch < s->avctx->channels) {
/* a channel-pair element takes two mapped channels, otherwise one */
472 if (ch_elements[element] == TYPE_CPE) {
473 write_element(s, TYPE_CPE, cpe, samples[ch_map[ch]],
474 samples[ch_map[ch + 1]]);
478 write_element(s, TYPE_SCE, sce, samples[ch_map[ch]], NULL);
/* 3-bit end marker after the last element */
485 put_bits(pb, 3, TYPE_END);
488 return put_bytes_output(pb);
/**
 * Byte size bound for an uncompressed frame.
 *
 * @param frame_size samples per channel
 * @param ch         number of channels
 * @param bps        bits per sample
 * @return header bits + bps * ch * frame_size + 3 bits, converted to bytes
 *         (assumes FFALIGN rounds up to a multiple of 8 -- TODO confirm)
 */
491 static av_always_inline int get_max_frame_size(int frame_size, int ch, int bps)
/* 23 fixed header bits, plus 32 when the frame is shorter than the default size */
493 int header_bits = 23 + 32 * (frame_size < DEFAULT_FRAME_SIZE);
494 return FFALIGN(header_bits + bps * ch * frame_size + 3, 8) / 8;
/**
 * Encoder close callback: releases the LPC context set up by ff_lpc_init()
 * in alac_encode_init().
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
497 static av_cold int alac_encode_close(AVCodecContext *avctx)
499 AlacEncodeContext *s = avctx->priv_data;
500 ff_lpc_end(&s->lpc_ctx);
/**
 * Encoder init callback: fixes the sample depth, picks the compression level,
 * sets default Rice parameters, builds the extradata and initializes the LPC
 * context.
 *
 * @return AVERROR(ENOMEM) if the extradata cannot be allocated and
 *         AVERROR(EINVAL) if max_prediction_order < min_prediction_order;
 *         other return paths are not visible in this excerpt.
 */
504 static av_cold int alac_encode_init(AVCodecContext *avctx)
506 AlacEncodeContext *s = avctx->priv_data;
508 uint8_t *alac_extradata;
510 avctx->frame_size = s->frame_size = DEFAULT_FRAME_SIZE;
/* S32P input is always encoded as 24-bit; the 16-bit assignment below is
 * presumably the else branch (not visible here) */
512 if (avctx->sample_fmt == AV_SAMPLE_FMT_S32P) {
513 if (avctx->bits_per_raw_sample != 24)
514 av_log(avctx, AV_LOG_WARNING, "encoding as 24 bits-per-sample\n");
515 avctx->bits_per_raw_sample = 24;
517 avctx->bits_per_raw_sample = 16;
521 // Set default compression level
522 if (avctx->compression_level == FF_COMPRESSION_DEFAULT)
523 s->compression_level = 2;
/* user-supplied levels are clipped to 0..2 */
525 s->compression_level = av_clip(avctx->compression_level, 0, 2);
527 // Initialize default Rice parameters
528 s->rc.history_mult = 40;
529 s->rc.initial_history = 10;
530 s->rc.k_modifier = 14;
531 s->rc.rice_modifier = 4;
/* bound for a full-size frame; also stored in the extradata below */
533 s->max_coded_frame_size = get_max_frame_size(avctx->frame_size,
535 avctx->bits_per_raw_sample);
/* zero-initialized, so bytes not written below stay 0 */
537 avctx->extradata = av_mallocz(ALAC_EXTRADATA_SIZE + AV_INPUT_BUFFER_PADDING_SIZE);
538 if (!avctx->extradata)
539 return AVERROR(ENOMEM);
540 avctx->extradata_size = ALAC_EXTRADATA_SIZE;
/* big-endian fields at fixed byte offsets: +0 size, +4 'alac' tag,
 * +12 frame size, +17 bits per sample, +21 channels,
 * +24 max coded frame size, +28 average bitrate, +32 sample rate */
542 alac_extradata = avctx->extradata;
543 AV_WB32(alac_extradata, ALAC_EXTRADATA_SIZE);
544 AV_WB32(alac_extradata+4, MKBETAG('a','l','a','c'));
545 AV_WB32(alac_extradata+12, avctx->frame_size);
546 AV_WB8 (alac_extradata+17, avctx->bits_per_raw_sample);
547 AV_WB8 (alac_extradata+21, avctx->channels);
548 AV_WB32(alac_extradata+24, s->max_coded_frame_size);
549 AV_WB32(alac_extradata+28,
550 avctx->sample_rate * avctx->channels * avctx->bits_per_raw_sample); // average bitrate
551 AV_WB32(alac_extradata+32, avctx->sample_rate);
553 // Set relevant extradata fields
/* Rice parameters (+18..+20) are only stored when compression is enabled */
554 if (s->compression_level > 0) {
555 AV_WB8(alac_extradata+18, s->rc.history_mult);
556 AV_WB8(alac_extradata+19, s->rc.initial_history);
557 AV_WB8(alac_extradata+20, s->rc.k_modifier);
/* reject inconsistent prediction-order options */
560 if (s->max_prediction_order < s->min_prediction_order) {
561 av_log(avctx, AV_LOG_ERROR,
562 "invalid prediction orders: min=%d max=%d\n",
563 s->min_prediction_order, s->max_prediction_order);
564 return AVERROR(EINVAL);
/* LPC context sized for a full frame and the largest allowed order */
569 if ((ret = ff_lpc_init(&s->lpc_ctx, avctx->frame_size,
570 s->max_prediction_order,
571 FF_LPC_TYPE_LEVINSON)) < 0) {
/**
 * Encode callback: encodes one AVFrame into avpkt.
 *
 * The frame is first written with the configured mode; if the result exceeds
 * max_frame_size it is written again in verbatim mode.
 *
 * @param avctx          codec context
 * @param avpkt          output packet
 * @param frame          input audio frame
 * @param got_packet_ptr output flag (its assignment is not visible in this excerpt)
 *
 * NOTE(review): the verbatim/extra_bits assignments of the fallback branches
 * and the return statements are not visible in this excerpt.
 */
578 static int alac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
579 const AVFrame *frame, int *got_packet_ptr)
581 AlacEncodeContext *s = avctx->priv_data;
582 int out_bytes, max_frame_size, ret;
584 s->frame_size = frame->nb_samples;
/* short frames get their own bound; full frames reuse the one computed at init */
586 if (frame->nb_samples < DEFAULT_FRAME_SIZE)
587 max_frame_size = get_max_frame_size(s->frame_size, avctx->channels,
588 avctx->bits_per_raw_sample);
590 max_frame_size = s->max_coded_frame_size;
/* the packet is allocated at four times the verbatim bound */
592 if ((ret = ff_alloc_packet2(avctx, avpkt, 4 * max_frame_size, 0)) < 0)
595 /* use verbatim mode for compression_level 0 */
596 if (s->compression_level) {
/* bits above 16 are carried separately as extra bits */
598 s->extra_bits = avctx->bits_per_raw_sample - 16;
604 out_bytes = write_frame(s, avpkt, frame->extended_data);
606 if (out_bytes > max_frame_size) {
607 /* frame too large. use verbatim mode */
610 out_bytes = write_frame(s, avpkt, frame->extended_data);
/* shrink the packet to the bytes actually produced */
613 avpkt->size = out_bytes;
/* Private encoder options. OFFSET locates the backing field inside
 * AlacEncodeContext; AE marks an option as an audio encoding parameter. */
618 #define OFFSET(x) offsetof(AlacEncodeContext, x)
619 #define AE AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
620 static const AVOption options[] = {
/* both orders accept MIN_LPC_ORDER..ALAC_MAX_LPC_ORDER; alac_encode_init()
 * additionally rejects max < min */
621 { "min_prediction_order", NULL, OFFSET(min_prediction_order), AV_OPT_TYPE_INT, { .i64 = DEFAULT_MIN_PRED_ORDER }, MIN_LPC_ORDER, ALAC_MAX_LPC_ORDER, AE },
622 { "max_prediction_order", NULL, OFFSET(max_prediction_order), AV_OPT_TYPE_INT, { .i64 = DEFAULT_MAX_PRED_ORDER }, MIN_LPC_ORDER, ALAC_MAX_LPC_ORDER, AE },
/* AVClass for the encoder's private context; referenced by
 * ff_alac_encoder.priv_class.
 * NOTE(review): the remaining initializers are not visible in this excerpt. */
627 static const AVClass alacenc_class = {
628 .class_name = "alacenc",
629 .item_name = av_default_item_name,
631 .version = LIBAVUTIL_VERSION_INT,
/* Codec registration entry for the ALAC encoder: wires the init / encode /
 * close callbacks defined above and declares the supported layouts and
 * sample formats.
 * NOTE(review): the .name initializer and part of the sample format list are
 * not visible in this excerpt. */
634 const AVCodec ff_alac_encoder = {
636 .long_name = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),
637 .type = AVMEDIA_TYPE_AUDIO,
638 .id = AV_CODEC_ID_ALAC,
639 .priv_data_size = sizeof(AlacEncodeContext),
640 .priv_class = &alacenc_class,
641 .init = alac_encode_init,
642 .encode2 = alac_encode_frame,
643 .close = alac_encode_close,
/* the last frame may be shorter than DEFAULT_FRAME_SIZE; alac_encode_frame()
 * handles nb_samples < DEFAULT_FRAME_SIZE */
644 .capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME,
645 .channel_layouts = ff_alac_channel_layouts,
646 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S32P,
648 AV_SAMPLE_FMT_NONE },