3 * Copyright (c) 2008 Jaikrishnan Menon <realityman@gmx.net>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 #include "libavutil/opt.h"
29 #include "alac_data.h"
/*
 * Frame and extradata sizing. DEFAULT_FRAME_SIZE is the frame size announced
 * at init and the per-channel capacity of the sample/predictor buffers;
 * ALAC_EXTRADATA_SIZE is the extradata length in bytes.
 * NOTE(review): ALAC_FRAME_HEADER_SIZE and ALAC_FRAME_FOOTER_SIZE are not
 * referenced in the visible code; they look like bit counts (23 + 32 header
 * bits, 3-bit end tag) -- confirm.
 */
31 #define DEFAULT_FRAME_SIZE 4096
32 #define ALAC_EXTRADATA_SIZE 36
33 #define ALAC_FRAME_HEADER_SIZE 55
34 #define ALAC_FRAME_FOOTER_SIZE 3
/*
 * ALAC_ESCAPE_CODE is written as 9 bits ahead of a directly stored value.
 * The remaining constants bound the LPC search: order limits and defaults
 * for the encoder options, plus the precision and maximum shift handed to
 * ff_lpc_calc_coefs().
 */
36 #define ALAC_ESCAPE_CODE 0x1FF
37 #define ALAC_MAX_LPC_ORDER 30
38 #define DEFAULT_MAX_PRED_ORDER 6
39 #define DEFAULT_MIN_PRED_ORDER 4
40 #define ALAC_MAX_LPC_PRECISION 9
41 #define ALAC_MAX_LPC_SHIFT 9
/* Stereo decorrelation modes dispatched on in alac_stereo_decorrelation(). */
43 #define ALAC_CHMODE_LEFT_RIGHT 0
44 #define ALAC_CHMODE_LEFT_SIDE 1
45 #define ALAC_CHMODE_RIGHT_SIDE 2
46 #define ALAC_CHMODE_MID_SIDE 3
/*
 * Entropy-coder parameters. The member list is not visible in this extract;
 * the encoder accesses history_mult, initial_history, k_modifier and
 * rice_modifier through s->rc.
 */
48 typedef struct RiceContext {
/*
 * Per-channel linear-prediction parameters. Only lpc_coeff is visible in
 * this extract; the encoder also reads and writes lpc_order and lpc_quant
 * members of this struct.
 */
55 typedef struct AlacLPCContext {
57 int lpc_coeff[ALAC_MAX_LPC_ORDER+1]; /**< quantized predictor coefficients */
/*
 * Private encoder state.
 * NOTE(review): several members used by the code (extra_bits, pbctx, rc,
 * lpc_ctx) are declared on lines that are not visible in this extract.
 */
61 typedef struct AlacEncodeContext {
63 AVCodecContext *avctx; /**< codec context; read for sample_fmt, bits_per_raw_sample, channels */
64 int frame_size; /**< current frame size */
65 int verbatim; /**< current frame verbatim mode flag */
66 int compression_level; /**< 0..2 after init; 0 = verbatim frames, 1 = fixed LPC coefficients */
67 int min_prediction_order; /**< lower LPC order bound passed to ff_lpc_calc_coefs() */
68 int max_prediction_order; /**< upper LPC order bound passed to ff_lpc_calc_coefs() */
69 int max_coded_frame_size; /**< get_max_frame_size() result for a full-size frame, in bytes */
70 int write_sample_size; /**< bit width used for residual sign extension and escaped values */
72 int32_t sample_buf[2][DEFAULT_FRAME_SIZE]; /**< per-channel samples of the element being coded */
73 int32_t predictor_buf[2][DEFAULT_FRAME_SIZE]; /**< per-channel residuals; also temporary storage for extracted extra bits */
74 int interlacing_shift; /**< stereo decorrelation shift, written as an 8-bit field */
75 int interlacing_leftweight; /**< stereo decorrelation weight, written as an 8-bit field */
78 AlacLPCContext lpc[2]; /**< predictor parameters for each channel of the element */
/**
 * Load the current frame's planar input into s->sample_buf.
 *
 * Every sample is shifted right by the difference between the container
 * width of the sample format and avctx->bits_per_raw_sample. S32P input is
 * read as int32_t; the other path reads int16_t (the line separating the
 * two COPY_SAMPLES calls is not visible in this extract).
 *
 * @param s        encoder context; avctx, frame_size and sample_buf are used
 * @param channels number of channel planes to copy
 * @param samples  per-channel input planes
 */
83 static void init_sample_buffers(AlacEncodeContext *s, int channels,
84 const uint8_t *samples[2])
/* container bits minus significant bits: the right-shift applied per sample */
87 int shift = av_get_bytes_per_sample(s->avctx->sample_fmt) * 8 -
88 s->avctx->bits_per_raw_sample;
/*
 * Copies s->frame_size samples per channel, reading the input as `type`.
 * Uses ch, i, channels, samples and shift from the enclosing function.
 */
90 #define COPY_SAMPLES(type) do { \
91 for (ch = 0; ch < channels; ch++) { \
92 int32_t *bptr = s->sample_buf[ch]; \
93 const type *sptr = (const type *)samples[ch]; \
94 for (i = 0; i < s->frame_size; i++) \
95 bptr[i] = sptr[i] >> shift; \
99 if (s->avctx->sample_fmt == AV_SAMPLE_FMT_S32P)
100 COPY_SAMPLES(int32_t);
102 COPY_SAMPLES(int16_t);
/**
 * Write one value to the bitstream using the code consumed by
 * alac_entropy_coder().
 *
 * k is first clamped to s->rc.k_modifier and the divisor is (1 << k) - 1.
 * Two output forms are visible: an escape form (9-bit ALAC_ESCAPE_CODE
 * followed by x in write_sample_size bits) and a prefix form (q one-bits, a
 * terminating zero bit, then either r + 1 in k bits or a zero in k - 1 bits).
 *
 * NOTE(review): the lines that compute q and r and the conditions selecting
 * each form are not visible in this extract; presumably q and r are the
 * quotient and remainder of x by divisor -- confirm.
 *
 * @param s                 encoder context (pbctx and rc.k_modifier are used)
 * @param x                 value to encode
 * @param k                 code parameter before clamping
 * @param write_sample_size bit width used when x is written directly
 */
105 static void encode_scalar(AlacEncodeContext *s, int x,
106 int k, int write_sample_size)
110 k = FFMIN(k, s->rc.k_modifier);
111 divisor = (1<<k) - 1;
116 // write escape code and sample value directly
117 put_bits(&s->pbctx, 9, ALAC_ESCAPE_CODE);
118 put_bits(&s->pbctx, write_sample_size, x);
/* prefix form: q one-bits followed by a single zero bit */
121 put_bits(&s->pbctx, q, (1<<q) - 1);
122 put_bits(&s->pbctx, 1, 0);
/* remainder: r + 1 in k bits, or a zero in k - 1 bits */
126 put_bits(&s->pbctx, k, r+1);
128 put_bits(&s->pbctx, k-1, 0);
/**
 * Write the header of one channel element to s->pbctx.
 *
 * The fixed part is 3 + 4 + 12 + 1 + 2 + 1 = 23 bits; a 32-bit sample count
 * may follow.
 * NOTE(review): the declaration and assignment of encode_fs and the guard
 * around the final put_bits32() are not visible in this extract; presumably
 * the count is written only when frame_size < DEFAULT_FRAME_SIZE -- confirm.
 *
 * @param s       encoder context (frame_size, extra_bits, verbatim are read)
 * @param element element type written into the 3-bit type field
 */
133 static void write_element_header(AlacEncodeContext *s,
134 enum AlacRawDataBlockType element,
139 if (s->frame_size < DEFAULT_FRAME_SIZE)
142 put_bits(&s->pbctx, 3, element); // element type
143 put_bits(&s->pbctx, 4, instance); // element instance
144 put_bits(&s->pbctx, 12, 0); // unused header bits
145 put_bits(&s->pbctx, 1, encode_fs); // Sample count is in the header
146 put_bits(&s->pbctx, 2, s->extra_bits >> 3); // Extra bytes (for 24-bit)
147 put_bits(&s->pbctx, 1, s->verbatim); // Audio block is verbatim
149 put_bits32(&s->pbctx, s->frame_size); // No. of samples in the frame
/**
 * Choose the LPC order, quantization shift and coefficients for channel ch
 * and store them in s->lpc[ch].
 *
 * compression_level 1 uses a fixed order-6 predictor with shift 6. The
 * other visible path asks ff_lpc_calc_coefs() for an order between
 * min_prediction_order and max_prediction_order and copies the matching
 * row of coefficients (the line separating the two paths is not visible in
 * this extract).
 *
 * @param s  encoder context
 * @param ch channel index into s->sample_buf and s->lpc
 */
152 static void calc_predictor_params(AlacEncodeContext *s, int ch)
154 int32_t coefs[MAX_LPC_ORDER][MAX_LPC_ORDER];
155 int shift[MAX_LPC_ORDER];
/* fixed predictor: no analysis of the input is performed */
158 if (s->compression_level == 1) {
159 s->lpc[ch].lpc_order = 6;
160 s->lpc[ch].lpc_quant = 6;
161 s->lpc[ch].lpc_coeff[0] = 160;
162 s->lpc[ch].lpc_coeff[1] = -190;
163 s->lpc[ch].lpc_coeff[2] = 170;
164 s->lpc[ch].lpc_coeff[3] = -130;
165 s->lpc[ch].lpc_coeff[4] = 80;
166 s->lpc[ch].lpc_coeff[5] = -25;
/* adaptive predictor: Levinson analysis with estimated order selection */
168 opt_order = ff_lpc_calc_coefs(&s->lpc_ctx, s->sample_buf[ch],
170 s->min_prediction_order,
171 s->max_prediction_order,
172 ALAC_MAX_LPC_PRECISION, coefs, shift,
173 FF_LPC_TYPE_LEVINSON, 0,
174 ORDER_METHOD_EST, ALAC_MAX_LPC_SHIFT, 1);
/* results for order N are taken from index N - 1 of coefs[] and shift[] */
176 s->lpc[ch].lpc_order = opt_order;
177 s->lpc[ch].lpc_quant = shift[opt_order-1];
/* NOTE(review): copies int32_t data into an int array using sizeof(int);
 * this relies on int being 32 bits wide. */
178 memcpy(s->lpc[ch].lpc_coeff, coefs[opt_order-1], opt_order*sizeof(int));
/**
 * Pick a stereo decorrelation mode by comparing summed second-order
 * residual magnitudes.
 *
 * sum[2] accumulates |(lt + rt) >> 1| and sum[3] accumulates |lt - rt|.
 * NOTE(review): the accumulation of sum[0] and sum[1] is not visible in
 * this extract; presumably they hold the left and right magnitudes --
 * confirm.
 *
 * @param left_ch  left channel samples
 * @param right_ch right channel samples
 * @param n        number of samples per channel; the loop starts at index 2
 * @return index of the lowest score[] entry (the return statement itself is
 *         not visible in this extract)
 */
182 static int estimate_stereo_mode(int32_t *left_ch, int32_t *right_ch, int n)
189 /* calculate sum of 2nd order residual for each channel */
190 sum[0] = sum[1] = sum[2] = sum[3] = 0;
191 for (i = 2; i < n; i++) {
192 lt = left_ch[i] - 2 * left_ch[i - 1] + left_ch[i - 2];
193 rt = right_ch[i] - 2 * right_ch[i - 1] + right_ch[i - 2];
194 sum[2] += FFABS((lt + rt) >> 1);
195 sum[3] += FFABS(lt - rt);
200 /* calculate score for each mode */
201 score[0] = sum[0] + sum[1];
202 score[1] = sum[0] + sum[3];
203 score[2] = sum[1] + sum[3];
204 score[3] = sum[2] + sum[3];
206 /* return mode with lowest score */
/* strict '<' keeps the lower-numbered mode when scores tie */
208 for (i = 1; i < 4; i++) {
209 if (score[i] < score[best])
/**
 * Decorrelate the two channels in s->sample_buf in place and record the
 * matching interlacing_leftweight / interlacing_shift in the context.
 *
 * The mode comes from estimate_stereo_mode(). The switch statement, the
 * tmp assignments and the break/default lines are not visible in this
 * extract.
 *
 * @param s encoder context; sample_buf[0] and sample_buf[1] are rewritten
 */
215 static void alac_stereo_decorrelation(AlacEncodeContext *s)
217 int32_t *left = s->sample_buf[0], *right = s->sample_buf[1];
218 int i, mode, n = s->frame_size;
221 mode = estimate_stereo_mode(left, right, n);
/* left/right: samples are left untouched */
224 case ALAC_CHMODE_LEFT_RIGHT:
225 s->interlacing_leftweight = 0;
226 s->interlacing_shift = 0;
/* left/side: the right buffer becomes left - right */
228 case ALAC_CHMODE_LEFT_SIDE:
229 for (i = 0; i < n; i++)
230 right[i] = left[i] - right[i];
231 s->interlacing_leftweight = 1;
232 s->interlacing_shift = 0;
/* right/side: the right buffer becomes left - right; the left buffer is
 * rebuilt from tmp plus the sign bit of the new difference */
234 case ALAC_CHMODE_RIGHT_SIDE:
235 for (i = 0; i < n; i++) {
237 right[i] = left[i] - right[i];
238 left[i] = tmp + (right[i] >> 31);
240 s->interlacing_leftweight = 1;
241 s->interlacing_shift = 31;
/* remaining mode: left becomes (tmp + right) >> 1, right becomes
 * tmp - right */
244 for (i = 0; i < n; i++) {
246 left[i] = (tmp + right[i]) >> 1;
247 right[i] = tmp - right[i];
249 s->interlacing_leftweight = 1;
250 s->interlacing_shift = 1;
/**
 * Compute prediction residuals for channel ch from s->sample_buf[ch] into
 * s->predictor_buf[ch].
 *
 * An lpc_order of 31 selects plain first-order differences. Otherwise, for
 * lpc_order > 0, the first lpc_order + 1 outputs are sign-extended
 * first-order differences and the rest are adaptive LPC residuals.
 *
 * NOTE(review): several lines (coefficient multiply, sum adjustment, sign
 * handling, index decrement and the pointer advance) are not visible in
 * this extract. Indices such as samples[0] and samples[lpc.lpc_order+1]
 * presumably refer to a window that slides by one sample per iteration --
 * confirm.
 *
 * @param s  encoder context
 * @param ch channel index
 */
255 static void alac_linear_predictor(AlacEncodeContext *s, int ch)
/* struct copy: coefficient adaptation below changes this local copy only,
 * s->lpc[ch] keeps the values chosen by calc_predictor_params() */
258 AlacLPCContext lpc = s->lpc[ch];
259 int32_t *residual = s->predictor_buf[ch];
/* order 31: first-order difference over the whole frame */
261 if (lpc.lpc_order == 31) {
262 residual[0] = s->sample_buf[ch][0];
264 for (i = 1; i < s->frame_size; i++) {
265 residual[i] = s->sample_buf[ch][i ] -
266 s->sample_buf[ch][i - 1];
272 // generalised linear predictor
274 if (lpc.lpc_order > 0) {
275 int32_t *samples = s->sample_buf[ch];
277 // generate warm-up samples
278 residual[0] = samples[0];
279 for (i = 1; i <= lpc.lpc_order; i++)
280 residual[i] = sign_extend(samples[i] - samples[i-1], s->write_sample_size);
282 // perform lpc on remaining samples
283 for (i = lpc.lpc_order + 1; i < s->frame_size; i++) {
/* start from half of the quantization step so the shift below rounds */
284 int sum = 1 << (lpc.lpc_quant - 1), res_val, j;
/* prediction is formed from differences relative to samples[0] */
286 for (j = 0; j < lpc.lpc_order; j++) {
287 sum += (samples[lpc.lpc_order-j] - samples[0]) *
291 sum >>= lpc.lpc_quant;
293 residual[i] = sign_extend(samples[lpc.lpc_order+1] - sum,
294 s->write_sample_size);
295 res_val = residual[i];
/* coefficient adaptation: walk from the last coefficient down while the
 * remaining error keeps the sign of the original residual */
298 int index = lpc.lpc_order - 1;
299 int neg = (res_val < 0);
301 while (index >= 0 && (neg ? (res_val < 0) : (res_val > 0))) {
302 int val = samples[0] - samples[lpc.lpc_order - index];
303 int sign = (val ? FFSIGN(val) : 0);
308 lpc.lpc_coeff[index] -= sign;
310 res_val -= (val >> lpc.lpc_quant) * (lpc.lpc_order - index);
/**
 * Entropy-code the residuals of channel ch (s->predictor_buf[ch]) into the
 * bitstream via encode_scalar().
 *
 * A running `history` value, seeded from rc.initial_history, selects the
 * code parameter k for each residual. When history drops below 128 and
 * samples remain, a run of zero residuals is counted and its length is
 * coded with a 16-bit escape width.
 *
 * NOTE(review): the declaration of x, its sign folding, the pointer and
 * index increments and the history resets are on lines not visible in this
 * extract.
 *
 * @param s  encoder context
 * @param ch channel index
 */
319 static void alac_entropy_coder(AlacEncodeContext *s, int ch)
321 unsigned int history = s->rc.initial_history;
322 int sign_modifier = 0, i, k;
323 int32_t *samples = s->predictor_buf[ch];
/* i is advanced inside the body, hence the empty increment clause */
325 for (i = 0; i < s->frame_size;) {
328 k = av_log2((history >> 9) + 3);
330 x = -2 * (*samples) -1;
336 encode_scalar(s, x - sign_modifier, k, s->write_sample_size);
/* update: add the scaled new value, subtract a 1/512 share of the scaled history */
338 history += x * s->rc.history_mult -
339 ((history * s->rc.history_mult) >> 9);
/* low history: code a run of zero residuals as a block length */
345 if (history < 128 && i < s->frame_size) {
346 unsigned int block_size = 0;
348 k = 7 - av_log2(history) + ((history + 16) >> 6);
350 while (*samples == 0 && i < s->frame_size) {
355 encode_scalar(s, block_size, k, 16);
/* the next coded value is reduced by one unless the run exceeded 0xFFFF */
356 sign_modifier = (block_size <= 0xFFFF);
/**
 * Encode one channel element (one or two channels) into s->pbctx.
 *
 * Two paths are visible. The verbatim path writes the header and then the
 * raw samples, channel-interleaved, at bits_per_raw_sample bits each. The
 * compressed path loads the samples, splits off extra low bits, optionally
 * decorrelates a stereo pair, writes the predictor description, then the
 * extra bits, then the entropy-coded residuals.
 *
 * NOTE(review): the branch lines selecting between the paths, the guards
 * around the extra-bit and stereo steps, and the declarations of i, j and
 * channels are not visible in this extract.
 *
 * @param s        encoder context
 * @param element  element type; TYPE_CPE means two channels, otherwise one
 * @param instance element instance number passed to the header
 * @param samples0 first channel plane
 * @param samples1 second channel plane (callers pass NULL for single-channel
 *                 elements)
 */
363 static void write_element(AlacEncodeContext *s,
364 enum AlacRawDataBlockType element, int instance,
365 const uint8_t *samples0, const uint8_t *samples1)
367 const uint8_t *samples[2] = { samples0, samples1 };
/* stays 0 in the visible code, so the prediction_type == 15 pass below is
 * not taken */
369 int prediction_type = 0;
370 PutBitContext *pb = &s->pbctx;
372 channels = element == TYPE_CPE ? 2 : 1;
375 write_element_header(s, element, instance);
376 /* samples are channel-interleaved in verbatim mode */
377 if (s->avctx->sample_fmt == AV_SAMPLE_FMT_S32P) {
/* drop the unused low bits of the 32-bit container */
378 int shift = 32 - s->avctx->bits_per_raw_sample;
379 const int32_t *samples_s32[2] = { (const int32_t *)samples0,
380 (const int32_t *)samples1 };
381 for (i = 0; i < s->frame_size; i++)
382 for (j = 0; j < channels; j++)
383 put_sbits(pb, s->avctx->bits_per_raw_sample,
384 samples_s32[j][i] >> shift);
386 const int16_t *samples_s16[2] = { (const int16_t *)samples0,
387 (const int16_t *)samples1 };
388 for (i = 0; i < s->frame_size; i++)
389 for (j = 0; j < channels; j++)
390 put_sbits(pb, s->avctx->bits_per_raw_sample,
/* compressed path starts here; the rest of this expression is on a line
 * not visible in this extract */
394 s->write_sample_size = s->avctx->bits_per_raw_sample - s->extra_bits +
397 init_sample_buffers(s, channels, samples);
398 write_element_header(s, element, instance);
400 // extract extra bits if needed
/* low extra_bits bits are parked in predictor_buf and removed from the
 * samples that go through prediction */
402 uint32_t mask = (1 << s->extra_bits) - 1;
403 for (j = 0; j < channels; j++) {
404 int32_t *extra = s->predictor_buf[j];
405 int32_t *smp = s->sample_buf[j];
406 for (i = 0; i < s->frame_size; i++) {
407 extra[i] = smp[i] & mask;
408 smp[i] >>= s->extra_bits;
414 alac_stereo_decorrelation(s);
416 s->interlacing_shift = s->interlacing_leftweight = 0;
417 put_bits(pb, 8, s->interlacing_shift);
418 put_bits(pb, 8, s->interlacing_leftweight);
/* per-channel predictor description */
420 for (i = 0; i < channels; i++) {
421 calc_predictor_params(s, i);
423 put_bits(pb, 4, prediction_type);
424 put_bits(pb, 4, s->lpc[i].lpc_quant);
426 put_bits(pb, 3, s->rc.rice_modifier);
427 put_bits(pb, 5, s->lpc[i].lpc_order);
428 // predictor coeff. table
429 for (j = 0; j < s->lpc[i].lpc_order; j++)
430 put_sbits(pb, 16, s->lpc[i].lpc_coeff[j]);
433 // write extra bits if needed
/* must happen before alac_linear_predictor() below, which reuses
 * predictor_buf for the residuals */
435 for (i = 0; i < s->frame_size; i++) {
436 for (j = 0; j < channels; j++) {
437 put_bits(pb, s->extra_bits, s->predictor_buf[j][i]);
442 // apply lpc and entropy coding to audio samples
443 for (i = 0; i < channels; i++) {
444 alac_linear_predictor(s, i);
446 // TODO: determine when this will actually help. for now it's not used.
447 if (prediction_type == 15) {
448 // 2nd pass 1st order filter
449 int32_t *residual = s->predictor_buf[i];
450 for (j = s->frame_size - 1; j > 0; j--)
451 residual[j] -= residual[j - 1];
453 alac_entropy_coder(s, i);
/**
 * Encode all channels of one frame into avpkt->data.
 *
 * Channels are grouped into elements following the per-channel-count tables
 * ff_alac_channel_elements and ff_alac_channel_layout_offsets; each element
 * is written with write_element() and the stream is closed with a 3-bit
 * TYPE_END tag.
 *
 * NOTE(review): the counter updates inside the loop, the else line and the
 * flush of the bit writer are on lines not visible in this extract.
 *
 * @param s       encoder context
 * @param avpkt   output packet; data and size give the bit writer's buffer
 * @param samples per-channel input planes
 * @return number of bytes written, computed as the bit count >> 3
 */
458 static int write_frame(AlacEncodeContext *s, AVPacket *avpkt,
459 uint8_t * const *samples)
461 PutBitContext *pb = &s->pbctx;
/* both tables are indexed by channel count minus one */
462 const enum AlacRawDataBlockType *ch_elements = ff_alac_channel_elements[s->avctx->channels - 1];
463 const uint8_t *ch_map = ff_alac_channel_layout_offsets[s->avctx->channels - 1];
464 int ch, element, sce, cpe;
466 init_put_bits(pb, avpkt->data, avpkt->size);
468 ch = element = sce = cpe = 0;
469 while (ch < s->avctx->channels) {
/* a channel-pair element consumes two mapped channels */
470 if (ch_elements[element] == TYPE_CPE) {
471 write_element(s, TYPE_CPE, cpe, samples[ch_map[ch]],
472 samples[ch_map[ch + 1]]);
/* single-channel element: no second plane */
476 write_element(s, TYPE_SCE, sce, samples[ch_map[ch]], NULL);
483 put_bits(pb, 3, TYPE_END);
486 return put_bits_count(pb) >> 3;
/**
 * Size in bytes of a frame stored without compression.
 *
 * Counts 23 header bits, 32 more when frame_size is below
 * DEFAULT_FRAME_SIZE, bps bits for every sample of every channel and 3
 * trailing bits, then rounds up to whole bytes.
 *
 * @param frame_size samples per channel
 * @param ch         channel count
 * @param bps        bits per sample
 * @return byte count
 */
489 static av_always_inline int get_max_frame_size(int frame_size, int ch, int bps)
491 int header_bits = 23 + 32 * (frame_size < DEFAULT_FRAME_SIZE);
492 return FFALIGN(header_bits + bps * ch * frame_size + 3, 8) / 8;
/**
 * Release encoder resources: the LPC context and the extradata buffer.
 * Also called from alac_encode_init().
 *
 * @param avctx codec context whose priv_data is the AlacEncodeContext
 * @return the return statement is not visible in this extract
 */
495 static av_cold int alac_encode_close(AVCodecContext *avctx)
497 AlacEncodeContext *s = avctx->priv_data;
498 ff_lpc_end(&s->lpc_ctx);
/* av_freep() takes the address of the pointer being freed */
499 av_freep(&avctx->extradata);
500 avctx->extradata_size = 0;
/**
 * Initialize the encoder: fix the frame size and sample depth, choose the
 * compression level and entropy-coder defaults, build the extradata block,
 * validate the prediction-order range and set up the LPC context.
 *
 * NOTE(review): the declaration of ret, the error-path jumps, the else
 * lines and the final return are on lines not visible in this extract;
 * alac_encode_close() is called near the end, presumably on the failure
 * path only -- confirm.
 *
 * @param avctx codec context; frame_size, bits_per_raw_sample, extradata
 *              and extradata_size are written
 * @return presumably 0 on success; AVERROR(ENOMEM) and AVERROR(EINVAL) are
 *         assigned to ret on the visible failure branches
 */
504 static av_cold int alac_encode_init(AVCodecContext *avctx)
506 AlacEncodeContext *s = avctx->priv_data;
508 uint8_t *alac_extradata;
510 avctx->frame_size = s->frame_size = DEFAULT_FRAME_SIZE;
/* S32P input is always coded as 24-bit; the other visible assignment
 * selects 16-bit */
512 if (avctx->sample_fmt == AV_SAMPLE_FMT_S32P) {
513 if (avctx->bits_per_raw_sample != 24)
514 av_log(avctx, AV_LOG_WARNING, "encoding as 24 bits-per-sample\n");
515 avctx->bits_per_raw_sample = 24;
517 avctx->bits_per_raw_sample = 16;
521 // Set default compression level
522 if (avctx->compression_level == FF_COMPRESSION_DEFAULT)
523 s->compression_level = 2;
525 s->compression_level = av_clip(avctx->compression_level, 0, 2);
527 // Initialize default Rice parameters
528 s->rc.history_mult = 40;
529 s->rc.initial_history = 10;
530 s->rc.k_modifier = 14;
531 s->rc.rice_modifier = 4;
533 s->max_coded_frame_size = get_max_frame_size(avctx->frame_size,
535 avctx->bits_per_raw_sample);
/* zero-initialized, with the padding bytes appended after the payload */
537 avctx->extradata = av_mallocz(ALAC_EXTRADATA_SIZE + AV_INPUT_BUFFER_PADDING_SIZE);
538 if (!avctx->extradata) {
539 ret = AVERROR(ENOMEM);
542 avctx->extradata_size = ALAC_EXTRADATA_SIZE;
/* big-endian fields at fixed byte offsets; bytes not written stay zero */
544 alac_extradata = avctx->extradata;
545 AV_WB32(alac_extradata, ALAC_EXTRADATA_SIZE);
546 AV_WB32(alac_extradata+4, MKBETAG('a','l','a','c'));
547 AV_WB32(alac_extradata+12, avctx->frame_size);
548 AV_WB8 (alac_extradata+17, avctx->bits_per_raw_sample);
549 AV_WB8 (alac_extradata+21, avctx->channels);
550 AV_WB32(alac_extradata+24, s->max_coded_frame_size);
551 AV_WB32(alac_extradata+28,
552 avctx->sample_rate * avctx->channels * avctx->bits_per_raw_sample); // average bitrate
553 AV_WB32(alac_extradata+32, avctx->sample_rate);
555 // Set relevant extradata fields
/* entropy-coder parameters are only stored when compression is enabled */
556 if (s->compression_level > 0) {
557 AV_WB8(alac_extradata+18, s->rc.history_mult);
558 AV_WB8(alac_extradata+19, s->rc.initial_history);
559 AV_WB8(alac_extradata+20, s->rc.k_modifier);
/* deprecated AVCodecContext fields override the private options when they
 * are non-negative */
562 #if FF_API_PRIVATE_OPT
563 FF_DISABLE_DEPRECATION_WARNINGS
564 if (avctx->min_prediction_order >= 0) {
565 if (avctx->min_prediction_order < MIN_LPC_ORDER ||
566 avctx->min_prediction_order > ALAC_MAX_LPC_ORDER) {
567 av_log(avctx, AV_LOG_ERROR, "invalid min prediction order: %d\n",
568 avctx->min_prediction_order);
569 ret = AVERROR(EINVAL);
573 s->min_prediction_order = avctx->min_prediction_order;
576 if (avctx->max_prediction_order >= 0) {
577 if (avctx->max_prediction_order < MIN_LPC_ORDER ||
578 avctx->max_prediction_order > ALAC_MAX_LPC_ORDER) {
579 av_log(avctx, AV_LOG_ERROR, "invalid max prediction order: %d\n",
580 avctx->max_prediction_order);
581 ret = AVERROR(EINVAL);
585 s->max_prediction_order = avctx->max_prediction_order;
587 FF_ENABLE_DEPRECATION_WARNINGS
/* each bound is range-checked individually; this checks their ordering */
590 if (s->max_prediction_order < s->min_prediction_order) {
591 av_log(avctx, AV_LOG_ERROR,
592 "invalid prediction orders: min=%d max=%d\n",
593 s->min_prediction_order, s->max_prediction_order);
594 ret = AVERROR(EINVAL);
600 if ((ret = ff_lpc_init(&s->lpc_ctx, avctx->frame_size,
601 s->max_prediction_order,
602 FF_LPC_TYPE_LEVINSON)) < 0) {
608 alac_encode_close(avctx);
/**
 * Encode one audio frame into avpkt.
 *
 * The frame is first written with the mode implied by compression_level.
 * If the result is larger than max_frame_size it is written again (the
 * existing comment says in verbatim mode).
 *
 * NOTE(review): the verbatim/extra_bits assignments on most branches, the
 * error return after ff_alloc_packet2(), the got_packet_ptr update and the
 * final return are on lines not visible in this extract.
 *
 * @param avctx          codec context
 * @param avpkt          output packet; its size is set to the bytes written
 * @param frame          input frame; nb_samples and extended_data are read
 * @param got_packet_ptr not touched by the visible lines
 */
612 static int alac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
613 const AVFrame *frame, int *got_packet_ptr)
615 AlacEncodeContext *s = avctx->priv_data;
616 int out_bytes, max_frame_size, ret;
618 s->frame_size = frame->nb_samples;
/* short frames get their own bound; full frames reuse the value from init */
620 if (frame->nb_samples < DEFAULT_FRAME_SIZE)
621 max_frame_size = get_max_frame_size(s->frame_size, avctx->channels,
622 avctx->bits_per_raw_sample);
624 max_frame_size = s->max_coded_frame_size;
/* NOTE(review): twice the bound is requested, presumably so a compressed
 * attempt that overshoots still fits before the retry -- confirm */
626 if ((ret = ff_alloc_packet2(avctx, avpkt, 2 * max_frame_size, 0)) < 0)
629 /* use verbatim mode for compression_level 0 */
630 if (s->compression_level) {
/* bits above 16 are carried as separate extra bits */
632 s->extra_bits = avctx->bits_per_raw_sample - 16;
638 out_bytes = write_frame(s, avpkt, frame->extended_data);
640 if (out_bytes > max_frame_size) {
641 /* frame too large. use verbatim mode */
644 out_bytes = write_frame(s, avpkt, frame->extended_data);
647 avpkt->size = out_bytes;
/* OFFSET maps an option onto its AlacEncodeContext member; AE is the flag
 * set shared by both options (audio + encoding parameter). */
652 #define OFFSET(x) offsetof(AlacEncodeContext, x)
653 #define AE AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/* Private options: LPC order bounds, each limited to
 * MIN_LPC_ORDER..ALAC_MAX_LPC_ORDER. Any terminating entry is not visible
 * in this extract. */
654 static const AVOption options[] = {
655 { "min_prediction_order", NULL, OFFSET(min_prediction_order), AV_OPT_TYPE_INT, { .i64 = DEFAULT_MIN_PRED_ORDER }, MIN_LPC_ORDER, ALAC_MAX_LPC_ORDER, AE },
656 { "max_prediction_order", NULL, OFFSET(max_prediction_order), AV_OPT_TYPE_INT, { .i64 = DEFAULT_MAX_PRED_ORDER }, MIN_LPC_ORDER, ALAC_MAX_LPC_ORDER, AE },
/* AVClass for the encoder's private context, referenced by
 * ff_alac_encoder.priv_class. One initializer line is not visible in this
 * extract. */
661 static const AVClass alacenc_class = {
662 .class_name = "alacenc",
663 .item_name = av_default_item_name,
665 .version = LIBAVUTIL_VERSION_INT,
668 AVCodec ff_alac_encoder = {
670 .long_name = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),
671 .type = AVMEDIA_TYPE_AUDIO,
672 .id = AV_CODEC_ID_ALAC,
673 .priv_data_size = sizeof(AlacEncodeContext),
674 .priv_class = &alacenc_class,
675 .init = alac_encode_init,
676 .encode2 = alac_encode_frame,
677 .close = alac_encode_close,
678 .capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME,
679 .channel_layouts = ff_alac_channel_layouts,
680 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S32P,
682 AV_SAMPLE_FMT_NONE },