2 * Simple free lossless/lossy audio codec
3 * Copyright (c) 2004 Alex Beregszaszi
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 * Simple free lossless/lossy audio codec
25 * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk)
26 * Written and designed by Alex Beregszaszi
29 * - CABAC put/get_symbol
30 * - independent quantizer for channels
31 * - >2 channels support
32 * - more decorrelation types
33 * - more tap_quant tests
34 * - selectable intlist writers/readers (bonk-style, golomb, cabac)
37 #define MAX_CHANNELS 2
43 typedef struct SonicContext {
44 int lossless, decorrelation;
46 int num_taps, downsampling;
49 int channels, samplerate, block_align, frame_size;
53 int *coded_samples[MAX_CHANNELS];
63 int *predictor_state[MAX_CHANNELS];
66 #define LATTICE_SHIFT 10
67 #define SAMPLE_SHIFT 4
68 #define LATTICE_FACTOR (1 << LATTICE_SHIFT)
69 #define SAMPLE_FACTOR (1 << SAMPLE_SHIFT)
71 #define BASE_QUANT 0.6
72 #define RATE_VARIATION 3.0
74 static inline int divide(int a, int b)
77 return -( (-a + b/2)/b );
82 static inline int shift(int a,int b)
84 return (a+(1<<(b-1))) >> b;
87 static inline int shift_down(int a,int b)
89 return (a>>b)+((a<0)?1:0);
93 static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
97 for (i = 0; i < entries; i++)
98 set_se_golomb(pb, buf[i]);
103 static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
107 for (i = 0; i < entries; i++)
108 buf[i] = get_se_golomb(gb);
115 #define ADAPT_LEVEL 8
117 static int bits_to_store(uint64_t x)
129 static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max)
136 bits = bits_to_store(max);
138 for (i = 0; i < bits-1; i++)
139 put_bits(pb, 1, value & (1 << i));
141 if ( (value | (1 << (bits-1))) <= max)
142 put_bits(pb, 1, value & (1 << (bits-1)));
145 static unsigned int read_uint_max(GetBitContext *gb, int max)
147 int i, bits, value = 0;
152 bits = bits_to_store(max);
154 for (i = 0; i < bits-1; i++)
158 if ( (value | (1<<(bits-1))) <= max)
160 value += 1 << (bits-1);
165 static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
167 int i, j, x = 0, low_bits = 0, max = 0;
168 int step = 256, pos = 0, dominant = 0, any = 0;
171 copy = av_mallocz(4* entries);
179 for (i = 0; i < entries; i++)
180 energy += abs(buf[i]);
182 low_bits = bits_to_store(energy / (entries * 2));
186 put_bits(pb, 4, low_bits);
189 for (i = 0; i < entries; i++)
191 put_bits(pb, low_bits, abs(buf[i]));
192 copy[i] = abs(buf[i]) >> low_bits;
197 bits = av_mallocz(4* entries*max);
204 for (i = 0; i <= max; i++)
206 for (j = 0; j < entries; j++)
208 bits[x++] = copy[j] > i;
214 int steplet = step >> 8;
216 if (pos + steplet > x)
219 for (i = 0; i < steplet; i++)
220 if (bits[i+pos] != dominant)
223 put_bits(pb, 1, any);
228 step += step / ADAPT_LEVEL;
234 while (((pos + interloper) < x) && (bits[pos + interloper] == dominant))
238 write_uint_max(pb, interloper, (step >> 8) - 1);
240 pos += interloper + 1;
241 step -= step / ADAPT_LEVEL;
247 dominant = !dominant;
252 for (i = 0; i < entries; i++)
254 put_bits(pb, 1, buf[i] < 0);
262 static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
264 int i, low_bits = 0, x = 0;
265 int n_zeros = 0, step = 256, dominant = 0;
266 int pos = 0, level = 0;
267 int *bits = av_mallocz(4* entries);
274 low_bits = get_bits(gb, 4);
277 for (i = 0; i < entries; i++)
278 buf[i] = get_bits(gb, low_bits);
281 // av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits);
283 while (n_zeros < entries)
285 int steplet = step >> 8;
289 for (i = 0; i < steplet; i++)
290 bits[x++] = dominant;
295 step += step / ADAPT_LEVEL;
299 int actual_run = read_uint_max(gb, steplet-1);
301 // av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run);
303 for (i = 0; i < actual_run; i++)
304 bits[x++] = dominant;
306 bits[x++] = !dominant;
309 n_zeros += actual_run;
313 step -= step / ADAPT_LEVEL;
319 dominant = !dominant;
323 // reconstruct unsigned values
325 for (i = 0; n_zeros < entries; i++)
332 level += 1 << low_bits;
335 if (buf[pos] >= level)
342 buf[pos] += 1 << low_bits;
351 for (i = 0; i < entries; i++)
352 if (buf[i] && get_bits1(gb))
355 // av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos);
361 static void predictor_init_state(int *k, int *state, int order)
365 for (i = order-2; i >= 0; i--)
367 int j, p, x = state[i];
369 for (j = 0, p = i+1; p < order; j++,p++)
371 int tmp = x + shift_down(k[j] * state[p], LATTICE_SHIFT);
372 state[p] += shift_down(k[j]*x, LATTICE_SHIFT);
378 static int predictor_calc_error(int *k, int *state, int order, int error)
380 int i, x = error - shift_down(k[order-1] * state[order-1], LATTICE_SHIFT);
383 int *k_ptr = &(k[order-2]),
384 *state_ptr = &(state[order-2]);
385 for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--)
387 int k_value = *k_ptr, state_value = *state_ptr;
388 x -= shift_down(k_value * state_value, LATTICE_SHIFT);
389 state_ptr[1] = state_value + shift_down(k_value * x, LATTICE_SHIFT);
392 for (i = order-2; i >= 0; i--)
394 x -= shift_down(k[i] * state[i], LATTICE_SHIFT);
395 state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT);
399 // don't drift too far, to avoid overflows
400 if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16);
401 if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16);
408 // Heavily modified Levinson-Durbin algorithm which
409 // copes better with quantization, and calculates the
410 // actual whitened result as it goes.
412 static void modified_levinson_durbin(int *window, int window_entries,
413 int *out, int out_entries, int channels, int *tap_quant)
416 int *state = av_mallocz(4* window_entries);
418 memcpy(state, window, 4* window_entries);
420 for (i = 0; i < out_entries; i++)
422 int step = (i+1)*channels, k, j;
423 double xx = 0.0, xy = 0.0;
425 int *x_ptr = &(window[step]), *state_ptr = &(state[0]);
426 j = window_entries - step;
427 for (;j>=0;j--,x_ptr++,state_ptr++)
429 double x_value = *x_ptr, state_value = *state_ptr;
430 xx += state_value*state_value;
431 xy += x_value*state_value;
434 for (j = 0; j <= (window_entries - step); j++);
436 double stepval = window[step+j], stateval = window[j];
437 // xx += (double)window[j]*(double)window[j];
438 // xy += (double)window[step+j]*(double)window[j];
439 xx += stateval*stateval;
440 xy += stepval*stateval;
446 k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5));
448 if (k > (LATTICE_FACTOR/tap_quant[i]))
449 k = LATTICE_FACTOR/tap_quant[i];
450 if (-k > (LATTICE_FACTOR/tap_quant[i]))
451 k = -(LATTICE_FACTOR/tap_quant[i]);
457 x_ptr = &(window[step]);
458 state_ptr = &(state[0]);
459 j = window_entries - step;
460 for (;j>=0;j--,x_ptr++,state_ptr++)
462 int x_value = *x_ptr, state_value = *state_ptr;
463 *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT);
464 *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT);
467 for (j=0; j <= (window_entries - step); j++)
469 int stepval = window[step+j], stateval=state[j];
470 window[step+j] += shift_down(k * stateval, LATTICE_SHIFT);
471 state[j] += shift_down(k * stepval, LATTICE_SHIFT);
479 static int samplerate_table[] =
480 { 44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000 };
482 #ifdef CONFIG_ENCODERS
484 static inline int code_samplerate(int samplerate)
488 case 44100: return 0;
489 case 22050: return 1;
490 case 11025: return 2;
491 case 96000: return 3;
492 case 48000: return 4;
493 case 32000: return 5;
494 case 24000: return 6;
495 case 16000: return 7;
501 static int sonic_encode_init(AVCodecContext *avctx)
503 SonicContext *s = avctx->priv_data;
507 if (avctx->channels > MAX_CHANNELS)
509 av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
510 return -1; /* only stereo or mono for now */
513 if (avctx->channels == 2)
514 s->decorrelation = MID_SIDE;
516 if (avctx->codec->id == CODEC_ID_SONIC_LS)
521 s->quantization = 0.0;
527 s->quantization = 1.0;
531 if ((s->num_taps < 32) || (s->num_taps > 1024) ||
532 ((s->num_taps>>5)<<5 != s->num_taps))
534 av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
539 s->tap_quant = av_mallocz(4* s->num_taps);
540 for (i = 0; i < s->num_taps; i++)
541 s->tap_quant[i] = (int)(sqrt(i+1));
543 s->channels = avctx->channels;
544 s->samplerate = avctx->sample_rate;
546 s->block_align = (int)(2048.0*s->samplerate/44100)/s->downsampling;
547 s->frame_size = s->channels*s->block_align*s->downsampling;
549 s->tail = av_mallocz(4* s->num_taps*s->channels);
552 s->tail_size = s->num_taps*s->channels;
554 s->predictor_k = av_mallocz(4 * s->num_taps);
558 for (i = 0; i < s->channels; i++)
560 s->coded_samples[i] = av_mallocz(4* s->block_align);
561 if (!s->coded_samples[i])
565 s->int_samples = av_mallocz(4* s->frame_size);
567 s->window_size = ((2*s->tail_size)+s->frame_size);
568 s->window = av_mallocz(4* s->window_size);
572 avctx->extradata = av_mallocz(16);
573 if (!avctx->extradata)
575 init_put_bits(&pb, avctx->extradata, 16*8);
577 put_bits(&pb, 2, version); // version
580 put_bits(&pb, 2, s->channels);
581 put_bits(&pb, 4, code_samplerate(s->samplerate));
583 put_bits(&pb, 1, s->lossless);
585 put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
586 put_bits(&pb, 2, s->decorrelation);
587 put_bits(&pb, 2, s->downsampling);
588 put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
589 put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table
592 avctx->extradata_size = put_bits_count(&pb)/8;
594 av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
595 version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
597 avctx->coded_frame = avcodec_alloc_frame();
598 if (!avctx->coded_frame)
600 avctx->coded_frame->key_frame = 1;
601 avctx->frame_size = s->block_align*s->downsampling;
606 static int sonic_encode_close(AVCodecContext *avctx)
608 SonicContext *s = avctx->priv_data;
611 av_freep(&avctx->coded_frame);
613 for (i = 0; i < s->channels; i++)
614 av_free(s->coded_samples[i]);
616 av_free(s->predictor_k);
618 av_free(s->tap_quant);
620 av_free(s->int_samples);
625 static int sonic_encode_frame(AVCodecContext *avctx,
626 uint8_t *buf, int buf_size, void *data)
628 SonicContext *s = avctx->priv_data;
630 int i, j, ch, quant = 0, x = 0;
631 short *samples = data;
633 init_put_bits(&pb, buf, buf_size*8);
636 for (i = 0; i < s->frame_size; i++)
637 s->int_samples[i] = samples[i];
640 for (i = 0; i < s->frame_size; i++)
641 s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT;
643 switch(s->decorrelation)
646 for (i = 0; i < s->frame_size; i += s->channels)
648 s->int_samples[i] += s->int_samples[i+1];
649 s->int_samples[i+1] -= shift(s->int_samples[i], 1);
653 for (i = 0; i < s->frame_size; i += s->channels)
654 s->int_samples[i+1] -= s->int_samples[i];
657 for (i = 0; i < s->frame_size; i += s->channels)
658 s->int_samples[i] -= s->int_samples[i+1];
662 memset(s->window, 0, 4* s->window_size);
664 for (i = 0; i < s->tail_size; i++)
665 s->window[x++] = s->tail[i];
667 for (i = 0; i < s->frame_size; i++)
668 s->window[x++] = s->int_samples[i];
670 for (i = 0; i < s->tail_size; i++)
673 for (i = 0; i < s->tail_size; i++)
674 s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];
677 modified_levinson_durbin(s->window, s->window_size,
678 s->predictor_k, s->num_taps, s->channels, s->tap_quant);
679 if (intlist_write(&pb, s->predictor_k, s->num_taps, 0) < 0)
682 for (ch = 0; ch < s->channels; ch++)
685 for (i = 0; i < s->block_align; i++)
688 for (j = 0; j < s->downsampling; j++, x += s->channels)
690 s->coded_samples[ch][i] = sum;
694 // simple rate control code
697 double energy1 = 0.0, energy2 = 0.0;
698 for (ch = 0; ch < s->channels; ch++)
700 for (i = 0; i < s->block_align; i++)
702 double sample = s->coded_samples[ch][i];
703 energy2 += sample*sample;
704 energy1 += fabs(sample);
708 energy2 = sqrt(energy2/(s->channels*s->block_align));
709 energy1 = sqrt(2.0)*energy1/(s->channels*s->block_align);
711 // increase bitrate when samples are like a gaussian distribution
712 // reduce bitrate when samples are like a two-tailed exponential distribution
714 if (energy2 > energy1)
715 energy2 += (energy2-energy1)*RATE_VARIATION;
717 quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);
718 // av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2);
725 set_ue_golomb(&pb, quant);
727 quant *= SAMPLE_FACTOR;
730 // write out coded samples
731 for (ch = 0; ch < s->channels; ch++)
734 for (i = 0; i < s->block_align; i++)
735 s->coded_samples[ch][i] = divide(s->coded_samples[ch][i], quant);
737 if (intlist_write(&pb, s->coded_samples[ch], s->block_align, 1) < 0)
741 // av_log(avctx, AV_LOG_DEBUG, "used bytes: %d\n", (put_bits_count(&pb)+7)/8);
744 return (put_bits_count(&pb)+7)/8;
746 #endif //CONFIG_ENCODERS
748 static int sonic_decode_init(AVCodecContext *avctx)
750 SonicContext *s = avctx->priv_data;
754 s->channels = avctx->channels;
755 s->samplerate = avctx->sample_rate;
757 if (!avctx->extradata)
759 av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
763 init_get_bits(&gb, avctx->extradata, avctx->extradata_size);
765 version = get_bits(&gb, 2);
768 av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
774 s->channels = get_bits(&gb, 2);
775 s->samplerate = samplerate_table[get_bits(&gb, 4)];
776 av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
777 s->channels, s->samplerate);
780 if (s->channels > MAX_CHANNELS)
782 av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
786 s->lossless = get_bits1(&gb);
788 skip_bits(&gb, 3); // XXX FIXME
789 s->decorrelation = get_bits(&gb, 2);
791 s->downsampling = get_bits(&gb, 2);
792 s->num_taps = (get_bits(&gb, 5)+1)<<5;
793 if (get_bits1(&gb)) // XXX FIXME
794 av_log(avctx, AV_LOG_INFO, "Custom quant table\n");
796 s->block_align = (int)(2048.0*(s->samplerate/44100))/s->downsampling;
797 s->frame_size = s->channels*s->block_align*s->downsampling;
798 // avctx->frame_size = s->block_align;
800 av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
801 version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
804 s->tap_quant = av_mallocz(4* s->num_taps);
805 for (i = 0; i < s->num_taps; i++)
806 s->tap_quant[i] = (int)(sqrt(i+1));
808 s->predictor_k = av_mallocz(4* s->num_taps);
810 for (i = 0; i < s->channels; i++)
812 s->predictor_state[i] = av_mallocz(4* s->num_taps);
813 if (!s->predictor_state[i])
817 for (i = 0; i < s->channels; i++)
819 s->coded_samples[i] = av_mallocz(4* s->block_align);
820 if (!s->coded_samples[i])
823 s->int_samples = av_mallocz(4* s->frame_size);
828 static int sonic_decode_close(AVCodecContext *avctx)
830 SonicContext *s = avctx->priv_data;
833 av_free(s->int_samples);
834 av_free(s->tap_quant);
835 av_free(s->predictor_k);
837 for (i = 0; i < s->channels; i++)
839 av_free(s->predictor_state[i]);
840 av_free(s->coded_samples[i]);
846 static int sonic_decode_frame(AVCodecContext *avctx,
847 int16_t *data, int *data_size,
848 uint8_t *buf, int buf_size)
850 SonicContext *s = avctx->priv_data;
853 short *samples = data;
855 if (buf_size == 0) return 0;
857 // av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);
859 init_get_bits(&gb, buf, buf_size*8);
861 intlist_read(&gb, s->predictor_k, s->num_taps, 0);
864 for (i = 0; i < s->num_taps; i++)
865 s->predictor_k[i] *= s->tap_quant[i];
870 quant = get_ue_golomb(&gb) * SAMPLE_FACTOR;
872 // av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);
874 for (ch = 0; ch < s->channels; ch++)
878 predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps);
880 intlist_read(&gb, s->coded_samples[ch], s->block_align, 1);
882 for (i = 0; i < s->block_align; i++)
884 for (j = 0; j < s->downsampling - 1; j++)
886 s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0);
890 s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * quant);
894 for (i = 0; i < s->num_taps; i++)
895 s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
898 switch(s->decorrelation)
901 for (i = 0; i < s->frame_size; i += s->channels)
903 s->int_samples[i+1] += shift(s->int_samples[i], 1);
904 s->int_samples[i] -= s->int_samples[i+1];
908 for (i = 0; i < s->frame_size; i += s->channels)
909 s->int_samples[i+1] += s->int_samples[i];
912 for (i = 0; i < s->frame_size; i += s->channels)
913 s->int_samples[i] += s->int_samples[i+1];
918 for (i = 0; i < s->frame_size; i++)
919 s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);
922 for (i = 0; i < s->frame_size; i++)
924 if (s->int_samples[i] > 32767)
926 else if (s->int_samples[i] < -32768)
929 samples[i] = s->int_samples[i];
934 *data_size = s->frame_size * 2;
936 return (get_bits_count(&gb)+7)/8;
939 #ifdef CONFIG_ENCODERS
940 AVCodec sonic_encoder = {
944 sizeof(SonicContext),
951 AVCodec sonic_ls_encoder = {
955 sizeof(SonicContext),
963 #ifdef CONFIG_DECODERS
964 AVCodec sonic_decoder = {
968 sizeof(SonicContext),