git.sesse.net Git - ffmpeg/blob - libavcodec/sonic.c

   1 /*
   2  * Simple free lossless/lossy audio codec
   3  * Copyright (c) 2004 Alex Beregszaszi
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21 #include "avcodec.h"
  22 #include "get_bits.h"
  23 #include "golomb.h"
  24 #include "internal.h"
  25 #include "rangecoder.h"
  26
  27
  28 /**
  29  * @file
  30  * Simple free lossless/lossy audio codec
  31  * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk)
  32  * Written and designed by Alex Beregszaszi
  33  *
  34  * TODO:
  35  *  - CABAC put/get_symbol
  36  *  - independent quantizer for channels
  37  *  - >2 channels support
  38  *  - more decorrelation types
  39  *  - more tap_quant tests
  40  *  - selectable intlist writers/readers (bonk-style, golomb, cabac)
  41  */
  42
  43 #define MAX_CHANNELS 2
  44
  45 #define MID_SIDE 0
  46 #define LEFT_SIDE 1
  47 #define RIGHT_SIDE 2
  48
  49 typedef struct SonicContext {
  50     int version;
  51     int minor_version;
  52     int lossless, decorrelation;
  53
  54     int num_taps, downsampling;
  55     double quantization;
  56
  57     int channels, samplerate, block_align, frame_size;
  58
  59     int *tap_quant;
  60     int *int_samples;
  61     int *coded_samples[MAX_CHANNELS];
  62
  63     // for encoding
  64     int *tail;
  65     int tail_size;
  66     int *window;
  67     int window_size;
  68
  69     // for decoding
  70     int *predictor_k;
  71     int *predictor_state[MAX_CHANNELS];
  72 } SonicContext;
  73
  74 #define LATTICE_SHIFT   10
  75 #define SAMPLE_SHIFT    4
  76 #define LATTICE_FACTOR  (1 << LATTICE_SHIFT)
  77 #define SAMPLE_FACTOR   (1 << SAMPLE_SHIFT)
  78
  79 #define BASE_QUANT      0.6
  80 #define RATE_VARIATION  3.0
  81
  82 static inline int shift(int a,int b)
  83 {
  84     return (a+(1<<(b-1))) >> b;
  85 }
  86
  87 static inline int shift_down(int a,int b)
  88 {
  89     return (a>>b)+(a<0);
  90 }
  91
  92 static av_always_inline av_flatten void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed, uint64_t rc_stat[256][2], uint64_t rc_stat2[32][2]){
  93     int i;
  94
  95 #define put_rac(C,S,B) \
  96 do{\
  97     if(rc_stat){\
  98         rc_stat[*(S)][B]++;\
  99         rc_stat2[(S)-state][B]++;\
 100     }\
 101     put_rac(C,S,B);\
 102 }while(0)
 103
 104     if(v){
 105         const int a= FFABS(v);
 106         const int e= av_log2(a);
 107         put_rac(c, state+0, 0);
 108         if(e<=9){
 109             for(i=0; i<e; i++){
 110                 put_rac(c, state+1+i, 1);  //1..10
 111             }
 112             put_rac(c, state+1+i, 0);
 113
 114             for(i=e-1; i>=0; i--){
 115                 put_rac(c, state+22+i, (a>>i)&1); //22..31
 116             }
 117
 118             if(is_signed)
 119                 put_rac(c, state+11 + e, v < 0); //11..21
 120         }else{
 121             for(i=0; i<e; i++){
 122                 put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
 123             }
 124             put_rac(c, state+1+9, 0);
 125
 126             for(i=e-1; i>=0; i--){
 127                 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
 128             }
 129
 130             if(is_signed)
 131                 put_rac(c, state+11 + 10, v < 0); //11..21
 132         }
 133     }else{
 134         put_rac(c, state+0, 1);
 135     }
 136 #undef put_rac
 137 }
 138
 139 static inline av_flatten int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
 140     if(get_rac(c, state+0))
 141         return 0;
 142     else{
 143         int i, e, a;
 144         e= 0;
 145         while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
 146             e++;
 147             if (e > 31)
 148                 return AVERROR_INVALIDDATA;
 149         }
 150
 151         a= 1;
 152         for(i=e-1; i>=0; i--){
 153             a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
 154         }
 155
 156         e= -(is_signed && get_rac(c, state+11 + FFMIN(e, 10))); //11..21
 157         return (a^e)-e;
 158     }
 159 }
 160
 161 #if 1
 162 static inline int intlist_write(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
 163 {
 164     int i;
 165
 166     for (i = 0; i < entries; i++)
 167         put_symbol(c, state, buf[i], 1, NULL, NULL);
 168
 169     return 1;
 170 }
 171
 172 static inline int intlist_read(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
 173 {
 174     int i;
 175
 176     for (i = 0; i < entries; i++)
 177         buf[i] = get_symbol(c, state, 1);
 178
 179     return 1;
 180 }
 181 #elif 1
 182 static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
 183 {
 184     int i;
 185
 186     for (i = 0; i < entries; i++)
 187         set_se_golomb(pb, buf[i]);
 188
 189     return 1;
 190 }
 191
 192 static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
 193 {
 194     int i;
 195
 196     for (i = 0; i < entries; i++)
 197         buf[i] = get_se_golomb(gb);
 198
 199     return 1;
 200 }
 201
 202 #else
 203
 204 #define ADAPT_LEVEL 8
 205
 206 static int bits_to_store(uint64_t x)
 207 {
 208     int res = 0;
 209
 210     while(x)
 211     {
 212         res++;
 213         x >>= 1;
 214     }
 215     return res;
 216 }
 217
 218 static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max)
 219 {
 220     int i, bits;
 221
 222     if (!max)
 223         return;
 224
 225     bits = bits_to_store(max);
 226
 227     for (i = 0; i < bits-1; i++)
 228         put_bits(pb, 1, value & (1 << i));
 229
 230     if ( (value | (1 << (bits-1))) <= max)
 231         put_bits(pb, 1, value & (1 << (bits-1)));
 232 }
 233
 234 static unsigned int read_uint_max(GetBitContext *gb, int max)
 235 {
 236     int i, bits, value = 0;
 237
 238     if (!max)
 239         return 0;
 240
 241     bits = bits_to_store(max);
 242
 243     for (i = 0; i < bits-1; i++)
 244         if (get_bits1(gb))
 245             value += 1 << i;
 246
 247     if ( (value | (1<<(bits-1))) <= max)
 248         if (get_bits1(gb))
 249             value += 1 << (bits-1);
 250
 251     return value;
 252 }
 253
 254 static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
 255 {
 256     int i, j, x = 0, low_bits = 0, max = 0;
 257     int step = 256, pos = 0, dominant = 0, any = 0;
 258     int *copy, *bits;
 259
 260     copy = av_calloc(entries, sizeof(*copy));
 261     if (!copy)
 262         return AVERROR(ENOMEM);
 263
 264     if (base_2_part)
 265     {
 266         int energy = 0;
 267
 268         for (i = 0; i < entries; i++)
 269             energy += abs(buf[i]);
 270
 271         low_bits = bits_to_store(energy / (entries * 2));
 272         if (low_bits > 15)
 273             low_bits = 15;
 274
 275         put_bits(pb, 4, low_bits);
 276     }
 277
 278     for (i = 0; i < entries; i++)
 279     {
 280         put_bits(pb, low_bits, abs(buf[i]));
 281         copy[i] = abs(buf[i]) >> low_bits;
 282         if (copy[i] > max)
 283             max = abs(copy[i]);
 284     }
 285
 286     bits = av_calloc(entries*max, sizeof(*bits));
 287     if (!bits)
 288     {
 289         av_free(copy);
 290         return AVERROR(ENOMEM);
 291     }
 292
 293     for (i = 0; i <= max; i++)
 294     {
 295         for (j = 0; j < entries; j++)
 296             if (copy[j] >= i)
 297                 bits[x++] = copy[j] > i;
 298     }
 299
 300     // store bitstream
 301     while (pos < x)
 302     {
 303         int steplet = step >> 8;
 304
 305         if (pos + steplet > x)
 306             steplet = x - pos;
 307
 308         for (i = 0; i < steplet; i++)
 309             if (bits[i+pos] != dominant)
 310                 any = 1;
 311
 312         put_bits(pb, 1, any);
 313
 314         if (!any)
 315         {
 316             pos += steplet;
 317             step += step / ADAPT_LEVEL;
 318         }
 319         else
 320         {
 321             int interloper = 0;
 322
 323             while (((pos + interloper) < x) && (bits[pos + interloper] == dominant))
 324                 interloper++;
 325
 326             // note change
 327             write_uint_max(pb, interloper, (step >> 8) - 1);
 328
 329             pos += interloper + 1;
 330             step -= step / ADAPT_LEVEL;
 331         }
 332
 333         if (step < 256)
 334         {
 335             step = 65536 / step;
 336             dominant = !dominant;
 337         }
 338     }
 339
 340     // store signs
 341     for (i = 0; i < entries; i++)
 342         if (buf[i])
 343             put_bits(pb, 1, buf[i] < 0);
 344
 345     av_free(bits);
 346     av_free(copy);
 347
 348     return 0;
 349 }
 350
 351 static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
 352 {
 353     int i, low_bits = 0, x = 0;
 354     int n_zeros = 0, step = 256, dominant = 0;
 355     int pos = 0, level = 0;
 356     int *bits = av_calloc(entries, sizeof(*bits));
 357
 358     if (!bits)
 359         return AVERROR(ENOMEM);
 360
 361     if (base_2_part)
 362     {
 363         low_bits = get_bits(gb, 4);
 364
 365         if (low_bits)
 366             for (i = 0; i < entries; i++)
 367                 buf[i] = get_bits(gb, low_bits);
 368     }
 369
 370 //    av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits);
 371
 372     while (n_zeros < entries)
 373     {
 374         int steplet = step >> 8;
 375
 376         if (!get_bits1(gb))
 377         {
 378             for (i = 0; i < steplet; i++)
 379                 bits[x++] = dominant;
 380
 381             if (!dominant)
 382                 n_zeros += steplet;
 383
 384             step += step / ADAPT_LEVEL;
 385         }
 386         else
 387         {
 388             int actual_run = read_uint_max(gb, steplet-1);
 389
 390 //            av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run);
 391
 392             for (i = 0; i < actual_run; i++)
 393                 bits[x++] = dominant;
 394
 395             bits[x++] = !dominant;
 396
 397             if (!dominant)
 398                 n_zeros += actual_run;
 399             else
 400                 n_zeros++;
 401
 402             step -= step / ADAPT_LEVEL;
 403         }
 404
 405         if (step < 256)
 406         {
 407             step = 65536 / step;
 408             dominant = !dominant;
 409         }
 410     }
 411
 412     // reconstruct unsigned values
 413     n_zeros = 0;
 414     for (i = 0; n_zeros < entries; i++)
 415     {
 416         while(1)
 417         {
 418             if (pos >= entries)
 419             {
 420                 pos = 0;
 421                 level += 1 << low_bits;
 422             }
 423
 424             if (buf[pos] >= level)
 425                 break;
 426
 427             pos++;
 428         }
 429
 430         if (bits[i])
 431             buf[pos] += 1 << low_bits;
 432         else
 433             n_zeros++;
 434
 435         pos++;
 436     }
 437     av_free(bits);
 438
 439     // read signs
 440     for (i = 0; i < entries; i++)
 441         if (buf[i] && get_bits1(gb))
 442             buf[i] = -buf[i];
 443
 444 //    av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos);
 445
 446     return 0;
 447 }
 448 #endif
 449
 450 static void predictor_init_state(int *k, int *state, int order)
 451 {
 452     int i;
 453
 454     for (i = order-2; i >= 0; i--)
 455     {
 456         int j, p, x = state[i];
 457
 458         for (j = 0, p = i+1; p < order; j++,p++)
 459             {
 460             int tmp = x + shift_down(k[j] * state[p], LATTICE_SHIFT);
 461             state[p] += shift_down(k[j]*x, LATTICE_SHIFT);
 462             x = tmp;
 463         }
 464     }
 465 }
 466
 467 static int predictor_calc_error(int *k, int *state, int order, int error)
 468 {
 469     int i, x = error - shift_down(k[order-1] * state[order-1], LATTICE_SHIFT);
 470
 471 #if 1
 472     int *k_ptr = &(k[order-2]),
 473         *state_ptr = &(state[order-2]);
 474     for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--)
 475     {
 476         int k_value = *k_ptr, state_value = *state_ptr;
 477         x -= shift_down(k_value * state_value, LATTICE_SHIFT);
 478         state_ptr[1] = state_value + shift_down(k_value * (unsigned)x, LATTICE_SHIFT);
 479     }
 480 #else
 481     for (i = order-2; i >= 0; i--)
 482     {
 483         x -= shift_down(k[i] * state[i], LATTICE_SHIFT);
 484         state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT);
 485     }
 486 #endif
 487
 488     // don't drift too far, to avoid overflows
 489     if (x >  (SAMPLE_FACTOR<<16)) x =  (SAMPLE_FACTOR<<16);
 490     if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16);
 491
 492     state[0] = x;
 493
 494     return x;
 495 }
 496
 497 #if CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER
 498 // Heavily modified Levinson-Durbin algorithm which
 499 // copes better with quantization, and calculates the
 500 // actual whitened result as it goes.
 501
 502 static int modified_levinson_durbin(int *window, int window_entries,
 503         int *out, int out_entries, int channels, int *tap_quant)
 504 {
 505     int i;
 506     int *state = av_calloc(window_entries, sizeof(*state));
 507
 508     if (!state)
 509         return AVERROR(ENOMEM);
 510
 511     memcpy(state, window, 4* window_entries);
 512
 513     for (i = 0; i < out_entries; i++)
 514     {
 515         int step = (i+1)*channels, k, j;
 516         double xx = 0.0, xy = 0.0;
 517 #if 1
 518         int *x_ptr = &(window[step]);
 519         int *state_ptr = &(state[0]);
 520         j = window_entries - step;
 521         for (;j>0;j--,x_ptr++,state_ptr++)
 522         {
 523             double x_value = *x_ptr;
 524             double state_value = *state_ptr;
 525             xx += state_value*state_value;
 526             xy += x_value*state_value;
 527         }
 528 #else
 529         for (j = 0; j <= (window_entries - step); j++);
 530         {
 531             double stepval = window[step+j];
 532             double stateval = window[j];
 533 //            xx += (double)window[j]*(double)window[j];
 534 //            xy += (double)window[step+j]*(double)window[j];
 535             xx += stateval*stateval;
 536             xy += stepval*stateval;
 537         }
 538 #endif
 539         if (xx == 0.0)
 540             k = 0;
 541         else
 542             k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5));
 543
 544         if (k > (LATTICE_FACTOR/tap_quant[i]))
 545             k = LATTICE_FACTOR/tap_quant[i];
 546         if (-k > (LATTICE_FACTOR/tap_quant[i]))
 547             k = -(LATTICE_FACTOR/tap_quant[i]);
 548
 549         out[i] = k;
 550         k *= tap_quant[i];
 551
 552 #if 1
 553         x_ptr = &(window[step]);
 554         state_ptr = &(state[0]);
 555         j = window_entries - step;
 556         for (;j>0;j--,x_ptr++,state_ptr++)
 557         {
 558             int x_value = *x_ptr;
 559             int state_value = *state_ptr;
 560             *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT);
 561             *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT);
 562         }
 563 #else
 564         for (j=0; j <= (window_entries - step); j++)
 565         {
 566             int stepval = window[step+j];
 567             int stateval=state[j];
 568             window[step+j] += shift_down(k * stateval, LATTICE_SHIFT);
 569             state[j] += shift_down(k * stepval, LATTICE_SHIFT);
 570         }
 571 #endif
 572     }
 573
 574     av_free(state);
 575     return 0;
 576 }
 577
 578 static inline int code_samplerate(int samplerate)
 579 {
 580     switch (samplerate)
 581     {
 582         case 44100: return 0;
 583         case 22050: return 1;
 584         case 11025: return 2;
 585         case 96000: return 3;
 586         case 48000: return 4;
 587         case 32000: return 5;
 588         case 24000: return 6;
 589         case 16000: return 7;
 590         case 8000: return 8;
 591     }
 592     return AVERROR(EINVAL);
 593 }
 594
 595 static av_cold int sonic_encode_init(AVCodecContext *avctx)
 596 {
 597     SonicContext *s = avctx->priv_data;
 598     PutBitContext pb;
 599     int i;
 600
 601     s->version = 2;
 602
 603     if (avctx->channels > MAX_CHANNELS)
 604     {
 605         av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
 606         return AVERROR(EINVAL); /* only stereo or mono for now */
 607     }
 608
 609     if (avctx->channels == 2)
 610         s->decorrelation = MID_SIDE;
 611     else
 612         s->decorrelation = 3;
 613
 614     if (avctx->codec->id == AV_CODEC_ID_SONIC_LS)
 615     {
 616         s->lossless = 1;
 617         s->num_taps = 32;
 618         s->downsampling = 1;
 619         s->quantization = 0.0;
 620     }
 621     else
 622     {
 623         s->num_taps = 128;
 624         s->downsampling = 2;
 625         s->quantization = 1.0;
 626     }
 627
 628     // max tap 2048
 629     if (s->num_taps < 32 || s->num_taps > 1024 || s->num_taps % 32) {
 630         av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
 631         return AVERROR_INVALIDDATA;
 632     }
 633
 634     // generate taps
 635     s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
 636     if (!s->tap_quant)
 637         return AVERROR(ENOMEM);
 638
 639     for (i = 0; i < s->num_taps; i++)
 640         s->tap_quant[i] = ff_sqrt(i+1);
 641
 642     s->channels = avctx->channels;
 643     s->samplerate = avctx->sample_rate;
 644
 645     s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
 646     s->frame_size = s->channels*s->block_align*s->downsampling;
 647
 648     s->tail_size = s->num_taps*s->channels;
 649     s->tail = av_calloc(s->tail_size, sizeof(*s->tail));
 650     if (!s->tail)
 651         return AVERROR(ENOMEM);
 652
 653     s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k) );
 654     if (!s->predictor_k)
 655         return AVERROR(ENOMEM);
 656
 657     for (i = 0; i < s->channels; i++)
 658     {
 659         s->coded_samples[i] = av_calloc(s->block_align, sizeof(**s->coded_samples));
 660         if (!s->coded_samples[i])
 661             return AVERROR(ENOMEM);
 662     }
 663
 664     s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
 665
 666     s->window_size = ((2*s->tail_size)+s->frame_size);
 667     s->window = av_calloc(s->window_size, sizeof(*s->window));
 668     if (!s->window || !s->int_samples)
 669         return AVERROR(ENOMEM);
 670
 671     avctx->extradata = av_mallocz(16);
 672     if (!avctx->extradata)
 673         return AVERROR(ENOMEM);
 674     init_put_bits(&pb, avctx->extradata, 16*8);
 675
 676     put_bits(&pb, 2, s->version); // version
 677     if (s->version >= 1)
 678     {
 679         if (s->version >= 2) {
 680             put_bits(&pb, 8, s->version);
 681             put_bits(&pb, 8, s->minor_version);
 682         }
 683         put_bits(&pb, 2, s->channels);
 684         put_bits(&pb, 4, code_samplerate(s->samplerate));
 685     }
 686     put_bits(&pb, 1, s->lossless);
 687     if (!s->lossless)
 688         put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
 689     put_bits(&pb, 2, s->decorrelation);
 690     put_bits(&pb, 2, s->downsampling);
 691     put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
 692     put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table
 693
 694     flush_put_bits(&pb);
 695     avctx->extradata_size = put_bits_count(&pb)/8;
 696
 697     av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
 698         s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
 699
 700     avctx->frame_size = s->block_align*s->downsampling;
 701
 702     return 0;
 703 }
 704
 705 static av_cold int sonic_encode_close(AVCodecContext *avctx)
 706 {
 707     SonicContext *s = avctx->priv_data;
 708     int i;
 709
 710     for (i = 0; i < s->channels; i++)
 711         av_freep(&s->coded_samples[i]);
 712
 713     av_freep(&s->predictor_k);
 714     av_freep(&s->tail);
 715     av_freep(&s->tap_quant);
 716     av_freep(&s->window);
 717     av_freep(&s->int_samples);
 718
 719     return 0;
 720 }
 721
 722 static int sonic_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 723                               const AVFrame *frame, int *got_packet_ptr)
 724 {
 725     SonicContext *s = avctx->priv_data;
 726     RangeCoder c;
 727     int i, j, ch, quant = 0, x = 0;
 728     int ret;
 729     const short *samples = (const int16_t*)frame->data[0];
 730     uint8_t state[32];
 731
 732     if ((ret = ff_alloc_packet2(avctx, avpkt, s->frame_size * 5 + 1000, 0)) < 0)
 733         return ret;
 734
 735     ff_init_range_encoder(&c, avpkt->data, avpkt->size);
 736     ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8);
 737     memset(state, 128, sizeof(state));
 738
 739     // short -> internal
 740     for (i = 0; i < s->frame_size; i++)
 741         s->int_samples[i] = samples[i];
 742
 743     if (!s->lossless)
 744         for (i = 0; i < s->frame_size; i++)
 745             s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT;
 746
 747     switch(s->decorrelation)
 748     {
 749         case MID_SIDE:
 750             for (i = 0; i < s->frame_size; i += s->channels)
 751             {
 752                 s->int_samples[i] += s->int_samples[i+1];
 753                 s->int_samples[i+1] -= shift(s->int_samples[i], 1);
 754             }
 755             break;
 756         case LEFT_SIDE:
 757             for (i = 0; i < s->frame_size; i += s->channels)
 758                 s->int_samples[i+1] -= s->int_samples[i];
 759             break;
 760         case RIGHT_SIDE:
 761             for (i = 0; i < s->frame_size; i += s->channels)
 762                 s->int_samples[i] -= s->int_samples[i+1];
 763             break;
 764     }
 765
 766     memset(s->window, 0, 4* s->window_size);
 767
 768     for (i = 0; i < s->tail_size; i++)
 769         s->window[x++] = s->tail[i];
 770
 771     for (i = 0; i < s->frame_size; i++)
 772         s->window[x++] = s->int_samples[i];
 773
 774     for (i = 0; i < s->tail_size; i++)
 775         s->window[x++] = 0;
 776
 777     for (i = 0; i < s->tail_size; i++)
 778         s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];
 779
 780     // generate taps
 781     ret = modified_levinson_durbin(s->window, s->window_size,
 782                 s->predictor_k, s->num_taps, s->channels, s->tap_quant);
 783     if (ret < 0)
 784         return ret;
 785
 786     if ((ret = intlist_write(&c, state, s->predictor_k, s->num_taps, 0)) < 0)
 787         return ret;
 788
 789     for (ch = 0; ch < s->channels; ch++)
 790     {
 791         x = s->tail_size+ch;
 792         for (i = 0; i < s->block_align; i++)
 793         {
 794             int sum = 0;
 795             for (j = 0; j < s->downsampling; j++, x += s->channels)
 796                 sum += s->window[x];
 797             s->coded_samples[ch][i] = sum;
 798         }
 799     }
 800
 801     // simple rate control code
 802     if (!s->lossless)
 803     {
 804         double energy1 = 0.0, energy2 = 0.0;
 805         for (ch = 0; ch < s->channels; ch++)
 806         {
 807             for (i = 0; i < s->block_align; i++)
 808             {
 809                 double sample = s->coded_samples[ch][i];
 810                 energy2 += sample*sample;
 811                 energy1 += fabs(sample);
 812             }
 813         }
 814
 815         energy2 = sqrt(energy2/(s->channels*s->block_align));
 816         energy1 = M_SQRT2*energy1/(s->channels*s->block_align);
 817
 818         // increase bitrate when samples are like a gaussian distribution
 819         // reduce bitrate when samples are like a two-tailed exponential distribution
 820
 821         if (energy2 > energy1)
 822             energy2 += (energy2-energy1)*RATE_VARIATION;
 823
 824         quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);
 825 //        av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2);
 826
 827         quant = av_clip(quant, 1, 65534);
 828
 829         put_symbol(&c, state, quant, 0, NULL, NULL);
 830
 831         quant *= SAMPLE_FACTOR;
 832     }
 833
 834     // write out coded samples
 835     for (ch = 0; ch < s->channels; ch++)
 836     {
 837         if (!s->lossless)
 838             for (i = 0; i < s->block_align; i++)
 839                 s->coded_samples[ch][i] = ROUNDED_DIV(s->coded_samples[ch][i], quant);
 840
 841         if ((ret = intlist_write(&c, state, s->coded_samples[ch], s->block_align, 1)) < 0)
 842             return ret;
 843     }
 844
 845 //    av_log(avctx, AV_LOG_DEBUG, "used bytes: %d\n", (put_bits_count(&pb)+7)/8);
 846
 847     avpkt->size = ff_rac_terminate(&c, 0);
 848     *got_packet_ptr = 1;
 849     return 0;
 850
 851 }
 852 #endif /* CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER */
 853
 854 #if CONFIG_SONIC_DECODER
 855 static const int samplerate_table[] =
 856     { 44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000 };
 857
 858 static av_cold int sonic_decode_init(AVCodecContext *avctx)
 859 {
 860     SonicContext *s = avctx->priv_data;
 861     GetBitContext gb;
 862     int i;
 863     int ret;
 864
 865     s->channels = avctx->channels;
 866     s->samplerate = avctx->sample_rate;
 867
 868     if (!avctx->extradata)
 869     {
 870         av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
 871         return AVERROR_INVALIDDATA;
 872     }
 873
 874     ret = init_get_bits8(&gb, avctx->extradata, avctx->extradata_size);
 875     if (ret < 0)
 876         return ret;
 877
 878     s->version = get_bits(&gb, 2);
 879     if (s->version >= 2) {
 880         s->version       = get_bits(&gb, 8);
 881         s->minor_version = get_bits(&gb, 8);
 882     }
 883     if (s->version != 2)
 884     {
 885         av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
 886         return AVERROR_INVALIDDATA;
 887     }
 888
 889     if (s->version >= 1)
 890     {
 891         int sample_rate_index;
 892         s->channels = get_bits(&gb, 2);
 893         sample_rate_index = get_bits(&gb, 4);
 894         if (sample_rate_index >= FF_ARRAY_ELEMS(samplerate_table)) {
 895             av_log(avctx, AV_LOG_ERROR, "Invalid sample_rate_index %d\n", sample_rate_index);
 896             return AVERROR_INVALIDDATA;
 897         }
 898         s->samplerate = samplerate_table[sample_rate_index];
 899         av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
 900             s->channels, s->samplerate);
 901     }
 902
 903     if (s->channels > MAX_CHANNELS || s->channels < 1)
 904     {
 905         av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
 906         return AVERROR_INVALIDDATA;
 907     }
 908     avctx->channels = s->channels;
 909
 910     s->lossless = get_bits1(&gb);
 911     if (!s->lossless)
 912         skip_bits(&gb, 3); // XXX FIXME
 913     s->decorrelation = get_bits(&gb, 2);
 914     if (s->decorrelation != 3 && s->channels != 2) {
 915         av_log(avctx, AV_LOG_ERROR, "invalid decorrelation %d\n", s->decorrelation);
 916         return AVERROR_INVALIDDATA;
 917     }
 918
 919     s->downsampling = get_bits(&gb, 2);
 920     if (!s->downsampling) {
 921         av_log(avctx, AV_LOG_ERROR, "invalid downsampling value\n");
 922         return AVERROR_INVALIDDATA;
 923     }
 924
 925     s->num_taps = (get_bits(&gb, 5)+1)<<5;
 926     if (get_bits1(&gb)) // XXX FIXME
 927         av_log(avctx, AV_LOG_INFO, "Custom quant table\n");
 928
 929     s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
 930     s->frame_size = s->channels*s->block_align*s->downsampling;
 931 //    avctx->frame_size = s->block_align;
 932
 933     if (s->num_taps * s->channels > s->frame_size) {
 934         av_log(avctx, AV_LOG_ERROR,
 935                "number of taps times channels (%d * %d) larger than frame size %d\n",
 936                s->num_taps, s->channels, s->frame_size);
 937         return AVERROR_INVALIDDATA;
 938     }
 939
 940     av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
 941         s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
 942
 943     // generate taps
 944     s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
 945     if (!s->tap_quant)
 946         return AVERROR(ENOMEM);
 947
 948     for (i = 0; i < s->num_taps; i++)
 949         s->tap_quant[i] = ff_sqrt(i+1);
 950
 951     s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k));
 952
 953     for (i = 0; i < s->channels; i++)
 954     {
 955         s->predictor_state[i] = av_calloc(s->num_taps, sizeof(**s->predictor_state));
 956         if (!s->predictor_state[i])
 957             return AVERROR(ENOMEM);
 958     }
 959
 960     for (i = 0; i < s->channels; i++)
 961     {
 962         s->coded_samples[i] = av_calloc(s->block_align, sizeof(**s->coded_samples));
 963         if (!s->coded_samples[i])
 964             return AVERROR(ENOMEM);
 965     }
 966     s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
 967     if (!s->int_samples)
 968         return AVERROR(ENOMEM);
 969
 970     avctx->sample_fmt = AV_SAMPLE_FMT_S16;
 971     return 0;
 972 }
 973
 974 static av_cold int sonic_decode_close(AVCodecContext *avctx)
 975 {
 976     SonicContext *s = avctx->priv_data;
 977     int i;
 978
 979     av_freep(&s->int_samples);
 980     av_freep(&s->tap_quant);
 981     av_freep(&s->predictor_k);
 982
 983     for (i = 0; i < s->channels; i++)
 984     {
 985         av_freep(&s->predictor_state[i]);
 986         av_freep(&s->coded_samples[i]);
 987     }
 988
 989     return 0;
 990 }
 991
 992 static int sonic_decode_frame(AVCodecContext *avctx,
 993                             void *data, int *got_frame_ptr,
 994                             AVPacket *avpkt)
 995 {
 996     const uint8_t *buf = avpkt->data;
 997     int buf_size = avpkt->size;
 998     SonicContext *s = avctx->priv_data;
 999     RangeCoder c;
1000     uint8_t state[32];
1001     int i, quant, ch, j, ret;
1002     int16_t *samples;
1003     AVFrame *frame = data;
1004
1005     if (buf_size == 0) return 0;
1006
1007     frame->nb_samples = s->frame_size / avctx->channels;
1008     if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
1009         return ret;
1010     samples = (int16_t *)frame->data[0];
1011
1012 //    av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);
1013
1014     memset(state, 128, sizeof(state));
1015     ff_init_range_decoder(&c, buf, buf_size);
1016     ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8);
1017
1018     intlist_read(&c, state, s->predictor_k, s->num_taps, 0);
1019
1020     // dequantize
1021     for (i = 0; i < s->num_taps; i++)
1022         s->predictor_k[i] *= s->tap_quant[i];
1023
1024     if (s->lossless)
1025         quant = 1;
1026     else
1027         quant = get_symbol(&c, state, 0) * SAMPLE_FACTOR;
1028
1029 //    av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);
1030
1031     for (ch = 0; ch < s->channels; ch++)
1032     {
1033         int x = ch;
1034
1035         predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps);
1036
1037         intlist_read(&c, state, s->coded_samples[ch], s->block_align, 1);
1038
1039         for (i = 0; i < s->block_align; i++)
1040         {
1041             for (j = 0; j < s->downsampling - 1; j++)
1042             {
1043                 s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0);
1044                 x += s->channels;
1045             }
1046
1047             s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * quant);
1048             x += s->channels;
1049         }
1050
1051         for (i = 0; i < s->num_taps; i++)
1052             s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
1053     }
1054
1055     switch(s->decorrelation)
1056     {
1057         case MID_SIDE:
1058             for (i = 0; i < s->frame_size; i += s->channels)
1059             {
1060                 s->int_samples[i+1] += shift(s->int_samples[i], 1);
1061                 s->int_samples[i] -= s->int_samples[i+1];
1062             }
1063             break;
1064         case LEFT_SIDE:
1065             for (i = 0; i < s->frame_size; i += s->channels)
1066                 s->int_samples[i+1] += s->int_samples[i];
1067             break;
1068         case RIGHT_SIDE:
1069             for (i = 0; i < s->frame_size; i += s->channels)
1070                 s->int_samples[i] += s->int_samples[i+1];
1071             break;
1072     }
1073
1074     if (!s->lossless)
1075         for (i = 0; i < s->frame_size; i++)
1076             s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);
1077
1078     // internal -> short
1079     for (i = 0; i < s->frame_size; i++)
1080         samples[i] = av_clip_int16(s->int_samples[i]);
1081
1082     *got_frame_ptr = 1;
1083
1084     return buf_size;
1085 }
1086
1087 AVCodec ff_sonic_decoder = {
1088     .name           = "sonic",
1089     .long_name      = NULL_IF_CONFIG_SMALL("Sonic"),
1090     .type           = AVMEDIA_TYPE_AUDIO,
1091     .id             = AV_CODEC_ID_SONIC,
1092     .priv_data_size = sizeof(SonicContext),
1093     .init           = sonic_decode_init,
1094     .close          = sonic_decode_close,
1095     .decode         = sonic_decode_frame,
1096     .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_EXPERIMENTAL,
1097 };
1098 #endif /* CONFIG_SONIC_DECODER */
1099
1100 #if CONFIG_SONIC_ENCODER
1101 AVCodec ff_sonic_encoder = {
1102     .name           = "sonic",
1103     .long_name      = NULL_IF_CONFIG_SMALL("Sonic"),
1104     .type           = AVMEDIA_TYPE_AUDIO,
1105     .id             = AV_CODEC_ID_SONIC,
1106     .priv_data_size = sizeof(SonicContext),
1107     .init           = sonic_encode_init,
1108     .encode2        = sonic_encode_frame,
1109     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
1110     .capabilities   = AV_CODEC_CAP_EXPERIMENTAL,
1111     .close          = sonic_encode_close,
1112 };
1113 #endif
1114
1115 #if CONFIG_SONIC_LS_ENCODER
1116 AVCodec ff_sonic_ls_encoder = {
1117     .name           = "sonicls",
1118     .long_name      = NULL_IF_CONFIG_SMALL("Sonic lossless"),
1119     .type           = AVMEDIA_TYPE_AUDIO,
1120     .id             = AV_CODEC_ID_SONIC_LS,
1121     .priv_data_size = sizeof(SonicContext),
1122     .init           = sonic_encode_init,
1123     .encode2        = sonic_encode_frame,
1124     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
1125     .capabilities   = AV_CODEC_CAP_EXPERIMENTAL,
1126     .close          = sonic_encode_close,
1127 };
1128 #endif