git.sesse.net Git - ffmpeg/blob - libavcodec/wmaenc.c

   1 /*
   2  * WMA compatible encoder
   3  * Copyright (c) 2007 Michael Niedermayer
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 #include "libavutil/attributes.h"
  23
  24 #include "avcodec.h"
  25 #include "internal.h"
  26 #include "wma.h"
  27 #include "libavutil/avassert.h"
  28
  29
  30 static av_cold int encode_init(AVCodecContext *avctx)
  31 {
  32     WMACodecContext *s = avctx->priv_data;
  33     int i, flags1, flags2, block_align;
  34     uint8_t *extradata;
  35
  36     s->avctx = avctx;
  37
  38     if (avctx->channels > MAX_CHANNELS) {
  39         av_log(avctx, AV_LOG_ERROR,
  40                "too many channels: got %i, need %i or fewer\n",
  41                avctx->channels, MAX_CHANNELS);
  42         return AVERROR(EINVAL);
  43     }
  44
  45     if (avctx->sample_rate > 48000) {
  46         av_log(avctx, AV_LOG_ERROR, "sample rate is too high: %d > 48kHz\n",
  47                avctx->sample_rate);
  48         return AVERROR(EINVAL);
  49     }
  50
  51     if (avctx->bit_rate < 24 * 1000) {
  52         av_log(avctx, AV_LOG_ERROR,
  53                "bitrate too low: got %i, need 24000 or higher\n",
  54                avctx->bit_rate);
  55         return AVERROR(EINVAL);
  56     }
  57
  58     /* extract flag infos */
  59     flags1 = 0;
  60     flags2 = 1;
  61     if (avctx->codec->id == AV_CODEC_ID_WMAV1) {
  62         extradata             = av_malloc(4);
  63         if (!extradata)
  64             return AVERROR(ENOMEM);
  65         avctx->extradata_size = 4;
  66         AV_WL16(extradata, flags1);
  67         AV_WL16(extradata + 2, flags2);
  68     } else if (avctx->codec->id == AV_CODEC_ID_WMAV2) {
  69         extradata             = av_mallocz(10);
  70         if (!extradata)
  71             return AVERROR(ENOMEM);
  72         avctx->extradata_size = 10;
  73         AV_WL32(extradata, flags1);
  74         AV_WL16(extradata + 4, flags2);
  75     } else {
  76         av_assert0(0);
  77     }
  78     avctx->extradata          = extradata;
  79     s->use_exp_vlc            = flags2 & 0x0001;
  80     s->use_bit_reservoir      = flags2 & 0x0002;
  81     s->use_variable_block_len = flags2 & 0x0004;
  82     if (avctx->channels == 2)
  83         s->ms_stereo = 1;
  84
  85     ff_wma_init(avctx, flags2);
  86
  87     /* init MDCT */
  88     for (i = 0; i < s->nb_block_sizes; i++)
  89         ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 0, 1.0);
  90
  91     block_align        = avctx->bit_rate * (int64_t) s->frame_len /
  92                          (avctx->sample_rate * 8);
  93     block_align        = FFMIN(block_align, MAX_CODED_SUPERFRAME_SIZE);
  94     avctx->block_align = block_align;
  95     avctx->frame_size = avctx->initial_padding = s->frame_len;
  96
  97     return 0;
  98 }
  99
 100 static void apply_window_and_mdct(AVCodecContext *avctx, const AVFrame *frame)
 101 {
 102     WMACodecContext *s = avctx->priv_data;
 103     float **audio      = (float **) frame->extended_data;
 104     int len            = frame->nb_samples;
 105     int window_index   = s->frame_len_bits - s->block_len_bits;
 106     FFTContext *mdct   = &s->mdct_ctx[window_index];
 107     int ch;
 108     const float *win   = s->windows[window_index];
 109     int window_len     = 1 << s->block_len_bits;
 110     float n            = 2.0 * 32768.0 / window_len;
 111
 112     for (ch = 0; ch < avctx->channels; ch++) {
 113         memcpy(s->output, s->frame_out[ch], window_len * sizeof(*s->output));
 114         s->fdsp->vector_fmul_scalar(s->frame_out[ch], audio[ch], n, len);
 115         s->fdsp->vector_fmul_reverse(&s->output[window_len], s->frame_out[ch],
 116                                     win, len);
 117         s->fdsp->vector_fmul(s->frame_out[ch], s->frame_out[ch], win, len);
 118         mdct->mdct_calc(mdct, s->coefs[ch], s->output);
 119     }
 120 }
 121
 122 // FIXME use for decoding too
 123 static void init_exp(WMACodecContext *s, int ch, const int *exp_param)
 124 {
 125     int n;
 126     const uint16_t *ptr;
 127     float v, *q, max_scale, *q_end;
 128
 129     ptr       = s->exponent_bands[s->frame_len_bits - s->block_len_bits];
 130     q         = s->exponents[ch];
 131     q_end     = q + s->block_len;
 132     max_scale = 0;
 133     while (q < q_end) {
 134         /* XXX: use a table */
 135         v         = pow(10, *exp_param++ *(1.0 / 16.0));
 136         max_scale = FFMAX(max_scale, v);
 137         n         = *ptr++;
 138         do {
 139             *q++ = v;
 140         } while (--n);
 141     }
 142     s->max_exponent[ch] = max_scale;
 143 }
 144
 145 static void encode_exp_vlc(WMACodecContext *s, int ch, const int *exp_param)
 146 {
 147     int last_exp;
 148     const uint16_t *ptr;
 149     float *q, *q_end;
 150
 151     ptr   = s->exponent_bands[s->frame_len_bits - s->block_len_bits];
 152     q     = s->exponents[ch];
 153     q_end = q + s->block_len;
 154     if (s->version == 1) {
 155         last_exp = *exp_param++;
 156         av_assert0(last_exp - 10 >= 0 && last_exp - 10 < 32);
 157         put_bits(&s->pb, 5, last_exp - 10);
 158         q += *ptr++;
 159     } else
 160         last_exp = 36;
 161     while (q < q_end) {
 162         int exp  = *exp_param++;
 163         int code = exp - last_exp + 60;
 164         av_assert1(code >= 0 && code < 120);
 165         put_bits(&s->pb, ff_aac_scalefactor_bits[code],
 166                  ff_aac_scalefactor_code[code]);
 167         /* XXX: use a table */
 168         q       += *ptr++;
 169         last_exp = exp;
 170     }
 171 }
 172
 173 static int encode_block(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE],
 174                         int total_gain)
 175 {
 176     int v, bsize, ch, coef_nb_bits, parse_exponents;
 177     float mdct_norm;
 178     int nb_coefs[MAX_CHANNELS];
 179     static const int fixed_exp[25] = {
 180         20, 20, 20, 20, 20,
 181         20, 20, 20, 20, 20,
 182         20, 20, 20, 20, 20,
 183         20, 20, 20, 20, 20,
 184         20, 20, 20, 20, 20
 185     };
 186
 187     // FIXME remove duplication relative to decoder
 188     if (s->use_variable_block_len) {
 189         av_assert0(0); // FIXME not implemented
 190     } else {
 191         /* fixed block len */
 192         s->next_block_len_bits = s->frame_len_bits;
 193         s->prev_block_len_bits = s->frame_len_bits;
 194         s->block_len_bits      = s->frame_len_bits;
 195     }
 196
 197     s->block_len = 1 << s->block_len_bits;
 198 //     av_assert0((s->block_pos + s->block_len) <= s->frame_len);
 199     bsize = s->frame_len_bits - s->block_len_bits;
 200
 201     // FIXME factor
 202     v = s->coefs_end[bsize] - s->coefs_start;
 203     for (ch = 0; ch < s->avctx->channels; ch++)
 204         nb_coefs[ch] = v;
 205     {
 206         int n4 = s->block_len / 2;
 207         mdct_norm = 1.0 / (float) n4;
 208         if (s->version == 1)
 209             mdct_norm *= sqrt(n4);
 210     }
 211
 212     if (s->avctx->channels == 2)
 213         put_bits(&s->pb, 1, !!s->ms_stereo);
 214
 215     for (ch = 0; ch < s->avctx->channels; ch++) {
 216         // FIXME only set channel_coded when needed, instead of always
 217         s->channel_coded[ch] = 1;
 218         if (s->channel_coded[ch])
 219             init_exp(s, ch, fixed_exp);
 220     }
 221
 222     for (ch = 0; ch < s->avctx->channels; ch++) {
 223         if (s->channel_coded[ch]) {
 224             WMACoef *coefs1;
 225             float *coefs, *exponents, mult;
 226             int i, n;
 227
 228             coefs1    = s->coefs1[ch];
 229             exponents = s->exponents[ch];
 230             mult      = pow(10, total_gain * 0.05) / s->max_exponent[ch];
 231             mult     *= mdct_norm;
 232             coefs     = src_coefs[ch];
 233             if (s->use_noise_coding && 0) {
 234                 av_assert0(0); // FIXME not implemented
 235             } else {
 236                 coefs += s->coefs_start;
 237                 n      = nb_coefs[ch];
 238                 for (i = 0; i < n; i++) {
 239                     double t = *coefs++ / (exponents[i] * mult);
 240                     if (t < -32768 || t > 32767)
 241                         return -1;
 242
 243                     coefs1[i] = lrint(t);
 244                 }
 245             }
 246         }
 247     }
 248
 249     v = 0;
 250     for (ch = 0; ch < s->avctx->channels; ch++) {
 251         int a = s->channel_coded[ch];
 252         put_bits(&s->pb, 1, a);
 253         v |= a;
 254     }
 255
 256     if (!v)
 257         return 1;
 258
 259     for (v = total_gain - 1; v >= 127; v -= 127)
 260         put_bits(&s->pb, 7, 127);
 261     put_bits(&s->pb, 7, v);
 262
 263     coef_nb_bits = ff_wma_total_gain_to_bits(total_gain);
 264
 265     if (s->use_noise_coding) {
 266         for (ch = 0; ch < s->avctx->channels; ch++) {
 267             if (s->channel_coded[ch]) {
 268                 int i, n;
 269                 n = s->exponent_high_sizes[bsize];
 270                 for (i = 0; i < n; i++) {
 271                     put_bits(&s->pb, 1, s->high_band_coded[ch][i] = 0);
 272                     if (0)
 273                         nb_coefs[ch] -= s->exponent_high_bands[bsize][i];
 274                 }
 275             }
 276         }
 277     }
 278
 279     parse_exponents = 1;
 280     if (s->block_len_bits != s->frame_len_bits)
 281         put_bits(&s->pb, 1, parse_exponents);
 282
 283     if (parse_exponents) {
 284         for (ch = 0; ch < s->avctx->channels; ch++) {
 285             if (s->channel_coded[ch]) {
 286                 if (s->use_exp_vlc) {
 287                     encode_exp_vlc(s, ch, fixed_exp);
 288                 } else {
 289                     av_assert0(0); // FIXME not implemented
 290 //                    encode_exp_lsp(s, ch);
 291                 }
 292             }
 293         }
 294     } else
 295         av_assert0(0); // FIXME not implemented
 296
 297     for (ch = 0; ch < s->avctx->channels; ch++) {
 298         if (s->channel_coded[ch]) {
 299             int run, tindex;
 300             WMACoef *ptr, *eptr;
 301             tindex = (ch == 1 && s->ms_stereo);
 302             ptr    = &s->coefs1[ch][0];
 303             eptr   = ptr + nb_coefs[ch];
 304
 305             run = 0;
 306             for (; ptr < eptr; ptr++) {
 307                 if (*ptr) {
 308                     int level     = *ptr;
 309                     int abs_level = FFABS(level);
 310                     int code      = 0;
 311                     if (abs_level <= s->coef_vlcs[tindex]->max_level)
 312                         if (run < s->coef_vlcs[tindex]->levels[abs_level - 1])
 313                             code = run + s->int_table[tindex][abs_level - 1];
 314
 315                     av_assert2(code < s->coef_vlcs[tindex]->n);
 316                     put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[code],
 317                              s->coef_vlcs[tindex]->huffcodes[code]);
 318
 319                     if (code == 0) {
 320                         if (1 << coef_nb_bits <= abs_level)
 321                             return -1;
 322
 323                         put_bits(&s->pb, coef_nb_bits, abs_level);
 324                         put_bits(&s->pb, s->frame_len_bits, run);
 325                     }
 326                     // FIXME the sign is flipped somewhere
 327                     put_bits(&s->pb, 1, level < 0);
 328                     run = 0;
 329                 } else
 330                     run++;
 331             }
 332             if (run)
 333                 put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[1],
 334                          s->coef_vlcs[tindex]->huffcodes[1]);
 335         }
 336         if (s->version == 1 && s->avctx->channels >= 2)
 337             avpriv_align_put_bits(&s->pb);
 338     }
 339     return 0;
 340 }
 341
 342 static int encode_frame(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE],
 343                         uint8_t *buf, int buf_size, int total_gain)
 344 {
 345     init_put_bits(&s->pb, buf, buf_size);
 346
 347     if (s->use_bit_reservoir)
 348         av_assert0(0); // FIXME not implemented
 349     else if (encode_block(s, src_coefs, total_gain) < 0)
 350         return INT_MAX;
 351
 352     avpriv_align_put_bits(&s->pb);
 353
 354     return put_bits_count(&s->pb) / 8 - s->avctx->block_align;
 355 }
 356
 357 static int encode_superframe(AVCodecContext *avctx, AVPacket *avpkt,
 358                              const AVFrame *frame, int *got_packet_ptr)
 359 {
 360     WMACodecContext *s = avctx->priv_data;
 361     int i, total_gain, ret, error;
 362
 363     s->block_len_bits = s->frame_len_bits; // required by non variable block len
 364     s->block_len      = 1 << s->block_len_bits;
 365
 366     apply_window_and_mdct(avctx, frame);
 367
 368     if (s->ms_stereo) {
 369         float a, b;
 370         int i;
 371
 372         for (i = 0; i < s->block_len; i++) {
 373             a              = s->coefs[0][i] * 0.5;
 374             b              = s->coefs[1][i] * 0.5;
 375             s->coefs[0][i] = a + b;
 376             s->coefs[1][i] = a - b;
 377         }
 378     }
 379
 380     if ((ret = ff_alloc_packet2(avctx, avpkt, 2 * MAX_CODED_SUPERFRAME_SIZE)) < 0)
 381         return ret;
 382
 383     total_gain = 128;
 384     for (i = 64; i; i >>= 1) {
 385         error = encode_frame(s, s->coefs, avpkt->data, avpkt->size,
 386                                  total_gain - i);
 387         if (error <= 0)
 388             total_gain -= i;
 389     }
 390
 391     while(total_gain <= 128 && error > 0)
 392         error = encode_frame(s, s->coefs, avpkt->data, avpkt->size, total_gain++);
 393     if (error > 0) {
 394         av_log(avctx, AV_LOG_ERROR, "Invalid input data or requested bitrate too low, cannot encode\n");
 395         avpkt->size = 0;
 396         return AVERROR(EINVAL);
 397     }
 398     av_assert0((put_bits_count(&s->pb) & 7) == 0);
 399     i= avctx->block_align - (put_bits_count(&s->pb)+7)/8;
 400     av_assert0(i>=0);
 401     while(i--)
 402         put_bits(&s->pb, 8, 'N');
 403
 404     flush_put_bits(&s->pb);
 405     av_assert0(put_bits_ptr(&s->pb) - s->pb.buf == avctx->block_align);
 406
 407     if (frame->pts != AV_NOPTS_VALUE)
 408         avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
 409
 410     avpkt->size     = avctx->block_align;
 411     *got_packet_ptr = 1;
 412     return 0;
 413 }
 414
 415 #if CONFIG_WMAV1_ENCODER
 416 AVCodec ff_wmav1_encoder = {
 417     .name           = "wmav1",
 418     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Audio 1"),
 419     .type           = AVMEDIA_TYPE_AUDIO,
 420     .id             = AV_CODEC_ID_WMAV1,
 421     .priv_data_size = sizeof(WMACodecContext),
 422     .init           = encode_init,
 423     .encode2        = encode_superframe,
 424     .close          = ff_wma_end,
 425     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
 426                                                       AV_SAMPLE_FMT_NONE },
 427 };
 428 #endif
 429 #if CONFIG_WMAV2_ENCODER
 430 AVCodec ff_wmav2_encoder = {
 431     .name           = "wmav2",
 432     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Audio 2"),
 433     .type           = AVMEDIA_TYPE_AUDIO,
 434     .id             = AV_CODEC_ID_WMAV2,
 435     .priv_data_size = sizeof(WMACodecContext),
 436     .init           = encode_init,
 437     .encode2        = encode_superframe,
 438     .close          = ff_wma_end,
 439     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
 440                                                       AV_SAMPLE_FMT_NONE },
 441 };
 442 #endif