git.sesse.net Git - ffmpeg/blob - libavcodec/ffwavesynth.c

   1 /*
   2  * Wavesynth pseudo-codec
   3  * Copyright (c) 2011 Nicolas George
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 #include "libavutil/intreadwrite.h"
  23 #include "libavutil/log.h"
  24 #include "avcodec.h"
  25 #include "internal.h"
  26
  27
/* log2 of the number of entries in the sine lookup table */
#define SIN_BITS 14
/* upper bound on the channel count accepted by the decoder; also the size
 * of the per-sample accumulator array */
#define WS_MAX_CHANNELS 32
/* value stored in next_ts when no further interval remains to be entered */
#define INF_TS 0x7FFFFFFFFFFFFFFF

/* number of entries in the pink noise pool */
#define PINK_UNIT 128
  33
  34 /*
  35    Format of the extradata and packets
  36
  37    THIS INFORMATION IS NOT PART OF THE PUBLIC API OR ABI.
  38    IT CAN CHANGE WITHOUT NOTIFICATION.
  39
  40    All numbers are in little endian.
  41
  42    The codec extradata define a set of intervals with uniform content.
  43    Overlapping intervals are added together.
  44
  45    extradata:
  46        uint32      number of intervals
  47        ...         intervals
  48
  49    interval:
  50        int64       start timestamp; time_base must be 1/sample_rate;
  51                    start timestamps must be in ascending order
  52        int64       end timestamp
  53        uint32      type
  54        uint32      channels mask
  55        ...         additional information, depends on type
  56
  57    sine interval (type fourcc "SINE"):
  58        int32       start frequency, in 1/(1<<16) Hz
  59        int32       end frequency
  60        int32       start amplitude, 1<<16 is the full amplitude
  61        int32       end amplitude
  62        uint32      start phase, 0 is sin(0), 0x20000000 is sin(pi/2), etc.;
                   n | (1<<31) means to match the phase of previous interval #n
  64
  65    pink noise interval (type fourcc "NOIS"):
  66        int32       start amplitude
  67        int32       end amplitude
  68
  69    The input packets encode the time and duration of the requested segment.
  70
  71    packet:
  72        int64       start timestamp
  73        int32       duration
  74
  75 */
  76
/* Interval content types; the values are the fourcc tags read from the
 * "type" field of each interval in the extradata. */
enum ws_interval_type {
    WS_SINE  = MKTAG('S','I','N','E'), /* sine wave, frequency and amplitude vary linearly */
    WS_NOISE = MKTAG('N','O','I','S'), /* pink noise, amplitude varies linearly */
};
  81
/* One interval of uniform content, as parsed from the extradata, together
 * with its running synthesis state. */
struct ws_interval {
    /* first timestamp of the interval and first timestamp past it */
    int64_t ts_start, ts_end;
    /* phase at ts_start, per-sample phase increment at ts_start, and
     * per-sample change of that increment; the top SIN_BITS bits of a
     * phase value index the sine table */
    uint64_t phi0, dphi0, ddphi;
    /* amplitude at ts_start (input value shifted left by 32) and its
     * per-sample increment */
    uint64_t amp0, damp;
    /* running phase, phase increment and amplitude for the current sample */
    uint64_t phi, dphi, amp;
    /* bit mask of output channels: bit k set means "add to channel k" */
    uint32_t channels;
    enum ws_interval_type type;
    /* index of the next interval in the list of active intervals,
     * -1 at the end of the list */
    int next;
};
  91
/* Private decoder state. */
struct wavesynth_context {
    /* timestamp the synthesizer state currently corresponds to */
    int64_t cur_ts;
    /* ts_start of inter[next_inter], or INF_TS when there is none */
    int64_t next_ts;
    /* sine table with 1 << SIN_BITS entries covering one full period */
    int32_t *sin;
    /* array of nb_inter intervals parsed from the extradata */
    struct ws_interval *inter;
    /* LCG state used to generate the dither value of each sample */
    uint32_t dither_state;
    /* LCG state used to generate the pink noise pool */
    uint32_t pink_state;
    /* pool of precomputed pink noise values */
    int32_t pink_pool[PINK_UNIT];
    /* pink_need: number of noise intervals, the pool is only filled when
     * it is non-zero; pink_pos: next pool entry to consume, PINK_UNIT
     * means the pool must be refilled first */
    unsigned pink_need, pink_pos;
    int nb_inter;
    /* index of the first active interval, -1 if none is active */
    int cur_inter;
    /* index of the first interval that has not been entered yet */
    int next_inter;
};
 105
 106 #define LCG_A 1284865837
 107 #define LCG_C 4150755663
 108 #define LCG_AI 849225893 /* A*AI = 1 [mod 1<<32] */
 109
 110 static uint32_t lcg_next(uint32_t *s)
 111 {
 112     *s = *s * LCG_A + LCG_C;
 113     return *s;
 114 }
 115
 116 static void lcg_seek(uint32_t *s, uint32_t dt)
 117 {
 118     uint32_t a, c, t = *s;
 119
 120     a = LCG_A;
 121     c = LCG_C;
 122     while (dt) {
 123         if (dt & 1)
 124             t = a * t + c;
 125         c *= a + 1; /* coefficients for a double step */
 126         a *= a;
 127         dt >>= 1;
 128     }
 129     *s = t;
 130 }
 131
 132 /* Emulate pink noise by summing white noise at the sampling frequency,
 133  * white noise at half the sampling frequency (each value taken twice),
 134  * etc., with a total of 8 octaves.
 135  * This is known as the Voss-McCartney algorithm. */
 136
 137 static void pink_fill(struct wavesynth_context *ws)
 138 {
 139     int32_t vt[7] = { 0 }, v = 0;
 140     int i, j;
 141
 142     ws->pink_pos = 0;
 143     if (!ws->pink_need)
 144         return;
 145     for (i = 0; i < PINK_UNIT; i++) {
 146         for (j = 0; j < 7; j++) {
 147             if ((i >> j) & 1)
 148                 break;
 149             v -= vt[j];
 150             vt[j] = (int32_t)lcg_next(&ws->pink_state) >> 3;
 151             v += vt[j];
 152         }
 153         ws->pink_pool[i] = v + ((int32_t)lcg_next(&ws->pink_state) >> 3);
 154     }
 155     lcg_next(&ws->pink_state); /* so we use exactly 256 steps */
 156 }
 157
 158 /**
 159  * @return  (1<<64) * a / b, without overflow, if a < b
 160  */
 161 static uint64_t frac64(uint64_t a, uint64_t b)
 162 {
 163     uint64_t r = 0;
 164     int i;
 165
 166     if (b < (uint64_t)1 << 32) { /* b small, use two 32-bits steps */
 167         a <<= 32;
 168         return ((a / b) << 32) | ((a % b) << 32) / b;
 169     }
 170     if (b < (uint64_t)1 << 48) { /* b medium, use four 16-bits steps */
 171         for (i = 0; i < 4; i++) {
 172             a <<= 16;
 173             r = (r << 16) | (a / b);
 174             a %= b;
 175         }
 176         return r;
 177     }
 178     for (i = 63; i >= 0; i--) {
 179         if (a >= (uint64_t)1 << 63 || a << 1 >= b) {
 180             r |= (uint64_t)1 << i;
 181             a = (a << 1) - b;
 182         } else {
 183             a <<= 1;
 184         }
 185     }
 186     return r;
 187 }
 188
 189 static uint64_t phi_at(struct ws_interval *in, int64_t ts)
 190 {
 191     uint64_t dt = ts - in->ts_start;
 192     uint64_t dt2 = dt & 1 ? /* dt * (dt - 1) / 2 without overflow */
 193                    dt * ((dt - 1) >> 1) : (dt >> 1) * (dt - 1);
 194     return in->phi0 + dt * in->dphi0 + dt2 * in->ddphi;
 195 }
 196
 197 static void wavesynth_seek(struct wavesynth_context *ws, int64_t ts)
 198 {
 199     int *last, i;
 200     struct ws_interval *in;
 201
 202     last = &ws->cur_inter;
 203     for (i = 0; i < ws->nb_inter; i++) {
 204         in = &ws->inter[i];
 205         if (ts < in->ts_start)
 206             break;
 207         if (ts >= in->ts_end)
 208             continue;
 209         *last = i;
 210         last = &in->next;
 211         in->phi  = phi_at(in, ts);
 212         in->dphi = in->dphi0 + (ts - in->ts_start) * in->ddphi;
 213         in->amp  = in->amp0  + (ts - in->ts_start) * in->damp;
 214     }
 215     ws->next_inter = i;
 216     ws->next_ts = i < ws->nb_inter ? ws->inter[i].ts_start : INF_TS;
 217     *last = -1;
 218     lcg_seek(&ws->dither_state, (uint32_t)ts - (uint32_t)ws->cur_ts);
 219     if (ws->pink_need) {
 220         uint64_t pink_ts_cur  = (ws->cur_ts + (uint64_t)PINK_UNIT - 1) & ~(PINK_UNIT - 1);
 221         uint64_t pink_ts_next = ts & ~(PINK_UNIT - 1);
 222         int pos = ts & (PINK_UNIT - 1);
 223         lcg_seek(&ws->pink_state, (uint32_t)(pink_ts_next - pink_ts_cur) * 2);
 224         if (pos) {
 225             pink_fill(ws);
 226             ws->pink_pos = pos;
 227         } else {
 228             ws->pink_pos = PINK_UNIT;
 229         }
 230     }
 231     ws->cur_ts = ts;
 232 }
 233
/**
 * Parse the codec extradata into the array of intervals ws->inter.
 *
 * The expected layout is described in the format comment near the top of
 * this file. On failure ws->inter may be left allocated; it is released by
 * wavesynth_close().
 *
 * @param avc  codec context providing extradata, extradata_size and
 *             sample_rate, and whose priv_data is the wavesynth_context
 * @return 0 on success, AVERROR(EINVAL) if the extradata is malformed,
 *         AVERROR(ENOMEM) if the interval array cannot be allocated
 */
static int wavesynth_parse_extradata(AVCodecContext *avc)
{
    struct wavesynth_context *ws = avc->priv_data;
    struct ws_interval *in;
    uint8_t *edata, *edata_end;
    int32_t f1, f2, a1, a2;
    uint32_t phi;
    /* cur_ts starts at the smallest timestamp so that any first interval
     * passes the ascending-order check */
    int64_t dphi1, dphi2, dt, cur_ts = -0x8000000000000000;
    int i;

    if (avc->extradata_size < 4)
        return AVERROR(EINVAL);
    edata = avc->extradata;
    edata_end = edata + avc->extradata_size;
    ws->nb_inter = AV_RL32(edata);
    edata += 4;
    /* every interval needs at least its 24-byte header: reject counts the
     * remaining data cannot possibly hold before allocating */
    if (ws->nb_inter < 0 || (edata_end - edata) / 24 < ws->nb_inter)
        return AVERROR(EINVAL);
    ws->inter = av_calloc(ws->nb_inter, sizeof(*ws->inter));
    if (!ws->inter)
        return AVERROR(ENOMEM);
    for (i = 0; i < ws->nb_inter; i++) {
        in = &ws->inter[i];
        /* type-specific data of earlier intervals may have used up the
         * room, so the header size is checked again here */
        if (edata_end - edata < 24)
            return AVERROR(EINVAL);
        in->ts_start = AV_RL64(edata +  0);
        in->ts_end   = AV_RL64(edata +  8);
        in->type     = AV_RL32(edata + 16);
        in->channels = AV_RL32(edata + 20);
        edata += 24;
        /* starts must be ascending, intervals non-empty, and the length
         * must fit in an int64_t */
        if (in->ts_start < cur_ts ||
            in->ts_end <= in->ts_start ||
            (uint64_t)in->ts_end - in->ts_start > INT64_MAX
        )
            return AVERROR(EINVAL);
        cur_ts = in->ts_start;
        dt = in->ts_end - in->ts_start;
        switch (in->type) {
            case WS_SINE:
                if (edata_end - edata < 20 || avc->sample_rate <= 0)
                    return AVERROR(EINVAL);
                f1  = AV_RL32(edata +  0);
                f2  = AV_RL32(edata +  4);
                a1  = AV_RL32(edata +  8);
                a2  = AV_RL32(edata + 12);
                phi = AV_RL32(edata + 16);
                edata += 20;
                /* phase increment per sample = frequency / sample_rate,
                 * as a 64-bit fraction of a full period */
                /* NOTE(review): frac64() documents its result only for
                 * a < b; a negative f1/f2, or one not below
                 * sample_rate << 16, is passed through unchecked here --
                 * confirm whether such values should be rejected */
                dphi1 = frac64(f1, (int64_t)avc->sample_rate << 16);
                dphi2 = frac64(f2, (int64_t)avc->sample_rate << 16);
                in->dphi0 = dphi1;
                in->ddphi = (int64_t)(dphi2 - (uint64_t)dphi1) / dt;
                if (phi & 0x80000000) {
                    /* high bit set: the low bits are the index of an
                     * earlier interval whose phase at ts_start is reused */
                    phi &= ~0x80000000;
                    if (phi >= i)
                        return AVERROR(EINVAL);
                    in->phi0 = phi_at(&ws->inter[phi], in->ts_start);
                } else {
                    in->phi0 = (uint64_t)phi << 33;
                }
                break;
            case WS_NOISE:
                if (edata_end - edata < 8)
                    return AVERROR(EINVAL);
                a1  = AV_RL32(edata +  0);
                a2  = AV_RL32(edata +  4);
                edata += 8;
                break;
            default:
                return AVERROR(EINVAL);
        }
        /* amplitudes are kept with 32 extra fractional bits so that the
         * per-sample increment keeps its precision */
        in->amp0 = (uint64_t)a1 << 32;
        in->damp = (int64_t)(((uint64_t)a2 << 32) - ((uint64_t)a1 << 32)) / dt;
    }
    /* trailing bytes after the last interval are an error */
    if (edata != edata_end)
        return AVERROR(EINVAL);
    return 0;
}
 311
 312 static av_cold int wavesynth_init(AVCodecContext *avc)
 313 {
 314     struct wavesynth_context *ws = avc->priv_data;
 315     int i, r;
 316
 317     if (avc->channels > WS_MAX_CHANNELS) {
 318         av_log(avc, AV_LOG_ERROR,
 319                "This implementation is limited to %d channels.\n",
 320                WS_MAX_CHANNELS);
 321         return AVERROR(EINVAL);
 322     }
 323     r = wavesynth_parse_extradata(avc);
 324     if (r < 0) {
 325         av_log(avc, AV_LOG_ERROR, "Invalid intervals definitions.\n");
 326         return r;
 327     }
 328     ws->sin = av_malloc(sizeof(*ws->sin) << SIN_BITS);
 329     if (!ws->sin)
 330         return AVERROR(ENOMEM);
 331     for (i = 0; i < 1 << SIN_BITS; i++)
 332         ws->sin[i] = floor(32767 * sin(2 * M_PI * i / (1 << SIN_BITS)));
 333     ws->dither_state = MKTAG('D','I','T','H');
 334     for (i = 0; i < ws->nb_inter; i++)
 335         ws->pink_need += ws->inter[i].type == WS_NOISE;
 336     ws->pink_state = MKTAG('P','I','N','K');
 337     ws->pink_pos = PINK_UNIT;
 338     wavesynth_seek(ws, 0);
 339     avc->sample_fmt = AV_SAMPLE_FMT_S16;
 340     return 0;
 341 }
 342
 343 static void wavesynth_synth_sample(struct wavesynth_context *ws, int64_t ts,
 344                                    int32_t *channels)
 345 {
 346     int32_t amp, *cv;
 347     unsigned val;
 348     struct ws_interval *in;
 349     int i, *last, pink;
 350     uint32_t c, all_ch = 0;
 351
 352     i = ws->cur_inter;
 353     last = &ws->cur_inter;
 354     if (ws->pink_pos == PINK_UNIT)
 355         pink_fill(ws);
 356     pink = ws->pink_pool[ws->pink_pos++] >> 16;
 357     while (i >= 0) {
 358         in = &ws->inter[i];
 359         i = in->next;
 360         if (ts >= in->ts_end) {
 361             *last = i;
 362             continue;
 363         }
 364         last = &in->next;
 365         amp = in->amp >> 32;
 366         in->amp  += in->damp;
 367         switch (in->type) {
 368             case WS_SINE:
 369                 val = amp * (unsigned)ws->sin[in->phi >> (64 - SIN_BITS)];
 370                 in->phi  += in->dphi;
 371                 in->dphi += in->ddphi;
 372                 break;
 373             case WS_NOISE:
 374                 val = amp * (unsigned)pink;
 375                 break;
 376             default:
 377                 val = 0;
 378         }
 379         all_ch |= in->channels;
 380         for (c = in->channels, cv = channels; c; c >>= 1, cv++)
 381             if (c & 1)
 382                 *cv += (unsigned)val;
 383     }
 384     val = (int32_t)lcg_next(&ws->dither_state) >> 16;
 385     for (c = all_ch, cv = channels; c; c >>= 1, cv++)
 386         if (c & 1)
 387             *cv += val;
 388 }
 389
 390 static void wavesynth_enter_intervals(struct wavesynth_context *ws, int64_t ts)
 391 {
 392     int *last, i;
 393     struct ws_interval *in;
 394
 395     last = &ws->cur_inter;
 396     for (i = ws->cur_inter; i >= 0; i = ws->inter[i].next)
 397         last = &ws->inter[i].next;
 398     for (i = ws->next_inter; i < ws->nb_inter; i++) {
 399         in = &ws->inter[i];
 400         if (ts < in->ts_start)
 401             break;
 402         if (ts >= in->ts_end)
 403             continue;
 404         *last = i;
 405         last = &in->next;
 406         in->phi = in->phi0;
 407         in->dphi = in->dphi0;
 408         in->amp = in->amp0;
 409     }
 410     ws->next_inter = i;
 411     ws->next_ts = i < ws->nb_inter ? ws->inter[i].ts_start : INF_TS;
 412     *last = -1;
 413 }
 414
 415 static int wavesynth_decode(AVCodecContext *avc, void *rframe, int *rgot_frame,
 416                             AVPacket *packet)
 417 {
 418     struct wavesynth_context *ws = avc->priv_data;
 419     AVFrame *frame = rframe;
 420     int64_t ts;
 421     int duration;
 422     int s, c, r;
 423     int16_t *pcm;
 424     int32_t channels[WS_MAX_CHANNELS];
 425
 426     *rgot_frame = 0;
 427     if (packet->size != 12)
 428         return AVERROR_INVALIDDATA;
 429     ts = AV_RL64(packet->data);
 430     if (ts != ws->cur_ts)
 431         wavesynth_seek(ws, ts);
 432     duration = AV_RL32(packet->data + 8);
 433     if (duration <= 0)
 434         return AVERROR(EINVAL);
 435     frame->nb_samples = duration;
 436     r = ff_get_buffer(avc, frame, 0);
 437     if (r < 0)
 438         return r;
 439     pcm = (int16_t *)frame->data[0];
 440     for (s = 0; s < duration; s++, ts+=(uint64_t)1) {
 441         memset(channels, 0, avc->channels * sizeof(*channels));
 442         if (ts >= ws->next_ts)
 443             wavesynth_enter_intervals(ws, ts);
 444         wavesynth_synth_sample(ws, ts, channels);
 445         for (c = 0; c < avc->channels; c++)
 446             *(pcm++) = channels[c] >> 16;
 447     }
 448     ws->cur_ts += (uint64_t)duration;
 449     *rgot_frame = 1;
 450     return packet->size;
 451 }
 452
 453 static av_cold int wavesynth_close(AVCodecContext *avc)
 454 {
 455     struct wavesynth_context *ws = avc->priv_data;
 456
 457     av_freep(&ws->sin);
 458     av_freep(&ws->inter);
 459     return 0;
 460 }
 461
/* Codec registration entry: binds the init, decode and close callbacks
 * defined above to the AV_CODEC_ID_FFWAVESYNTH audio codec id. */
AVCodec ff_ffwavesynth_decoder = {
    .name           = "wavesynth",
    .long_name      = NULL_IF_CONFIG_SMALL("Wave synthesis pseudo-codec"),
    .type           = AVMEDIA_TYPE_AUDIO,
    .id             = AV_CODEC_ID_FFWAVESYNTH,
    .priv_data_size = sizeof(struct wavesynth_context),
    .init           = wavesynth_init,
    .close          = wavesynth_close,
    .decode         = wavesynth_decode,
    .capabilities   = AV_CODEC_CAP_DR1,
    /* NOTE(review): presumably makes the framework call .close when .init
     * fails, which is what frees ws->inter after a parse error -- confirm */
    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
};