2 * Wavesynth pseudo-codec
3 * Copyright (c) 2011 Nicolas George
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 #include "libavutil/intreadwrite.h"
23 #include "libavutil/log.h"
/* Upper bound on the number of output channels: wavesynth_init() rejects
 * contexts with more channels, and wavesynth_decode() sizes its per-sample
 * mixing buffer with it. */
29 #define WS_MAX_CHANNELS 32
/* 0x7FFFFFFFFFFFFFFF, the largest signed 64-bit value; stored in next_ts
 * when there is no further interval left to enter. */
30 #define INF_TS 0x7FFFFFFFFFFFFFFF
35 Format of the extradata and packets
37 THIS INFORMATION IS NOT PART OF THE PUBLIC API OR ABI.
38 IT CAN CHANGE WITHOUT NOTIFICATION.
40 All numbers are in little endian.
42 The codec extradata defines a set of intervals with uniform content.
43 Overlapping intervals are added together.
46 uint32 number of intervals
50 int64 start timestamp; time_base must be 1/sample_rate;
51 start timestamps must be in ascending order
55 ... additional information, depends on type
57 sine interval (type fourcc "SINE"):
58 int32 start frequency, in 1/(1<<16) Hz
60 int32 start amplitude, 1<<16 is the full amplitude
62 uint32 start phase, 0 is sin(0), 0x20000000 is sin(pi/2), etc.;
63 n | (1<<31) means to match the phase of previous channel #n
65 pink noise interval (type fourcc "NOIS"):
69 The input packets encode the time and duration of the requested segment.
/* Kind of signal an interval produces. The values are the little-endian
 * fourcc tags read from the "type" field of each interval in the extradata. */
77 enum ws_interval_type {
78 WS_SINE = MKTAG('S','I','N','E'), /* sine interval, fourcc "SINE" */
79 WS_NOISE = MKTAG('N','O','I','S'), /* pink noise interval, fourcc "NOIS" */
/* NOTE(review): the struct header and several members (e.g. amp0, damp,
 * channels, next, which are used elsewhere in the file) are not visible in
 * this listing. */
/* Timestamps read from the extradata; the seek and enter code treats the
 * interval as active for ts_start <= ts < ts_end. */
83 int64_t ts_start, ts_end;
/* Phase model evaluated by phi_at(): phi0 is the phase at ts_start, dphi0 the
 * initial per-sample phase increment, ddphi the per-sample change of that
 * increment. All three wrap modulo 2^64. */
84 uint64_t phi0, dphi0, ddphi;
/* Running values for the current timestamp: set by wavesynth_seek() and
 * advanced while synthesizing samples. */
86 uint64_t phi, dphi, amp;
/* WS_SINE or WS_NOISE, as read from the extradata. */
88 enum ws_interval_type type;
/* Private decoder state (avc->priv_data).
 * NOTE(review): several members used elsewhere in the file (cur_ts, next_ts,
 * cur_inter, next_inter, nb_inter, sin, pink_state) are not visible in this
 * listing. */
92 struct wavesynth_context {
/* Array of nb_inter intervals, allocated by wavesynth_parse_extradata(). */
96 struct ws_interval *inter;
/* State of the LCG that is stepped once per sample in
 * wavesynth_synth_sample(); seeded in wavesynth_init(). */
97 uint32_t dither_state;
/* Pool of PINK_UNIT pink-noise values produced by pink_fill(). */
99 int32_t pink_pool[PINK_UNIT];
/* pink_need: number of WS_NOISE intervals, counted in wavesynth_init().
 * pink_pos: next index to read in pink_pool; the value PINK_UNIT marks the
 * pool as exhausted. */
100 unsigned pink_need, pink_pos;
/* Parameters of the 32-bit linear congruential generator used for noise:
 * one step maps s to s * LCG_A + LCG_C (mod 1<<32), see lcg_next(). */
106 #define LCG_A 1284865837
107 #define LCG_C 4150755663
/* Multiplicative inverse of LCG_A modulo 1<<32; lcg_seek() uses it to build
 * the coefficients for a step backward. */
108 #define LCG_AI 849225893 /* A*AI = 1 [mod 1<<32] */
/**
 * Advance the linear congruential generator by one step.
 *
 * The new state is *s * LCG_A + LCG_C, reduced modulo 1<<32 by the store
 * into the uint32_t state.
 *
 * @param s generator state, updated in place
 *
 * NOTE(review): the return statement is not visible in this listing; callers
 * such as pink_fill() use the returned 32-bit value as a noise sample.
 */
110 static uint32_t lcg_next(uint32_t *s)
112 *s = *s * LCG_A + LCG_C;
/**
 * Move the generator state by dt steps without iterating step by step.
 *
 * @param s  generator state, updated in place
 * @param dt number of steps; the visible else branch selects coefficients
 *           for stepping backward, so negative values appear to be supported
 *
 * NOTE(review): most of the loop is not visible in this listing. The visible
 * "double step" update suggests the affine map (a, c) is repeatedly composed
 * with itself, presumably driven by the bits of dt; confirm against the full
 * source.
 */
116 static void lcg_seek(uint32_t *s, int64_t dt)
/* a, c: multiplier and increment of the current affine step; t: working copy
 * of the state. */
118 uint32_t a, c, t = *s;
123 } else { /* coefficients for a step backward */
/* A backward step is s -> LCG_AI * (s - LCG_C), so its increment is
 * -LCG_AI * LCG_C modulo 1<<32. */
125 c = (uint32_t)(-LCG_AI * LCG_C);
/* Composing s -> a*s + c with itself gives s -> a*a*s + c*(a + 1). */
131 c *= a + 1; /* coefficients for a double step */
138 /* Emulate pink noise by summing white noise at the sampling frequency,
139 * white noise at half the sampling frequency (each value taken twice),
140 * etc., with a total of 8 octaves.
141 * This is known as the Voss-McCartney algorithm. */
/* Refill ws->pink_pool with PINK_UNIT new pink-noise values drawn from the
 * ws->pink_state generator.
 * NOTE(review): the condition that decides when each vt[j] is refreshed, and
 * the handling of pink_pos / pink_need, are not visible in this listing. */
143 static void pink_fill(struct wavesynth_context *ws)
/* vt[]: the seven held white-noise values (one per slower octave);
 * v: running value combined with a fresh sample for each pool entry. */
145 int32_t vt[7] = { 0 }, v = 0;
151 for (i = 0; i < PINK_UNIT; i++) {
152 for (j = 0; j < 7; j++) {
/* Draw a new value for this octave; the arithmetic shift by 3 scales each
 * contribution down so that the sum of the octaves stays within int32_t
 * (presumably; eight terms of at most 1<<28 in magnitude). */
156 vt[j] = (int32_t)lcg_next(&ws->pink_state) >> 3;
/* The fastest octave gets a fresh value for every pool entry. */
159 ws->pink_pool[i] = v + ((int32_t)lcg_next(&ws->pink_state) >> 3);
/* Burn one extra generator step so that a refill always consumes a fixed
 * number of steps, which keeps seeking by whole pools possible. */
161 lcg_next(&ws->pink_state); /* so we use exactly 256 steps */
165 * @return (1<<64) * a / b, without overflow, if a < b
/* Compute the 64-bit binary fraction a / b, i.e. (1<<64) * a / b, for a < b,
 * choosing among three strategies according to the magnitude of b so that no
 * intermediate value overflows 64 bits.
 * NOTE(review): the declarations of r and i and several statements of each
 * branch are not visible in this listing. */
167 static uint64_t frac64(uint64_t a, uint64_t b)
172 if (b < (uint64_t)1 << 32) { /* b small, use two 32-bit steps */
/* High 32 bits of the quotient, then the low 32 bits from the remainder. */
174 return ((a / b) << 32) | ((a % b) << 32) / b;
176 if (b < (uint64_t)1 << 48) { /* b medium, use four 16-bit steps */
177 for (i = 0; i < 4; i++) {
/* Append the next 16 quotient bits to the result. */
179 r = (r << 16) | (a / b);
/* b large: produce the quotient one bit at a time, from bit 63 down. */
184 for (i = 63; i >= 0; i--) {
/* The first test catches the case where a << 1 would overflow 64 bits;
 * in either case 2*a >= b, so the current quotient bit is 1. */
185 if (a >= (uint64_t)1 << 63 || a << 1 >= b) {
186 r |= (uint64_t)1 << i;
/**
 * Compute the phase of an interval at a given timestamp.
 *
 * With dt = ts - in->ts_start, the result is
 * phi0 + dt * dphi0 + dt * (dt - 1) / 2 * ddphi, evaluated in unsigned
 * 64-bit arithmetic (so it wraps modulo 1<<64).
 *
 * @param in interval whose phi0, dphi0, ddphi and ts_start are read
 * @param ts timestamp at which to evaluate the phase
 * @return   the phase at ts
 *
 * NOTE(review): ts and in->ts_start are both int64_t, so the subtraction is
 * done in signed arithmetic before conversion to uint64_t and can overflow
 * for operands that are far apart; verify that callers bound the difference.
 */
195 static uint64_t phi_at(struct ws_interval *in, int64_t ts)
197 uint64_t dt = ts - in->ts_start;
/* Divide whichever of dt and dt - 1 is even, so the halving is exact. */
198 uint64_t dt2 = dt & 1 ? /* dt * (dt - 1) / 2 without overflow */
199 dt * ((dt - 1) >> 1) : (dt >> 1) * (dt - 1);
200 return in->phi0 + dt * in->dphi0 + dt2 * in->ddphi;
/**
 * Reposition the synthesizer state at timestamp ts.
 *
 * Recomputes the running phase, phase increment and amplitude of the
 * intervals covering ts, records the start of the next interval to enter,
 * and moves the dither and pink-noise generators to the state they would
 * have at ts.
 *
 * @param ws decoder state
 * @param ts target timestamp
 *
 * NOTE(review): the statements that link active intervals into the list
 * headed by ws->cur_inter, the branch bodies of the two timestamp tests and
 * the update of ws->cur_ts are not visible in this listing.
 */
203 static void wavesynth_seek(struct wavesynth_context *ws, int64_t ts)
206 struct ws_interval *in;
/* last points at the link field that will receive the next active interval. */
208 last = &ws->cur_inter;
209 for (i = 0; i < ws->nb_inter; i++) {
/* Intervals are sorted by start time (enforced when parsing the extradata). */
211 if (ts < in->ts_start)
/* Interval already finished at ts. */
213 if (ts >= in->ts_end)
/* Bring the running values of this interval to their value at ts. */
217 in->phi = phi_at(in, ts);
218 in->dphi = in->dphi0 + (ts - in->ts_start) * in->ddphi;
219 in->amp = in->amp0 + (ts - in->ts_start) * in->damp;
/* Start of the first interval not yet reached, or INF_TS if there is none. */
222 ws->next_ts = i < ws->nb_inter ? ws->inter[i].ts_start : INF_TS;
/* The dither generator is stepped once per sample, so the distance in
 * samples is the number of steps to move.
 * NOTE(review): ts - ws->cur_ts is a signed 64-bit subtraction; confirm it
 * cannot overflow for the timestamps a caller may pass. */
224 lcg_seek(&ws->dither_state, ts - ws->cur_ts);
/* Pink noise is generated in pools of PINK_UNIT samples: round the current
 * position up and the target position down to pool boundaries. */
226 int64_t pink_ts_cur = (ws->cur_ts + PINK_UNIT - 1) & ~(PINK_UNIT - 1);
227 int64_t pink_ts_next = ts & ~(PINK_UNIT - 1);
/* Offset of ts inside its pool. */
228 int pos = ts & (PINK_UNIT - 1);
/* Two generator steps per sample (presumably matching the fixed step count
 * consumed by pink_fill() per pool; confirm against PINK_UNIT). */
229 lcg_seek(&ws->pink_state, (pink_ts_next - pink_ts_cur) << 1);
/* Mark the pool as exhausted so that it is refilled before its next use. */
234 ws->pink_pos = PINK_UNIT;
/**
 * Parse the codec extradata into the array of intervals ws->inter.
 *
 * The extradata holds a little-endian uint32 interval count followed, for
 * each interval, by a 24-byte header (start timestamp, end timestamp, type,
 * channel mask) and a type-specific payload.
 *
 * @param avc codec context; reads extradata, extradata_size and sample_rate,
 *            writes nb_inter and inter in the private context
 * @return AVERROR(EINVAL) on malformed extradata, AVERROR(ENOMEM) on
 *         allocation failure; the success return is not visible in this
 *         listing (presumably 0)
 *
 * NOTE(review): the switch on the interval type, the advance of the edata
 * pointer and several checks are not visible in this listing.
 */
240 static int wavesynth_parse_extradata(AVCodecContext *avc)
242 struct wavesynth_context *ws = avc->priv_data;
243 struct ws_interval *in;
244 uint8_t *edata, *edata_end;
/* f1/f2: start/end frequency, a1/a2: start/end amplitude of an interval. */
245 int32_t f1, f2, a1, a2;
/* cur_ts starts at the lowest timestamp so that the first interval always
 * passes the ascending-order check. */
247 int64_t dphi1, dphi2, dt, cur_ts = -0x8000000000000000;
/* At least the 4-byte interval count must be present. */
250 if (avc->extradata_size < 4)
251 return AVERROR(EINVAL);
252 edata = avc->extradata;
253 edata_end = edata + avc->extradata_size;
254 ws->nb_inter = AV_RL32(edata);
/* A count with the top bit set shows up as negative here and is rejected. */
256 if (ws->nb_inter < 0)
257 return AVERROR(EINVAL);
/* Zero-initialized array, one entry per interval. */
258 ws->inter = av_calloc(ws->nb_inter, sizeof(*ws->inter));
260 return AVERROR(ENOMEM);
261 for (i = 0; i < ws->nb_inter; i++) {
/* Common 24-byte header: two int64 timestamps and two 32-bit fields. */
263 if (edata_end - edata < 24)
264 return AVERROR(EINVAL);
265 in->ts_start = AV_RL64(edata + 0);
266 in->ts_end = AV_RL64(edata + 8);
267 in->type = AV_RL32(edata + 16);
268 in->channels = AV_RL32(edata + 20);
/* Reject intervals that are out of order, empty or reversed, or whose
 * length does not fit in int64_t. */
270 if (in->ts_start < cur_ts ||
271 in->ts_end <= in->ts_start ||
272 (uint64_t)in->ts_end - in->ts_start > INT64_MAX
274 return AVERROR(EINVAL);
275 cur_ts = in->ts_start;
/* Interval length in samples; positive thanks to the check above. */
276 dt = in->ts_end - in->ts_start;
/* Sine payload: 20 bytes (start/end frequency, start/end amplitude, phase). */
279 if (edata_end - edata < 20)
280 return AVERROR(EINVAL);
281 f1 = AV_RL32(edata + 0);
282 f2 = AV_RL32(edata + 4);
283 a1 = AV_RL32(edata + 8);
284 a2 = AV_RL32(edata + 12);
285 phi = AV_RL32(edata + 16);
/* Frequencies are in 1/(1<<16) Hz, so f / (sample_rate << 16) is the number
 * of cycles per sample; frac64() expresses it as a fraction of 1<<64.
 * NOTE(review): f1 and f2 are signed and are converted to uint64_t here;
 * unless a check hidden from this listing rejects them, negative values
 * would violate the a < b precondition of frac64(). */
287 dphi1 = frac64(f1, (int64_t)avc->sample_rate << 16);
288 dphi2 = frac64(f2, (int64_t)avc->sample_rate << 16);
/* Linear frequency sweep: constant change of the phase increment per sample. */
290 in->ddphi = (dphi2 - dphi1) / dt;
/* Top bit set: the start phase is taken from another interval rather than
 * given directly. */
291 if (phi & 0x80000000) {
294 return AVERROR(EINVAL);
/* NOTE(review): the format description speaks of "previous channel #n",
 * while this indexes the interval array; the masking and range check of phi
 * are presumably on lines not visible here. */
295 in->phi0 = phi_at(&ws->inter[phi], in->ts_start);
/* Scale the 31-bit phase to 64 bits: 0x20000000 becomes 1<<62, a quarter
 * of a cycle. */
297 in->phi0 = (uint64_t)phi << 33;
/* Noise payload: 8 bytes (start and end amplitude). */
301 if (edata_end - edata < 8)
302 return AVERROR(EINVAL);
303 a1 = AV_RL32(edata + 0);
304 a2 = AV_RL32(edata + 4);
308 return AVERROR(EINVAL);
/* Amplitudes are kept with 32 extra fractional bits; damp is the linear
 * per-sample amplitude change over the interval.
 * NOTE(review): a1 and a2 are signed; left-shifting a negative value is
 * undefined behavior in C, so confirm whether negative amplitudes can
 * reach this point. */
310 in->amp0 = (int64_t)a1 << 32;
311 in->damp = (((int64_t)a2 << 32) - ((int64_t)a1 << 32)) / dt;
/* Trailing bytes after the last interval are an error. */
313 if (edata != edata_end)
314 return AVERROR(EINVAL);
/**
 * Initialize the decoder: validate the channel count, parse the extradata,
 * build the sine lookup table, seed the noise generators and seek to
 * timestamp 0.
 *
 * @param avc codec context
 * @return AVERROR(EINVAL) if there are more than WS_MAX_CHANNELS channels;
 *         other return paths are not visible in this listing
 *
 * NOTE(review): the error checks after wavesynth_parse_extradata() and
 * av_malloc(), and the label structure around the final av_freep(), are not
 * visible here.
 */
318 static av_cold int wavesynth_init(AVCodecContext *avc)
320 struct wavesynth_context *ws = avc->priv_data;
323 if (avc->channels > WS_MAX_CHANNELS) {
324 av_log(avc, AV_LOG_ERROR,
325 "This implementation is limited to %d channels.\n",
327 return AVERROR(EINVAL);
329 r = wavesynth_parse_extradata(avc);
331 av_log(avc, AV_LOG_ERROR, "Invalid intervals definitions.\n");
/* Lookup table with 1 << SIN_BITS entries covering one full sine period. */
334 ws->sin = av_malloc(sizeof(*ws->sin) << SIN_BITS);
339 for (i = 0; i < 1 << SIN_BITS; i++)
/* Entries are scaled to the range [-32767, 32767] and rounded down. */
340 ws->sin[i] = floor(32767 * sin(2 * M_PI * i / (1 << SIN_BITS)));
/* Fixed seeds for the two noise generators. */
341 ws->dither_state = MKTAG('D','I','T','H');
/* Count the noise intervals. */
342 for (i = 0; i < ws->nb_inter; i++)
343 ws->pink_need += ws->inter[i].type == WS_NOISE;
344 ws->pink_state = MKTAG('P','I','N','K');
/* Mark the pink-noise pool as exhausted so that it is filled on first use. */
345 ws->pink_pos = PINK_UNIT;
346 wavesynth_seek(ws, 0);
/* Output is signed 16-bit PCM. */
347 avc->sample_fmt = AV_SAMPLE_FMT_S16;
/* Releases the interval array (presumably on the error path; confirm). */
351 av_freep(&ws->inter);
/* Synthesize one sample at timestamp ts by walking the active intervals
 * and accumulating their values into the per-channel buffer.
 * NOTE(review): the rest of the parameter list, the loop over the interval
 * list, the per-type switch and the accumulation statements are not visible
 * in this listing; the comments below cover only the visible lines. */
356 static void wavesynth_synth_sample(struct wavesynth_context *ws, int64_t ts,
359 int32_t amp, val, *cv;
360 struct ws_interval *in;
/* c: channel-mask cursor; all_ch: union of the channel masks of the
 * intervals visited for this sample. */
362 uint32_t c, all_ch = 0;
365 last = &ws->cur_inter;
/* The pool is exhausted once pink_pos reaches PINK_UNIT. */
366 if (ws->pink_pos == PINK_UNIT)
/* Take the next pink-noise value, keeping its upper 16 bits. */
368 pink = ws->pink_pool[ws->pink_pos++] >> 16;
/* Interval has ended at ts. */
372 if (ts >= in->ts_end) {
/* The top SIN_BITS bits of the 64-bit phase index the sine table. */
381 val = amp * ws->sin[in->phi >> (64 - SIN_BITS)];
/* Advance the phase increment by its per-sample change. */
383 in->dphi += in->ddphi;
391 all_ch |= in->channels;
/* Walk the channel mask bit by bit, moving cv to the matching channel. */
392 for (c = in->channels, cv = channels; c; c >>= 1, cv++)
/* One dither value per sample, from the upper 16 bits of the generator. */
396 val = (int32_t)lcg_next(&ws->dither_state) >> 16;
397 for (c = all_ch, cv = channels; c; c >>= 1, cv++)
/**
 * Activate the intervals whose start time has been reached at ts and record
 * when the next one starts.
 *
 * @param ws decoder state
 * @param ts current timestamp
 *
 * NOTE(review): the branch bodies of the timestamp tests, the statements
 * that link an interval into the active list and the update of
 * ws->next_inter are not visible in this listing.
 */
402 static void wavesynth_enter_intervals(struct wavesynth_context *ws, int64_t ts)
405 struct ws_interval *in;
/* Find the tail of the active list: follow the next indices from cur_inter
 * until a negative index ends the list. */
407 last = &ws->cur_inter;
408 for (i = ws->cur_inter; i >= 0; i = ws->inter[i].next)
409 last = &ws->inter[i].next;
/* Scan the intervals not yet entered, in start-time order. */
410 for (i = ws->next_inter; i < ws->nb_inter; i++) {
/* Not started yet. */
412 if (ts < in->ts_start)
/* Already over. */
414 if (ts >= in->ts_end)
/* A newly entered interval starts with its initial phase increment. */
419 in->dphi = in->dphi0;
/* Start of the first interval still to come, or INF_TS if there is none. */
423 ws->next_ts = i < ws->nb_inter ? ws->inter[i].ts_start : INF_TS;
/* Decode callback: synthesize the segment requested by a packet.
 * A packet is exactly 12 bytes: a little-endian 64-bit timestamp followed
 * by a little-endian 32-bit duration in samples.
 * Returns AVERROR_INVALIDDATA for a packet of the wrong size and
 * AVERROR(EINVAL) for a rejected duration.
 * NOTE(review): the rest of the parameter list, the duration check, the
 * handling of the ff_get_buffer() result and the success return are not
 * visible in this listing. */
427 static int wavesynth_decode(AVCodecContext *avc, void *rframe, int *rgot_frame,
430 struct wavesynth_context *ws = avc->priv_data;
431 AVFrame *frame = rframe;
/* Per-sample accumulator, one 32-bit slot per channel. */
436 int32_t channels[WS_MAX_CHANNELS];
439 if (packet->size != 12)
440 return AVERROR_INVALIDDATA;
441 ts = AV_RL64(packet->data);
/* Only reposition the state when the request is not contiguous with the
 * previous one. */
442 if (ts != ws->cur_ts)
443 wavesynth_seek(ws, ts);
444 duration = AV_RL32(packet->data + 8);
446 return AVERROR(EINVAL);
447 frame->nb_samples = duration;
448 r = ff_get_buffer(avc, frame, 0);
/* Samples are written as 16-bit values into the first data plane. */
451 pcm = (int16_t *)frame->data[0];
452 for (s = 0; s < duration; s++, ts++) {
/* Clear the accumulators of the channels in use. */
453 memset(channels, 0, avc->channels * sizeof(*channels));
/* Enter the intervals whose start time has been reached. */
454 if (ts >= ws->next_ts)
455 wavesynth_enter_intervals(ws, ts);
456 wavesynth_synth_sample(ws, ts, channels);
/* Keep the upper 16 bits of each 32-bit accumulator. */
457 for (c = 0; c < avc->channels; c++)
458 *(pcm++) = channels[c] >> 16;
/* Advance the current timestamp past the segment just produced. */
460 ws->cur_ts += duration;
/* Close callback: release the memory owned by the private context.
 * NOTE(review): only the release of the interval array is visible in this
 * listing; the sine table allocated in wavesynth_init() is presumably freed
 * on a line not shown here, as is the return statement. */
465 static av_cold int wavesynth_close(AVCodecContext *avc)
467 struct wavesynth_context *ws = avc->priv_data;
/* av_freep() also resets the pointer to NULL. */
470 av_freep(&ws->inter);
/* Codec registration entry for the wave synthesis pseudo-decoder.
 * NOTE(review): the .name field and the closing of the initializer are not
 * visible in this listing. */
474 AVCodec ff_ffwavesynth_decoder = {
476 .long_name = NULL_IF_CONFIG_SMALL("Wave synthesis pseudo-codec"),
477 .type = AVMEDIA_TYPE_AUDIO,
478 .id = AV_CODEC_ID_FFWAVESYNTH,
/* Size of the private context allocated by the framework for each instance. */
479 .priv_data_size = sizeof(struct wavesynth_context),
480 .init = wavesynth_init,
481 .close = wavesynth_close,
482 .decode = wavesynth_decode,
/* The decoder obtains its output buffers through ff_get_buffer(). */
483 .capabilities = AV_CODEC_CAP_DR1,