2 * Wavesynth pseudo-codec
3 * Copyright (c) 2011 Nicolas George
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 #include "libavutil/intreadwrite.h"
23 #include "libavutil/log.h"
/* Upper bound on avc->channels accepted by wavesynth_init(); also the size of
 * the per-sample accumulator array declared in wavesynth_decode(). */
29 #define WS_MAX_CHANNELS 32
/* Value stored in next_ts when the interval index has reached nb_inter, i.e.
 * no further interval start is pending. It is the largest positive value of a
 * signed 64-bit integer. */
30 #define INF_TS 0x7FFFFFFFFFFFFFFF
35 Format of the extradata and packets
37 THIS INFORMATION IS NOT PART OF THE PUBLIC API OR ABI.
38 IT CAN CHANGE WITHOUT NOTIFICATION.
40 All numbers are in little endian.
42 The codec extradata define a set of intervals with uniform content.
43 Overlapping intervals are added together.
46 uint32 number of intervals
50 int64 start timestamp; time_base must be 1/sample_rate;
51 start timestamps must be in ascending order
55 ... additional information, depends on type
57 sine interval (type fourcc "SINE"):
58 int32 start frequency, in 1/(1<<16) Hz
60 int32 start amplitude, 1<<16 is the full amplitude
62 uint32 start phase, 0 is sin(0), 0x20000000 is sin(pi/2), etc.;
63 n | (1<<31) means to match the phase of previous channel #n
65 pink noise interval (type fourcc "NOIS"):
69 The input packets encode the time and duration of the requested segment.
/* Kind of an interval. The values are the fourcc tags "SINE" and "NOIS" that
 * the extradata format description uses for sine and pink noise intervals;
 * wavesynth_parse_extradata() reads the tag as a 32-bit little-endian word. */
77 enum ws_interval_type {
78 WS_SINE = MKTAG('S','I','N','E'),
79 WS_NOISE = MKTAG('N','O','I','S'),
/* Members of one interval, accessed through struct ws_interval pointers.
 * NOTE(review): the embedded line numbers skip values here (82, 85, 87, 89...),
 * so the struct header and other members the code uses (amp0, damp, channels,
 * next) are not visible in this listing. */
/* Start and end timestamps; parsing rejects ts_end <= ts_start. */
83 int64_t ts_start, ts_end;
/* Phase at ts_start, phase increment at ts_start, and per-sample change of
 * that increment, as combined by phi_at(). */
84 uint64_t phi0, dphi0, ddphi;
/* Running phase, phase increment and amplitude; wavesynth_seek() recomputes
 * all three for the timestamp it seeks to. */
86 uint64_t phi, dphi, amp;
/* Interval kind, read from the extradata. */
88 enum ws_interval_type type;
/* Decoder private state; the functions below obtain it from avc->priv_data.
 * NOTE(review): the embedded line numbers skip values, so several members the
 * code uses (cur_ts, next_ts, nb_inter, cur_inter, next_inter, sin,
 * pink_state) are not visible in this listing. */
92 struct wavesynth_context {
/* Array of nb_inter intervals, allocated by wavesynth_parse_extradata() and
 * released by wavesynth_close(). */
96 struct ws_interval *inter;
/* State of the generator that wavesynth_synth_sample() advances with
 * lcg_next(); seeded in wavesynth_init(). */
97 uint32_t dither_state;
/* Pool of PINK_UNIT noise values written by pink_fill(). */
99 int32_t pink_pool[PINK_UNIT];
/* pink_need: number of WS_NOISE intervals, counted in wavesynth_init().
 * pink_pos: read index into pink_pool; it is compared against PINK_UNIT
 * before a value is read in wavesynth_synth_sample(). */
100 unsigned pink_need, pink_pos;
/* Multiplier (LCG_A) and increment (LCG_C) of the linear congruential
 * generator: lcg_next() replaces the state s with s * LCG_A + LCG_C, stored
 * back into a uint32_t. */
106 #define LCG_A 1284865837
107 #define LCG_C 4150755663
/* NOTE(review): LCG_AI is not referenced by any line visible in this listing. */
108 #define LCG_AI 849225893 /* A*AI = 1 [mod 1<<32] */
/* Advance the generator state *s by one step: *s becomes *s * LCG_A + LCG_C,
 * truncated to 32 bits when stored back into the uint32_t.
 * NOTE(review): the rest of the body (including the return statement) is
 * missing from this listing; callers use the return value as the next
 * pseudo-random number, presumably the updated state - confirm. */
110 static uint32_t lcg_next(uint32_t *s)
112 *s = *s * LCG_A + LCG_C;
/* Reposition the generator state *s by dt steps; callers pass a difference of
 * timestamps as dt.
 * NOTE(review): most of the body is missing from this listing (embedded line
 * numbers jump from 118 to 125), so the loop structure cannot be confirmed
 * here. */
116 static void lcg_seek(uint32_t *s, uint32_t dt)
/* t is a working copy of the state; a and c are the coefficients of the step
 * currently being applied. */
118 uint32_t a, c, t = *s;
/* Applying x -> a*x + c twice gives a*a*x + (a + 1)*c, hence the increment of
 * the doubled step is c * (a + 1). */
125 c *= a + 1; /* coefficients for a double step */
132 /* Emulate pink noise by summing white noise at the sampling frequency,
133 * white noise at half the sampling frequency (each value taken twice),
134 * etc., with a total of 8 octaves.
135 * This is known as the Voss-McCartney algorithm. */
/* Fill ws->pink_pool with PINK_UNIT values drawn from the ws->pink_state
 * generator.
 * NOTE(review): several statements are missing from this listing (embedded
 * line numbers skip 138, 140-144, 147-149, 151-152, 154), including whatever
 * guards the refill and decides when each vt[j] is refreshed. */
137 static void pink_fill(struct wavesynth_context *ws)
/* vt: one held value per slower octave (7 of them); v: presumably their
 * running sum - confirm against the missing lines. */
139 int32_t vt[7] = { 0 }, v = 0;
145 for (i = 0; i < PINK_UNIT; i++) {
146 for (j = 0; j < 7; j++) {
/* Each white-noise source is scaled down by an arithmetic shift of 3 bits;
 * with 8 sources in total this presumably keeps their sum within int32_t. */
150 vt[j] = (int32_t)lcg_next(&ws->pink_state) >> 3;
/* Pool entry = accumulated v plus one fresh value at the full rate. */
153 ws->pink_pool[i] = v + ((int32_t)lcg_next(&ws->pink_state) >> 3);
/* One extra generator step whose result is discarded. */
155 lcg_next(&ws->pink_state); /* so we use exactly 256 steps */
/* Fixed-point division: see the @return line below for the contract. One of
 * three strategies is chosen from the magnitude of b.
 * NOTE(review): several statements are missing from this listing (embedded
 * line numbers skip 162-165, 167, 169, 172, 174-177, 181 onwards), among them
 * the shifts of a that the visible expressions rely on and the final returns. */
159 * @return (1<<64) * a / b, without overflow, if a < b
161 static uint64_t frac64(uint64_t a, uint64_t b)
166 if (b < (uint64_t)1 << 32) { /* b small, use two 32-bits steps */
/* The quotient forms the upper 32 bits of the result; the remainder, rescaled
 * by 1<<32 and divided again, forms the lower 32 bits. */
168 return ((a / b) << 32) | ((a % b) << 32) / b;
170 if (b < (uint64_t)1 << 48) { /* b medium, use four 16-bits steps */
/* Four rounds, each appending a quotient a / b below the bits already in r. */
171 for (i = 0; i < 4; i++) {
173 r = (r << 16) | (a / b);
/* General case: one result bit per iteration, from bit 63 down to bit 0. */
178 for (i = 63; i >= 0; i--) {
/* Tests whether 2*a >= b without overflowing: if the top bit of a is set,
 * 2*a does not fit in 64 bits and is therefore larger than any b. */
179 if (a >= (uint64_t)1 << 63 || a << 1 >= b) {
180 r |= (uint64_t)1 << i;
/* Phase of interval in at timestamp ts.
 * Starting from phi0, the phase grows by an increment that itself starts at
 * dphi0 and changes by ddphi each step, which sums to
 * phi0 + dt * dphi0 + (dt * (dt - 1) / 2) * ddphi.
 * All arithmetic is on uint64_t, so the result wraps modulo 1<<64. */
189 static uint64_t phi_at(struct ws_interval *in, int64_t ts)
/* Elapsed time since the interval start, in timestamp units. */
191 uint64_t dt = ts - in->ts_start;
/* Exactly one of dt and dt - 1 is even: halve that one before multiplying so
 * the division by 2 is exact and happens before the product can wrap. */
192 uint64_t dt2 = dt & 1 ? /* dt * (dt - 1) / 2 without overflow */
193 dt * ((dt - 1) >> 1) : (dt >> 1) * (dt - 1);
194 return in->phi0 + dt * in->dphi0 + dt2 * in->ddphi;
/* Reposition the synthesizer state to timestamp ts: recompute the running
 * phase, phase increment and amplitude of intervals, set next_ts, and move
 * both noise generators to the position matching ts.
 * NOTE(review): many statements are missing from this listing (embedded line
 * numbers skip 198-199, 201, 204, 206, 208-210, 214-215, 217, 219, 224-227,
 * 229 onwards), including the actions taken under the two timestamp tests,
 * the list linking done through last, and the update of cur_ts. */
197 static void wavesynth_seek(struct wavesynth_context *ws, int64_t ts)
200 struct ws_interval *in;
/* last starts at the cur_inter link of the context. */
202 last = &ws->cur_inter;
203 for (i = 0; i < ws->nb_inter; i++) {
/* Intervals that have not started yet at ts, or have already ended, are
 * tested for here; the statements these tests guard are not visible. */
205 if (ts < in->ts_start)
207 if (ts >= in->ts_end)
/* State of the interval at ts: the phase comes from phi_at(); the increment
 * and the amplitude are extrapolated linearly from their initial values over
 * the time elapsed since ts_start. */
211 in->phi = phi_at(in, ts);
212 in->dphi = in->dphi0 + (ts - in->ts_start) * in->ddphi;
213 in->amp = in->amp0 + (ts - in->ts_start) * in->damp;
/* Next pending start: the ts_start of interval i, or INF_TS once i has
 * reached nb_inter. */
216 ws->next_ts = i < ws->nb_inter ? ws->inter[i].ts_start : INF_TS;
/* Move the dither generator by the distance from cur_ts to ts, computed
 * modulo 1<<32. */
218 lcg_seek(&ws->dither_state, (uint32_t)ts - (uint32_t)ws->cur_ts);
/* pink_ts_cur: cur_ts rounded up to a multiple of PINK_UNIT; pink_ts_next: ts
 * rounded down to one; pos: offset of ts inside its PINK_UNIT block.
 * The masks assume PINK_UNIT is a power of two - its definition is not
 * visible here, confirm. */
220 uint64_t pink_ts_cur = (ws->cur_ts + (uint64_t)PINK_UNIT - 1) & ~(PINK_UNIT - 1);
221 uint64_t pink_ts_next = ts & ~(PINK_UNIT - 1);
222 int pos = ts & (PINK_UNIT - 1);
/* The pink generator moves by twice the block distance; presumably because
 * pink_fill() consumes two generator steps per pool entry - confirm. */
223 lcg_seek(&ws->pink_state, (uint32_t)(pink_ts_next - pink_ts_cur) * 2);
/* Same value wavesynth_init() stores in pink_pos; the condition under which
 * this assignment runs is not visible. */
228 ws->pink_pos = PINK_UNIT;
/* Parse avc->extradata into the ws->inter array of intervals.
 * Returns AVERROR(EINVAL) when the data is malformed and AVERROR(ENOMEM) when
 * the array cannot be allocated; the success return is not visible here.
 * NOTE(review): many statements are missing from this listing (the embedded
 * line numbers skip values throughout), including the advances of edata, the
 * switch on the interval type and some validity conditions. */
234 static int wavesynth_parse_extradata(AVCodecContext *avc)
236 struct wavesynth_context *ws = avc->priv_data;
237 struct ws_interval *in;
238 uint8_t *edata, *edata_end;
239 int32_t f1, f2, a1, a2;
/* cur_ts tracks the previous start timestamp for the ordering check; its
 * initial value is intended as the minimum int64_t, so the first interval
 * cannot fail that check. */
241 int64_t dphi1, dphi2, dt, cur_ts = -0x8000000000000000;
/* The 32-bit interval count needs at least 4 bytes. */
244 if (avc->extradata_size < 4)
245 return AVERROR(EINVAL);
246 edata = avc->extradata;
247 edata_end = edata + avc->extradata_size;
248 ws->nb_inter = AV_RL32(edata);
/* Each interval needs at least its 24-byte header, so reject negative counts
 * and counts that cannot fit in the available bytes. */
250 if (ws->nb_inter < 0 || (edata_end - edata) / 24 < ws->nb_inter)
251 return AVERROR(EINVAL);
/* Zero-initialized array, one element per interval. */
252 ws->inter = av_calloc(ws->nb_inter, sizeof(*ws->inter));
254 return AVERROR(ENOMEM);
255 for (i = 0; i < ws->nb_inter; i++) {
/* Interval header, little endian: start timestamp at offset 0, end timestamp
 * at 8, type fourcc at 16, channels word at 20. */
257 if (edata_end - edata < 24)
258 return AVERROR(EINVAL);
259 in->ts_start = AV_RL64(edata + 0);
260 in->ts_end = AV_RL64(edata + 8);
261 in->type = AV_RL32(edata + 16);
262 in->channels = AV_RL32(edata + 20);
/* Reject starts that go backwards, empty or reversed intervals, and spans
 * larger than INT64_MAX (the difference is taken in unsigned arithmetic so it
 * cannot overflow), which guarantees that dt below fits in int64_t. */
264 if (in->ts_start < cur_ts ||
265 in->ts_end <= in->ts_start ||
266 (uint64_t)in->ts_end - in->ts_start > INT64_MAX
268 return AVERROR(EINVAL);
269 cur_ts = in->ts_start;
270 dt = in->ts_end - in->ts_start;
/* Five 32-bit words follow: two frequencies, two amplitudes and a phase word
 * (presumably the WS_SINE case; the case label is not visible). A positive
 * sample rate is required because it is the divisor below. */
273 if (edata_end - edata < 20 || avc->sample_rate <= 0)
274 return AVERROR(EINVAL);
275 f1 = AV_RL32(edata + 0);
276 f2 = AV_RL32(edata + 4);
277 a1 = AV_RL32(edata + 8);
278 a2 = AV_RL32(edata + 12);
279 phi = AV_RL32(edata + 16);
/* Frequencies are in 1/(1<<16) Hz, so dividing by sample_rate << 16 yields
 * the phase advance per sample as a fraction of 1<<64 (see frac64()). */
281 dphi1 = frac64(f1, (int64_t)avc->sample_rate << 16);
282 dphi2 = frac64(f2, (int64_t)avc->sample_rate << 16);
/* Per-sample change of the phase increment: the signed difference between
 * the two increments, divided by the duration. */
284 in->ddphi = (int64_t)(dphi2 - (uint64_t)dphi1) / dt;
/* Top bit set: per the format description the phase word refers to a
 * previous interval whose phase must be matched; the initial phase is then
 * that interval's phase at this interval's start. The masking and range
 * check of the index are not visible here. */
285 if (phi & 0x80000000) {
288 return AVERROR(EINVAL);
289 in->phi0 = phi_at(&ws->inter[phi], in->ts_start);
/* Otherwise the phase word is an explicit phase, scaled up by 33 bits:
 * 0x20000000 becomes 1<<62, a quarter of the 64-bit phase range. */
291 in->phi0 = (uint64_t)phi << 33;
/* Two 32-bit amplitudes follow (presumably the WS_NOISE case; the case label
 * is not visible). */
295 if (edata_end - edata < 8)
296 return AVERROR(EINVAL);
297 a1 = AV_RL32(edata + 0);
298 a2 = AV_RL32(edata + 4);
/* NOTE(review): the condition leading to this return is not visible. */
302 return AVERROR(EINVAL);
/* Amplitude in 32.32-style fixed point: start value a1 << 32, with a signed
 * per-sample slope that reaches a2 << 32 after dt samples. */
304 in->amp0 = (uint64_t)a1 << 32;
305 in->damp = (int64_t)(((uint64_t)a2 << 32) - ((uint64_t)a1 << 32)) / dt;
/* Trailing or missing bytes after the declared intervals are an error. */
307 if (edata != edata_end)
308 return AVERROR(EINVAL);
/* Decoder init callback: checks the channel count, parses the extradata,
 * builds the sine lookup table, seeds both noise generators, seeks to
 * timestamp 0 and selects signed 16-bit output.
 * Returns AVERROR(EINVAL) for too many channels and AVERROR(ENOMEM) when the
 * sine table cannot be allocated.
 * NOTE(review): some statements are missing from this listing (embedded line
 * numbers skip 313, 315-316, 320, 322, 324, 326-327, 329, 340 onwards),
 * including the failure tests and the final return. */
312 static av_cold int wavesynth_init(AVCodecContext *avc)
314 struct wavesynth_context *ws = avc->priv_data;
317 if (avc->channels > WS_MAX_CHANNELS) {
318 av_log(avc, AV_LOG_ERROR,
319 "This implementation is limited to %d channels.\n",
321 return AVERROR(EINVAL);
323 r = wavesynth_parse_extradata(avc);
325 av_log(avc, AV_LOG_ERROR, "Invalid intervals definitions.\n");
/* Table of 1 << SIN_BITS entries holding one full period of the sine,
 * scaled to an amplitude of 32767 and rounded down. */
328 ws->sin = av_malloc(sizeof(*ws->sin) << SIN_BITS);
330 return AVERROR(ENOMEM);
331 for (i = 0; i < 1 << SIN_BITS; i++)
332 ws->sin[i] = floor(32767 * sin(2 * M_PI * i / (1 << SIN_BITS)));
/* Fixed seeds for both generators. */
333 ws->dither_state = MKTAG('D','I','T','H');
/* Count the noise intervals. */
334 for (i = 0; i < ws->nb_inter; i++)
335 ws->pink_need += ws->inter[i].type == WS_NOISE;
336 ws->pink_state = MKTAG('P','I','N','K');
/* pink_pos == PINK_UNIT is the value wavesynth_synth_sample() tests for
 * before reading from the pool. */
337 ws->pink_pos = PINK_UNIT;
338 wavesynth_seek(ws, 0);
339 avc->sample_fmt = AV_SAMPLE_FMT_S16;
/* Synthesize one sample at timestamp ts: add the contribution of intervals to
 * the per-channel accumulators (channels, as passed by wavesynth_decode()),
 * then draw a dither value.
 * NOTE(review): the signature continuation and many statements are missing
 * from this listing (embedded line numbers skip values throughout), including
 * the loop over current intervals, the amplitude and phase updates, the
 * per-bit channel test and the body of the dither loop. */
343 static void wavesynth_synth_sample(struct wavesynth_context *ws, int64_t ts,
348 struct ws_interval *in;
/* all_ch collects the channel bits of every interval that contributed. */
350 uint32_t c, all_ch = 0;
353 last = &ws->cur_inter;
/* The pool position is checked against PINK_UNIT before the read below;
 * the statement this test guards is not visible. */
354 if (ws->pink_pos == PINK_UNIT)
/* Next pool value, reduced to its upper 16 bits. */
356 pink = ws->pink_pool[ws->pink_pos++] >> 16;
/* Test for an interval that has ended at ts. */
360 if (ts >= in->ts_end) {
/* Sine contribution: the top SIN_BITS bits of the phase index the table;
 * the product is evaluated in unsigned arithmetic. */
369 val = amp * (unsigned)ws->sin[in->phi >> (64 - SIN_BITS)];
/* The phase increment changes by ddphi every sample. */
371 in->dphi += in->ddphi;
/* Noise contribution: amplitude times the pink noise value, also unsigned. */
374 val = amp * (unsigned)pink;
379 all_ch |= in->channels;
/* Walk the channel bits of the interval, lowest bit first, advancing the
 * accumulator pointer cv in step with the shift. */
380 for (c = in->channels, cv = channels; c; c >>= 1, cv++)
382 *cv += (unsigned)val;
/* Dither value: the upper 16 bits of the next generator output, signed. */
384 val = (int32_t)lcg_next(&ws->dither_state) >> 16;
/* Same bit walk over the union of channels used in this sample. */
385 for (c = all_ch, cv = channels; c; c >>= 1, cv++)
/* Process intervals starting from index next_inter at timestamp ts and
 * update next_ts; wavesynth_decode() calls this once ts has reached next_ts.
 * NOTE(review): several statements are missing from this listing (embedded
 * line numbers skip 391-392, 394, 399, 401, 403-406, 408-410, 412 onwards),
 * including the actions under the two timestamp tests and the linking of the
 * entered interval through last. */
390 static void wavesynth_enter_intervals(struct wavesynth_context *ws, int64_t ts)
393 struct ws_interval *in;
/* Follow the chain of next indices from cur_inter until a negative index, so
 * that last ends up pointing at the final link of the chain. */
395 last = &ws->cur_inter;
396 for (i = ws->cur_inter; i >= 0; i = ws->inter[i].next)
397 last = &ws->inter[i].next;
398 for (i = ws->next_inter; i < ws->nb_inter; i++) {
/* Not started yet at ts / already over at ts; the statements these tests
 * guard are not visible. */
400 if (ts < in->ts_start)
402 if (ts >= in->ts_end)
/* The running phase increment restarts from its initial value. */
407 in->dphi = in->dphi0;
/* Next pending start: the ts_start of interval i, or INF_TS once i has
 * reached nb_inter. */
411 ws->next_ts = i < ws->nb_inter ? ws->inter[i].ts_start : INF_TS;
/* Decode callback: the packet names a start timestamp and a duration, and
 * that many samples are synthesized into the output frame.
 * Returns AVERROR_INVALIDDATA when the packet is not exactly 12 bytes long.
 * NOTE(review): the signature continuation and some statements are missing
 * from this listing (embedded line numbers skip 416-417, 420-423, 425-426,
 * 433, 437-438, 447, 449 onwards), including the duration check that leads to
 * AVERROR(EINVAL), the check of r and the final return. */
415 static int wavesynth_decode(AVCodecContext *avc, void *rframe, int *rgot_frame,
418 struct wavesynth_context *ws = avc->priv_data;
419 AVFrame *frame = rframe;
/* One 32-bit accumulator per channel, reused for every sample. */
424 int32_t channels[WS_MAX_CHANNELS];
/* Packet layout, little endian: 64-bit timestamp followed by a 32-bit
 * duration at offset 8. */
427 if (packet->size != 12)
428 return AVERROR_INVALIDDATA;
429 ts = AV_RL64(packet->data);
/* Only seek when the request does not continue from the current position. */
430 if (ts != ws->cur_ts)
431 wavesynth_seek(ws, ts);
432 duration = AV_RL32(packet->data + 8);
434 return AVERROR(EINVAL);
435 frame->nb_samples = duration;
436 r = ff_get_buffer(avc, frame, 0);
/* Output is written as 16-bit samples, the channels of each sample one after
 * another. */
439 pcm = (int16_t *)frame->data[0];
440 for (s = 0; s < duration; s++, ts+=(uint64_t)1) {
/* Clear the accumulators, process pending interval starts once ts has
 * reached next_ts, then synthesize this sample. */
441 memset(channels, 0, avc->channels * sizeof(*channels));
442 if (ts >= ws->next_ts)
443 wavesynth_enter_intervals(ws, ts);
444 wavesynth_synth_sample(ws, ts, channels);
/* Keep the upper 16 bits of each 32-bit accumulator. */
445 for (c = 0; c < avc->channels; c++)
446 *(pcm++) = channels[c] >> 16;
/* Advance the current position by the number of samples produced. */
448 ws->cur_ts += (uint64_t)duration;
/* Decoder close callback: releases the interval array and resets the pointer
 * through av_freep().
 * NOTE(review): other statements of the body are missing from this listing
 * (embedded line numbers skip 454, 456-457, 459 onwards); whether the sine
 * table is also freed here cannot be confirmed from the visible lines. */
453 static av_cold int wavesynth_close(AVCodecContext *avc)
455 struct wavesynth_context *ws = avc->priv_data;
458 av_freep(&ws->inter);
462 AVCodec ff_ffwavesynth_decoder = {
464 .long_name = NULL_IF_CONFIG_SMALL("Wave synthesis pseudo-codec"),
465 .type = AVMEDIA_TYPE_AUDIO,
466 .id = AV_CODEC_ID_FFWAVESYNTH,
467 .priv_data_size = sizeof(struct wavesynth_context),
468 .init = wavesynth_init,
469 .close = wavesynth_close,
470 .decode = wavesynth_decode,
471 .capabilities = AV_CODEC_CAP_DR1,
472 .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,