2 * Wavesynth pseudo-codec
3 * Copyright (c) 2011 Nicolas George
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 #include "libavutil/intreadwrite.h"
23 #include "libavutil/log.h"
/* Largest channel count supported: wavesynth_init() rejects streams with more
 * channels, and wavesynth_decode() sizes its per-sample accumulator with it. */
29 #define WS_MAX_CHANNELS 32
/* Sentinel stored in next_ts when no further interval is left to enter:
 * the largest value a signed 64-bit timestamp can hold. */
30 #define INF_TS 0x7FFFFFFFFFFFFFFF
35 Format of the extradata and packets
37 THIS INFORMATION IS NOT PART OF THE PUBLIC API OR ABI.
38 IT CAN CHANGE WITHOUT NOTIFICATION.
40 All numbers are in little endian.
42 The codec extradata defines a set of intervals with uniform content.
43 Overlapping intervals are added together.
46 uint32 number of intervals
50 int64 start timestamp; time_base must be 1/sample_rate;
51 start timestamps must be in ascending order
55 ... additional information, depends on type
57 sine interval (type fourcc "SINE"):
58 int32 start frequency, in 1/(1<<16) Hz
60 int32 start amplitude, 1<<16 is the full amplitude
62 uint32 start phase, 0 is sin(0), 0x20000000 is sin(pi/2), etc.;
63 n | (1<<31) means to match the phase of previous channel #n
65 pink noise interval (type fourcc "NOIS"):
69 The input packets encode the time and duration of the requested segment.
/* Interval kinds. Each value is the fourcc tag that the extradata parser
 * reads into the type field of an interval. */
77 enum ws_interval_type {
/* Sine wave whose phase increment and amplitude change linearly over time. */
78 WS_SINE = MKTAG('S','I','N','E'),
/* Pink noise interval. */
79 WS_NOISE = MKTAG('N','O','I','S'),
/* Validity range of the interval. The seek and enter code treats
 * ts < ts_start as not started yet and ts >= ts_end as finished,
 * i.e. the range is half-open. */
83 int64_t ts_start, ts_end;
/* Phase at ts_start, phase increment per sample at ts_start, and the
 * per-sample change of that increment (see phi_at()). */
84 uint64_t phi0, dphi0, ddphi;
/* Running phase, phase increment and amplitude; recomputed by
 * wavesynth_seek() and advanced while synthesizing. */
86 uint64_t phi, dphi, amp;
/* WS_SINE or WS_NOISE, as read from the extradata. */
88 enum ws_interval_type type;
/* Private decoder state; its size is registered as priv_data_size in the
 * codec descriptor. Only some members are visible in this excerpt. */
92 struct wavesynth_context {
/* Array of nb_inter intervals allocated by wavesynth_parse_extradata(). */
97 struct ws_interval *inter;
/* LCG state used to draw the dither value in wavesynth_synth_sample(). */
98 uint32_t dither_state;
/* Pool of PINK_UNIT pink-noise samples written by pink_fill(). */
100 int32_t pink_pool[PINK_UNIT];
/* pink_need: number of WS_NOISE intervals counted in wavesynth_init().
 * pink_pos: read index into pink_pool; it is compared against PINK_UNIT
 * before each read, presumably to trigger a refill (confirm). */
101 unsigned pink_need, pink_pos;
/* Parameters of the 32-bit linear congruential generator
 * s -> s * LCG_A + LCG_C used by lcg_next(). */
107 #define LCG_A 1284865837
108 #define LCG_C 4150755663
/* Multiplicative inverse of LCG_A, used by lcg_seek() to step backward. */
109 #define LCG_AI 849225893 /* A*AI = 1 [mod 1<<32] */
/* Advance the generator state *s by one step. The result is reduced
 * modulo 1<<32 when it is stored back into the uint32_t state. */
111 static uint32_t lcg_next(uint32_t *s)
113 *s = *s * LCG_A + LCG_C;
/* Move the generator state *s by dt steps without iterating dt times.
 * A negative dt selects the coefficients of the inverse step; the visible
 * code then squares the step coefficients (double step) repeatedly. */
117 static void lcg_seek(uint32_t *s, int64_t dt)
/* a and c are the multiplier and increment of the current compound step,
 * t is the state being moved. */
119 uint32_t a, c, t = *s;
124 } else { /* coefficients for a step backward */
/* NOTE(review): the inverse of s2 = A * s1 + C is s1 = AI * s2 - AI * C
 * (mod 1<<32), so the additive constant of a backward step would be the
 * negation of this product. The sign here looks suspicious; verify that
 * seeking backward then forward restores the state. */
126 c = (uint32_t)(LCG_AI * LCG_C);
/* Composing the step x -> a * x + c with itself gives
 * x -> a * a * x + c * (a + 1). */
132 c *= a + 1; /* coefficients for a double step */
139 /* Emulate pink noise by summing white noise at the sampling frequency,
140 * white noise at half the sampling frequency (each value taken twice),
141 * etc., with a total of 8 octaves.
142 * This is known as the Voss-McCartney algorithm. */
/* Refill ws->pink_pool with PINK_UNIT samples drawn from ws->pink_state. */
144 static void pink_fill(struct wavesynth_context *ws)
/* vt holds one value per slower octave (7 of them); v is presumably their
 * running sum, since it is added to the full-rate term below (the lines that
 * update v are not visible here; confirm). */
146 int32_t vt[7] = { 0 }, v = 0;
152 for (i = 0; i < PINK_UNIT; i++) {
153 for (j = 0; j < 7; j++) {
/* Each term is a signed 32-bit random value divided by 8, so that the sum
 * of the 8 octaves still fits in an int32_t. */
157 vt[j] = (int32_t)lcg_next(&ws->pink_state) >> 3;
/* Full-rate octave: a fresh random term for every sample. */
160 ws->pink_pool[i] = v + ((int32_t)lcg_next(&ws->pink_state) >> 3);
/* Extra draw that keeps the number of generator steps per pool fixed;
 * wavesynth_seek() relies on a fixed step count when it skips pools. */
162 lcg_next(&ws->pink_state); /* so we use exactly 256 steps */
166 * @return (1<<64) * a / b, without overflow, if a < b
/* Fixed-point division helper; see the return contract stated just above.
 * Three strategies are chosen according to the magnitude of b so that no
 * intermediate value needs more than 64 bits. */
168 static uint64_t frac64(uint64_t a, uint64_t b)
173 if (b < (uint64_t)1 << 32) { /* b small, use two 32-bit steps */
175 return ((a / b) << 32) | ((a % b) << 32) / b;
177 if (b < (uint64_t)1 << 48) { /* b medium, use four 16-bit steps */
/* Each pass appends 16 more quotient bits to r. */
178 for (i = 0; i < 4; i++) {
180 r = (r << 16) | (a / b);
/* General case: binary long division, one quotient bit per pass,
 * most significant bit first. */
185 for (i = 63; i >= 0; i--) {
/* The first test covers the case where doubling a would overflow 64 bits;
 * in that case the doubled value necessarily exceeds b. */
186 if (a >= (uint64_t)1 << 63 || a << 1 >= b) {
187 r |= (uint64_t)1 << i;
/* Phase of interval in at timestamp ts:
 *   phi0 + dt * dphi0 + (dt * (dt - 1) / 2) * ddphi, with dt = ts - ts_start.
 * All arithmetic is unsigned 64-bit and wraps, which matches a phase that is
 * expressed as a fraction of a full turn. */
196 static uint64_t phi_at(struct ws_interval *in, int64_t ts)
198 uint64_t dt = ts - in->ts_start;
/* One of dt and dt - 1 is even; halving that factor before multiplying
 * keeps the division by two exact. */
199 uint64_t dt2 = dt & 1 ? /* dt * (dt - 1) / 2 without overflow */
200 dt * ((dt - 1) >> 1) : (dt >> 1) * (dt - 1);
201 return in->phi0 + dt * in->dphi0 + dt2 * in->ddphi;
/* Reposition the synthesizer at timestamp ts: recompute the running state of
 * the intervals that contain ts, record when the next interval starts, and
 * move both noise generators to the position matching ts. */
204 static void wavesynth_seek(struct wavesynth_context *ws, int64_t ts)
207 struct ws_interval *in;
/* last points at the link to fill next while the list of current intervals
 * is rebuilt, starting from its head index cur_inter. */
209 last = &ws->cur_inter;
210 for (i = 0; i < ws->nb_inter; i++) {
/* Intervals are sorted by start time, so the first one starting after ts
 * ends the scan (the statement under this test is not visible; confirm). */
212 if (ts < in->ts_start)
/* Interval already over at ts. */
214 if (ts >= in->ts_end)
/* Interval is active at ts: evaluate its state as if it had been running
 * since ts_start. */
218 in->phi = phi_at(in, ts);
219 in->dphi = in->dphi0 + (ts - in->ts_start) * in->ddphi;
220 in->amp = in->amp0 + (ts - in->ts_start) * in->damp;
/* i is the index of the first interval not yet started, if any. */
223 ws->next_ts = i < ws->nb_inter ? ws->inter[i].ts_start : INF_TS;
/* The dither generator is advanced by the timestamp difference, i.e. it is
 * treated as consuming one step per sample. */
225 lcg_seek(&ws->dither_state, ts - ws->cur_ts);
/* Pink noise is produced in pools of PINK_UNIT samples: round the current
 * position up and the target down to pool boundaries (assumes PINK_UNIT is a
 * power of two, TODO confirm). */
227 int64_t pink_ts_cur = (ws->cur_ts + PINK_UNIT - 1) & ~(PINK_UNIT - 1);
228 int64_t pink_ts_next = ts & ~(PINK_UNIT - 1);
/* Offset of ts inside its pool. */
229 int pos = ts & (PINK_UNIT - 1);
/* Two generator steps per sample; this matches the 256 steps per pool noted
 * in pink_fill() if PINK_UNIT is 128 (TODO confirm). */
230 lcg_seek(&ws->pink_state, (pink_ts_next - pink_ts_cur) << 1);
/* PINK_UNIT is the value the synthesis code tests for before reading the
 * pool. */
235 ws->pink_pos = PINK_UNIT;
/* Parse avc->extradata into the ws->inter array.
 * Layout read here: a 32-bit interval count, then for each interval a
 * 24-byte header (start, end, type, channel mask) followed by 20 bytes for a
 * sine interval or 8 bytes for a noise interval. All fields are read as
 * little endian.
 * Returns AVERROR(EINVAL) on malformed data and AVERROR(ENOMEM) on
 * allocation failure; the success return is not visible in this excerpt. */
241 static int wavesynth_parse_extradata(AVCodecContext *avc)
243 struct wavesynth_context *ws = avc->priv_data;
244 struct ws_interval *in;
245 uint8_t *edata, *edata_end;
246 int32_t f1, f2, a1, a2;
/* cur_ts tracks the previous start timestamp for the ordering check.
 * NOTE(review): the hexadecimal literal does not fit a signed 64-bit type,
 * so it is unsigned and the negation wraps; the result relies on the
 * conversion back to int64_t. INT64_MIN would state the intent directly. */
248 int64_t dphi1, dphi2, dt, cur_ts = -0x8000000000000000;
/* The interval count alone needs 4 bytes. */
251 if (avc->extradata_size < 4)
252 return AVERROR(EINVAL);
253 edata = avc->extradata;
254 edata_end = edata + avc->extradata_size;
255 ws->nb_inter = AV_RL32(edata);
/* A count with the top bit set becomes negative once stored. */
257 if (ws->nb_inter < 0)
258 return AVERROR(EINVAL);
/* NOTE(review): in the visible lines the count is not compared with the
 * remaining extradata size before allocating, so a small extradata could
 * request a very large array; consider bounding it by
 * (edata_end - edata) / 24. */
259 ws->inter = av_calloc(ws->nb_inter, sizeof(*ws->inter));
261 return AVERROR(ENOMEM);
262 for (i = 0; i < ws->nb_inter; i++) {
/* Common interval header: 8 + 8 + 4 + 4 bytes. */
264 if (edata_end - edata < 24)
265 return AVERROR(EINVAL);
266 in->ts_start = AV_RL64(edata + 0);
267 in->ts_end = AV_RL64(edata + 8);
268 in->type = AV_RL32(edata + 16);
269 in->channels = AV_RL32(edata + 20);
/* Start timestamps must not decrease and the interval must not be empty. */
271 if (in->ts_start < cur_ts || in->ts_end <= in->ts_start)
272 return AVERROR(EINVAL);
273 cur_ts = in->ts_start;
/* Duration in samples; positive thanks to the check above.
 * NOTE(review): the subtraction can overflow int64_t for extreme
 * timestamps (e.g. a very negative start and a very large end). */
274 dt = in->ts_end - in->ts_start;
/* Sine payload: two frequencies, two amplitudes and a phase word. */
277 if (edata_end - edata < 20)
278 return AVERROR(EINVAL);
279 f1 = AV_RL32(edata + 0);
280 f2 = AV_RL32(edata + 4);
281 a1 = AV_RL32(edata + 8);
282 a2 = AV_RL32(edata + 12);
283 phi = AV_RL32(edata + 16);
/* Convert each frequency (in 1/(1<<16) Hz) into a phase increment per
 * sample, as a fraction of 1<<64.
 * NOTE(review): f1 and f2 are signed; a negative value converts to a huge
 * uint64_t and breaks the a < b precondition documented for frac64(). The
 * same applies to frequencies at or above the sample rate. Verify whether
 * a missing line rejects these. */
285 dphi1 = frac64(f1, (int64_t)avc->sample_rate << 16);
286 dphi2 = frac64(f2, (int64_t)avc->sample_rate << 16);
/* Per-sample change of the increment, so that it goes from dphi1 to dphi2
 * over the interval. */
288 in->ddphi = (dphi2 - dphi1) / dt;
/* Top bit set: the remaining bits name an earlier interval whose phase at
 * this start time is reused. */
289 if (phi & 0x80000000) {
292 return AVERROR(EINVAL);
293 in->phi0 = phi_at(&ws->inter[phi], in->ts_start);
/* Explicit phase: scale so that 0x20000000 (documented as pi/2) becomes a
 * quarter of the 64-bit phase range. */
295 in->phi0 = (uint64_t)phi << 33;
/* Noise payload: start and end amplitude only. */
299 if (edata_end - edata < 8)
300 return AVERROR(EINVAL);
301 a1 = AV_RL32(edata + 0);
302 a2 = AV_RL32(edata + 4);
306 return AVERROR(EINVAL);
/* Amplitudes are kept with 32 extra fractional bits so that the per-sample
 * slope damp keeps precision.
 * NOTE(review): a1 and a2 are signed; left-shifting a negative value is
 * undefined behavior in C. Multiplying by ((int64_t)1 << 32) would avoid
 * it. */
308 in->amp0 = (int64_t)a1 << 32;
309 in->damp = (((int64_t)a2 << 32) - ((int64_t)a1 << 32)) / dt;
/* Trailing bytes after the last interval are rejected. */
311 if (edata != edata_end)
312 return AVERROR(EINVAL);
/* Codec init callback: validate the channel count, parse the interval
 * definitions from the extradata, build the sine lookup table, seed the
 * noise generators and position the synthesizer at timestamp 0. */
316 static av_cold int wavesynth_init(AVCodecContext *avc)
318 struct wavesynth_context *ws = avc->priv_data;
/* NOTE(review): only the upper bound is checked in the visible lines. */
321 if (avc->channels > WS_MAX_CHANNELS) {
322 av_log(avc, AV_LOG_ERROR,
323 "This implementation is limited to %d channels.\n",
325 return AVERROR(EINVAL);
327 r = wavesynth_parse_extradata(avc);
329 av_log(avc, AV_LOG_ERROR, "Invalid intervals definitions.\n");
/* Table with 1 << SIN_BITS entries. */
332 ws->sin = av_malloc(sizeof(*ws->sin) << SIN_BITS);
/* One full sine period, scaled to the signed 16-bit range. */
337 for (i = 0; i < 1 << SIN_BITS; i++)
338 ws->sin[i] = floor(32767 * sin(2 * M_PI * i / (1 << SIN_BITS)));
/* Fixed seeds make the generated noise reproducible. */
339 ws->dither_state = MKTAG('D','I','T','H');
/* Count the noise intervals. */
340 for (i = 0; i < ws->nb_inter; i++)
341 ws->pink_need += ws->inter[i].type == WS_NOISE;
342 ws->pink_state = MKTAG('P','I','N','K');
/* Same value the synthesis code tests for before reading the pool. */
343 ws->pink_pos = PINK_UNIT;
344 avcodec_get_frame_defaults(&ws->frame);
345 avc->coded_frame = &ws->frame;
346 wavesynth_seek(ws, 0);
/* Output is interleaved signed 16-bit PCM, matching the int16_t writes in
 * wavesynth_decode(). */
347 avc->sample_fmt = AV_SAMPLE_FMT_S16;
/* Produce one output sample at timestamp ts: accumulate the contribution of
 * every current interval into the per-channel array, then draw a dither
 * value for the channels that received a signal. Several statements of this
 * function are not visible in this excerpt. */
356 static void wavesynth_synth_sample(struct wavesynth_context *ws, int64_t ts,
359 int32_t amp, val, *cv;
360 struct ws_interval *in;
/* all_ch collects the channel masks of all intervals seen for this sample. */
362 uint32_t c, all_ch = 0;
365 last = &ws->cur_inter;
/* Pool position check before the read below (the statement under this
 * test is not visible; presumably a refill, confirm). */
366 if (ws->pink_pos == PINK_UNIT)
/* Take the next pink-noise sample and keep its upper 16 bits. */
368 pink = ws->pink_pool[ws->pink_pos++] >> 16;
/* Interval finished at ts. */
372 if (ts >= in->ts_end) {
/* The top SIN_BITS bits of the 64-bit phase index the sine table. */
381 val = amp * ws->sin[in->phi >> (64 - SIN_BITS)];
/* Linear frequency sweep: the increment itself changes every sample. */
383 in->dphi += in->ddphi;
391 all_ch |= in->channels;
/* Walk the channel mask: bit k of the mask corresponds to channels[k]. */
392 for (c = in->channels, cv = channels; c; c >>= 1, cv++)
/* Dither value: upper 16 bits of the next generator output, signed. */
396 val = (int32_t)lcg_next(&ws->dither_state) >> 16;
/* Same mask walk, over every channel touched by some interval. */
397 for (c = all_ch, cv = channels; c; c >>= 1, cv++)
/* Called when ts has reached ws->next_ts: scan the intervals from next_inter
 * onward, handle those that have started at ts, and update next_ts. */
402 static void wavesynth_enter_intervals(struct wavesynth_context *ws, int64_t ts)
405 struct ws_interval *in;
/* Find the tail link of the list of current intervals: the list is chained
 * through the next indices and ends at a negative index. */
407 last = &ws->cur_inter;
408 for (i = ws->cur_inter; i >= 0; i = ws->inter[i].next)
409 last = &ws->inter[i].next;
410 for (i = ws->next_inter; i < ws->nb_inter; i++) {
/* Not started yet; later intervals start no earlier (sorted input). */
412 if (ts < in->ts_start)
/* Already over at ts. */
414 if (ts >= in->ts_end)
/* Entering the interval at its start: the running increment begins at its
 * initial value. */
419 in->dphi = in->dphi0;
/* Start of the first interval not yet entered, or the sentinel. */
423 ws->next_ts = i < ws->nb_inter ? ws->inter[i].ts_start : INF_TS;
/* Decode callback. The packet carries no audio, only a request: a 64-bit
 * start timestamp followed by a 32-bit duration in samples. The requested
 * range is synthesized into a frame of interleaved 16-bit samples. */
427 static int wavesynth_decode(AVCodecContext *avc, void *rframe, int *rgot_frame,
430 struct wavesynth_context *ws = avc->priv_data;
/* Per-sample accumulator, one 32-bit slot per channel. */
435 int32_t channels[WS_MAX_CHANNELS];
/* 8 bytes of timestamp plus 4 bytes of duration. */
438 if (packet->size != 12)
439 return AVERROR_INVALIDDATA;
440 ts = AV_RL64(packet->data);
/* A request that does not continue the previous one needs a seek. */
441 if (ts != ws->cur_ts)
442 wavesynth_seek(ws, ts);
443 duration = AV_RL32(packet->data + 8);
445 return AVERROR(EINVAL);
446 ws->frame.nb_samples = duration;
447 r = ff_get_buffer(avc, &ws->frame);
450 pcm = (int16_t *)ws->frame.data[0];
451 for (s = 0; s < duration; s++, ts++) {
/* Clear only the slots of the channels actually in use. */
452 memset(channels, 0, avc->channels * sizeof(*channels));
453 if (ts >= ws->next_ts)
454 wavesynth_enter_intervals(ws, ts);
455 wavesynth_synth_sample(ws, ts, channels);
/* Keep the upper 16 bits of each accumulator as the output sample. */
456 for (c = 0; c < avc->channels; c++)
457 *(pcm++) = channels[c] >> 16;
/* Remember where this request ended so that a contiguous next request
 * skips the seek above. */
459 ws->cur_ts += duration;
/* Hand a copy of the frame structure to the caller. */
461 *(AVFrame *)rframe = ws->frame;
/* Codec close callback, registered as .close in the codec descriptor.
 * The body is not visible in this excerpt; presumably it releases the
 * buffers allocated during init (confirm). */
465 static av_cold int wavesynth_close(AVCodecContext *avc)
467 struct wavesynth_context *ws = avc->priv_data;
/* Codec descriptor wiring the callbacks defined in this file to the
 * AV_CODEC_ID_FFWAVESYNTH audio decoder. */
474 AVCodec ff_ffwavesynth_decoder = {
476 .type = AVMEDIA_TYPE_AUDIO,
477 .id = AV_CODEC_ID_FFWAVESYNTH,
/* Size of the private state reached through avc->priv_data. */
478 .priv_data_size = sizeof(struct wavesynth_context),
479 .init = wavesynth_init,
480 .close = wavesynth_close,
481 .decode = wavesynth_decode,
/* The decoder obtains its output buffer through ff_get_buffer(). */
482 .capabilities = CODEC_CAP_DR1,
483 .long_name = NULL_IF_CONFIG_SMALL("Wave synthesis pseudo-codec"),