2 * Wavesynth pseudo-codec
3 * Copyright (c) 2011 Nicolas George
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 #include "libavutil/intreadwrite.h"
23 #include "libavutil/log.h"
/* Size of the per-sample channel accumulator array in wavesynth_decode();
 * wavesynth_init() rejects contexts with more channels than this. */
27 #define WS_MAX_CHANNELS 32
/* Largest int64_t value; stored in next_ts when the interval scan ran past
 * the last interval. */
28 #define INF_TS 0x7FFFFFFFFFFFFFFF
33 Format of the extradata and packets
35 THIS INFORMATION IS NOT PART OF THE PUBLIC API OR ABI.
36 IT CAN CHANGE WITHOUT NOTIFICATION.
38 All numbers are in little endian.
40 The codec extradata define a set of intervals with uniform content.
41 Overlapping intervals are added together.
44 uint32 number of intervals
48 int64 start timestamp; time_base must be 1/sample_rate;
49 start timestamps must be in ascending order
53 ... additional information, depends on type
55 sine interval (type fourcc "SINE"):
56 int32 start frequency, in 1/(1<<16) Hz
58 int32 start amplitude, 1<<16 is the full amplitude
60 uint32 start phase, 0 is sin(0), 0x20000000 is sin(pi/2), etc.;
61 n | (1<<31) means to match the phase of previous interval #n
63 pink noise interval (type fourcc "NOIS"):
67 The input packets encode the time and duration of the requested segment.
/* Content type of an interval. Each value is the fourcc tag read from the
 * "type" field of the interval header in the extradata. */
75 enum ws_interval_type {
/* Sine interval (fourcc "SINE"). */
76 WS_SINE = MKTAG('S','I','N','E'),
/* Pink noise interval (fourcc "NOIS"). */
77 WS_NOISE = MKTAG('N','O','I','S'),
/* Time range of the interval, in samples (time_base is 1/sample_rate).
 * The scans in this file treat it as half-open: ts_start <= ts < ts_end. */
81 int64_t ts_start, ts_end;
/* Phase at ts_start, per-sample phase increment at ts_start, and per-sample
 * change of that increment; see phi_at() for how they combine. */
82 uint64_t phi0, dphi0, ddphi;
/* Running values at the current timestamp: phase, phase increment and
 * amplitude. They are recomputed by wavesynth_seek(). */
84 uint64_t phi, dphi, amp;
/* Interval content type, read from the extradata. */
86 enum ws_interval_type type;
/* Private decoder state; accessed through avc->priv_data. */
90 struct wavesynth_context {
/* Array of nb_inter intervals, allocated and filled by
 * wavesynth_parse_extradata(). */
95 struct ws_interval *inter;
/* State of the LCG that produces the dither value in
 * wavesynth_synth_sample(). */
96 uint32_t dither_state;
/* Pool of PINK_UNIT pink noise values, written by pink_fill(). */
98 int32_t pink_pool[PINK_UNIT];
/* pink_need: number of WS_NOISE intervals, counted in wavesynth_init().
 * pink_pos: next index to read from pink_pool; it is set to PINK_UNIT by
 * wavesynth_init() and wavesynth_seek(), and compared against PINK_UNIT
 * before each read. */
99 unsigned pink_need, pink_pos;
/* Parameters of the 32-bit linear congruential generator used by lcg_next()
 * and lcg_seek(): multiplier, increment, and the multiplicative inverse of
 * the multiplier modulo 1<<32 (used for stepping backward). */
105 #define LCG_A 1284865837
106 #define LCG_C 4150755663
107 #define LCG_AI 849225893 /* A*AI = 1 [mod 1<<32] */
/**
 * Advance the generator state by one step.
 *
 * @param s  generator state, updated in place
 */
109 static uint32_t lcg_next(uint32_t *s)
/* One LCG step; the arithmetic is done in uint32_t and therefore wraps
 * modulo 1<<32. */
111 *s = *s * LCG_A + LCG_C;
/**
 * Move the generator state by dt steps in one call instead of calling
 * lcg_next() repeatedly.
 *
 * @param s   generator state
 * @param dt  signed number of steps; a separate set of coefficients is
 *            prepared for stepping backward
 */
115 static void lcg_seek(uint32_t *s, int64_t dt)
/* a and c are the coefficients of the affine map x -> a*x + c currently
 * being applied; t starts as a copy of the state. */
117 uint32_t a, c, t = *s;
122 } else { /* coefficients for a step backward */
/* NOTE(review): the exact inverse of x -> LCG_A*x + LCG_C is
 * x -> LCG_AI*x - LCG_AI*LCG_C (mod 1<<32). Confirm that the sign of this
 * additive term is handled by code not visible here. */
124 c = (uint32_t)(LCG_AI * LCG_C);
/* Composing x -> a*x + c with itself gives x -> a*a*x + c*(a + 1). */
130 c *= a + 1; /* coefficients for a double step */
137 /* Emulate pink noise by summing white noise at the sampling frequency,
138 * white noise at half the sampling frequency (each value taken twice),
139 * etc., with a total of 8 octaves.
140 * This is known as the Voss-McCartney algorithm. */
/**
 * Generate PINK_UNIT new values into ws->pink_pool, drawing random numbers
 * from ws->pink_state.
 */
142 static void pink_fill(struct wavesynth_context *ws)
/* vt[] holds the current value of each of the 7 slower octaves. */
144 int32_t vt[7] = { 0 }, v = 0;
150 for (i = 0; i < PINK_UNIT; i++) {
151 for (j = 0; j < 7; j++) {
/* Each term is an arithmetic right shift of a signed 32-bit random value
 * by 3, i.e. 1/8 of the full range; presumably so that the sum of the
 * 8 octaves still fits in int32_t. */
155 vt[j] = (int32_t)lcg_next(&ws->pink_state) >> 3;
/* The eighth octave is a fresh white noise value for every sample. */
158 ws->pink_pool[i] = v + ((int32_t)lcg_next(&ws->pink_state) >> 3);
160 lcg_next(&ws->pink_state); /* so we use exactly 256 steps */
164 * @return (1<<64) * a / b, without overflow, if a < b
/* Fixed-point division producing a 64-bit fraction. The strategy is chosen
 * from the magnitude of b: the smaller b is, the more quotient bits can be
 * produced by a single native 64-bit division. */
166 static uint64_t frac64(uint64_t a, uint64_t b)
171 if (b < (uint64_t)1 << 32) { /* b small, use two 32-bits steps */
/* High 32 result bits come from a / b, low 32 bits from dividing the
 * remainder shifted up by 32. */
173 return ((a / b) << 32) | ((a % b) << 32) / b;
175 if (b < (uint64_t)1 << 48) { /* b medium, use four 16-bits steps */
176 for (i = 0; i < 4; i++) {
/* Append the next 16 quotient bits to the result. */
178 r = (r << 16) | (a / b);
/* General case: one quotient bit per iteration, most significant first. */
183 for (i = 63; i >= 0; i--) {
/* The first test catches the case where a << 1 would not fit in
 * 64 bits; such a value is necessarily >= b. */
184 if (a >= (uint64_t)1 << 63 || a << 1 >= b) {
185 r |= (uint64_t)1 << i;
/**
 * Compute the phase of an interval at a given timestamp:
 *   phi0 + dt * dphi0 + (dt * (dt - 1) / 2) * ddphi, with dt = ts - ts_start.
 * All arithmetic is done in uint64_t, so the result wraps modulo 1<<64.
 *
 * @param in  interval supplying ts_start, phi0, dphi0 and ddphi
 * @param ts  timestamp at which the phase is wanted
 */
194 static uint64_t phi_at(struct ws_interval *in, int64_t ts)
196 uint64_t dt = ts - in->ts_start;
/* Exactly one of dt and dt - 1 is even; that factor is halved before the
 * multiplication so that the division by 2 is exact. */
197 uint64_t dt2 = dt & 1 ? /* dt * (dt - 1) / 2 without overflow */
198 dt * ((dt - 1) >> 1) : (dt >> 1) * (dt - 1);
199 return in->phi0 + dt * in->dphi0 + dt2 * in->ddphi;
/**
 * Reposition the synthesizer at timestamp ts: scan all intervals, recompute
 * the running phase, phase increment and amplitude of those that contain ts,
 * and move the noise generators to the matching position.
 *
 * @param ws  decoder state
 * @param ts  target timestamp, in samples
 */
202 static void wavesynth_seek(struct wavesynth_context *ws, int64_t ts)
205 struct ws_interval *in;
/* last points at the link field that will receive the next interval index,
 * starting with the list head. */
207 last = &ws->cur_inter;
208 for (i = 0; i < ws->nb_inter; i++) {
/* Interval not started yet at ts. */
210 if (ts < in->ts_start)
/* Interval already over at ts. */
212 if (ts >= in->ts_end)
/* Interval contains ts: evaluate its state at ts. The phase increment and
 * the amplitude both vary linearly with the time elapsed since ts_start. */
216 in->phi = phi_at(in, ts);
217 in->dphi = in->dphi0 + (ts - in->ts_start) * in->ddphi;
218 in->amp = in->amp0 + (ts - in->ts_start) * in->damp;
/* i is where the scan stopped: either an interval index or nb_inter. */
221 ws->next_ts = i < ws->nb_inter ? ws->inter[i].ts_start : INF_TS;
/* The dither generator is moved by the timestamp difference, i.e. one LCG
 * step per sample. */
223 lcg_seek(&ws->dither_state, ts - ws->cur_ts);
/* The masks below assume PINK_UNIT is a power of two: pink_ts_cur is cur_ts
 * rounded up to a multiple of PINK_UNIT, pink_ts_next is ts rounded down,
 * and pos is the offset of ts inside its PINK_UNIT block. */
225 int64_t pink_ts_cur = (ws->cur_ts + PINK_UNIT - 1) & ~(PINK_UNIT - 1);
226 int64_t pink_ts_next = ts & ~(PINK_UNIT - 1);
227 int pos = ts & (PINK_UNIT - 1);
/* Two LCG steps per sample; presumably this matches the 256 steps that
 * pink_fill() consumes per pool - confirm against the value of PINK_UNIT.
 * NOTE(review): when seeking backward the difference is negative, and
 * left-shifting a negative signed value is undefined behaviour in C. */
228 lcg_seek(&ws->pink_state, (pink_ts_next - pink_ts_cur) << 1);
/* Same value as set by wavesynth_init(); wavesynth_synth_sample() compares
 * pink_pos against PINK_UNIT before reading the pool. */
233 ws->pink_pos = PINK_UNIT;
/**
 * Parse avc->extradata into the ws->inter array.
 *
 * Layout read by the visible code (all little endian): a 32-bit interval
 * count, then for each interval a 24-byte header (start timestamp, end
 * timestamp, type, channels) followed by a type-specific payload.
 *
 * @param avc  codec context providing extradata, extradata_size,
 *             sample_rate and priv_data
 * @return AVERROR(EINVAL) for malformed extradata, AVERROR(ENOMEM) on an
 *         ENOMEM return path following the allocation
 */
239 static int wavesynth_parse_extradata(AVCodecContext *avc)
241 struct wavesynth_context *ws = avc->priv_data;
242 struct ws_interval *in;
243 uint8_t *edata, *edata_end;
244 int32_t f1, f2, a1, a2;
/* cur_ts tracks the previous start timestamp for the ordering check; it
 * starts at the most negative 64-bit value so that any first interval is
 * accepted. */
246 int64_t dphi1, dphi2, dt, cur_ts = -0x8000000000000000;
/* The interval count alone takes 4 bytes. */
249 if (avc->extradata_size < 4)
250 return AVERROR(EINVAL);
251 edata = avc->extradata;
252 edata_end = edata + avc->extradata_size;
253 ws->nb_inter = AV_RL32(edata);
/* NOTE(review): only negative counts are rejected in the visible code; the
 * count is not compared with the remaining extradata size before it is
 * used as the allocation size. */
255 if (ws->nb_inter < 0)
256 return AVERROR(EINVAL);
257 ws->inter = av_calloc(ws->nb_inter, sizeof(*ws->inter));
259 return AVERROR(ENOMEM);
260 for (i = 0; i < ws->nb_inter; i++) {
/* Fixed header: two 64-bit timestamps and two 32-bit fields = 24 bytes. */
262 if (edata_end - edata < 24)
263 return AVERROR(EINVAL);
264 in->ts_start = AV_RL64(edata + 0);
265 in->ts_end = AV_RL64(edata + 8);
266 in->type = AV_RL32(edata + 16);
267 in->channels = AV_RL32(edata + 20);
/* Start timestamps must not decrease, and an interval must not be empty;
 * the second condition also guarantees dt > 0 for the divisions below. */
269 if (in->ts_start < cur_ts || in->ts_end <= in->ts_start)
270 return AVERROR(EINVAL);
271 cur_ts = in->ts_start;
272 dt = in->ts_end - in->ts_start;
/* Payload of 20 bytes: start/end frequency, start/end amplitude, phase. */
275 if (edata_end - edata < 20)
276 return AVERROR(EINVAL);
277 f1 = AV_RL32(edata + 0);
278 f2 = AV_RL32(edata + 4);
279 a1 = AV_RL32(edata + 8);
280 a2 = AV_RL32(edata + 12);
281 phi = AV_RL32(edata + 16);
/* Convert the frequencies (in 1/(1<<16) Hz) to a fraction of a full turn
 * per sample, scaled to 64 bits.
 * NOTE(review): frac64() is documented as valid only for a < b. f1 and f2
 * are signed and are not range-checked in the visible code, so a negative
 * value (converted to a huge uint64_t), a frequency at or above the sample
 * rate, or a zero sample_rate would violate that precondition. */
283 dphi1 = frac64(f1, (int64_t)avc->sample_rate << 16);
284 dphi2 = frac64(f2, (int64_t)avc->sample_rate << 16);
/* Per-sample change of the phase increment over the interval. */
286 in->ddphi = (dphi2 - dphi1) / dt;
/* Bit 31 set: the field refers to another interval instead of holding a
 * literal phase. */
287 if (phi & 0x80000000) {
290 return AVERROR(EINVAL);
/* Start with the phase that interval #phi has at this interval's start. */
291 in->phi0 = phi_at(&ws->inter[phi], in->ts_start);
/* Literal phase: scale to the 64-bit phase range. 0x20000000 << 33 is
 * 1<<62, a quarter of the range, matching the documented sin(pi/2). */
293 in->phi0 = (uint64_t)phi << 33;
/* Payload of 8 bytes: start and end amplitude. */
297 if (edata_end - edata < 8)
298 return AVERROR(EINVAL);
299 a1 = AV_RL32(edata + 0);
300 a2 = AV_RL32(edata + 4);
304 return AVERROR(EINVAL);
/* Amplitudes are kept with 32 extra fractional bits; damp is the per-sample
 * amplitude change over the interval.
 * NOTE(review): a1 and a2 are signed; left-shifting a negative value is
 * undefined behaviour in C. */
306 in->amp0 = (int64_t)a1 << 32;
307 in->damp = (((int64_t)a2 << 32) - ((int64_t)a1 << 32)) / dt;
/* The extradata must be consumed exactly; trailing or missing bytes are
 * an error. */
309 if (edata != edata_end)
310 return AVERROR(EINVAL);
/**
 * Codec init callback: check the channel count, parse the extradata, build
 * the sine table, seed the noise generators and position the synthesizer at
 * timestamp 0.
 *
 * @param avc  codec context
 * @return AVERROR(EINVAL) when the channel count exceeds WS_MAX_CHANNELS
 */
314 static av_cold int wavesynth_init(AVCodecContext *avc)
316 struct wavesynth_context *ws = avc->priv_data;
319 if (avc->channels > WS_MAX_CHANNELS) {
320 av_log(avc, AV_LOG_ERROR,
321 "This implementation is limited to %d channels.\n",
323 return AVERROR(EINVAL);
325 r = wavesynth_parse_extradata(avc);
327 av_log(avc, AV_LOG_ERROR, "Invalid intervals definitions.\n");
/* Room for 1 << SIN_BITS table entries. */
330 ws->sin = av_malloc(sizeof(*ws->sin) << SIN_BITS);
/* One full sine period, scaled to an amplitude of 32767. */
335 for (i = 0; i < 1 << SIN_BITS; i++)
336 ws->sin[i] = floor(32767 * sin(2 * M_PI * i / (1 << SIN_BITS)));
/* Both generators start from a constant seed. */
337 ws->dither_state = MKTAG('D','I','T','H');
/* Count the noise intervals. */
338 for (i = 0; i < ws->nb_inter; i++)
339 ws->pink_need += ws->inter[i].type == WS_NOISE;
340 ws->pink_state = MKTAG('P','I','N','K');
341 ws->pink_pos = PINK_UNIT;
342 avcodec_get_frame_defaults(&ws->frame);
343 avc->coded_frame = &ws->frame;
344 wavesynth_seek(ws, 0);
/* wavesynth_decode() writes int16_t samples. */
345 avc->sample_fmt = AV_SAMPLE_FMT_S16;
/**
 * Synthesize one sample at timestamp ts into the per-channel accumulators.
 *
 * @param ws  decoder state
 * @param ts  timestamp of the sample
 */
354 static void wavesynth_synth_sample(struct wavesynth_context *ws, int64_t ts,
357 int32_t amp, val, *cv;
358 struct ws_interval *in;
/* all_ch collects the channel masks of the intervals processed for this
 * sample. */
360 uint32_t c, all_ch = 0;
363 last = &ws->cur_inter;
364 if (ws->pink_pos == PINK_UNIT)
/* Take the next pool entry and keep its upper 16 bits. */
366 pink = ws->pink_pool[ws->pink_pos++] >> 16;
/* The interval is over at ts. */
370 if (ts >= in->ts_end) {
/* The top SIN_BITS bits of the 64-bit phase index the sine table. */
379 val = amp * ws->sin[in->phi >> (64 - SIN_BITS)];
/* Advance the phase increment by its per-sample change. */
381 in->dphi += in->ddphi;
389 all_ch |= in->channels;
/* Walk the channel bit mask from bit 0 upward; cv advances through the
 * accumulator array in step with the mask. */
390 for (c = in->channels, cv = channels; c; c >>= 1, cv++)
/* Dither value: the upper 16 bits of a signed 32-bit random number. */
394 val = (int32_t)lcg_next(&ws->dither_state) >> 16;
/* Same bit-mask walk, over the union of the masks collected in all_ch. */
395 for (c = all_ch, cv = channels; c; c >>= 1, cv++)
/**
 * Scan the intervals from ws->next_inter onward for those that apply at
 * timestamp ts, and update ws->next_ts.
 *
 * @param ws  decoder state
 * @param ts  current timestamp
 */
400 static void wavesynth_enter_intervals(struct wavesynth_context *ws, int64_t ts)
403 struct ws_interval *in;
/* Intervals are chained by index through their next field, with a negative
 * index ending the chain. Walk the chain starting at cur_inter so that last
 * ends up pointing at the final link field. */
405 last = &ws->cur_inter;
406 for (i = ws->cur_inter; i >= 0; i = ws->inter[i].next)
407 last = &ws->inter[i].next;
408 for (i = ws->next_inter; i < ws->nb_inter; i++) {
/* Interval not started yet at ts. */
410 if (ts < in->ts_start)
/* Interval already over at ts. */
412 if (ts >= in->ts_end)
/* Running phase increment starts from its initial value. */
417 in->dphi = in->dphi0;
/* i is where the scan stopped: either an interval index or nb_inter. */
421 ws->next_ts = i < ws->nb_inter ? ws->inter[i].ts_start : INF_TS;
/**
 * Codec decode callback: synthesize the segment requested by the packet.
 *
 * The packet must be exactly 12 bytes: a little-endian 64-bit timestamp
 * followed by a little-endian 32-bit duration in samples.
 *
 * @param avc     codec context
 * @param rframe  receives a copy of the output AVFrame
 * @return AVERROR_INVALIDDATA when the packet size is not 12
 */
425 static int wavesynth_decode(AVCodecContext *avc, void *rframe, int *rgot_frame,
428 struct wavesynth_context *ws = avc->priv_data;
/* One 32-bit accumulator per channel, reused for every sample. */
433 int32_t channels[WS_MAX_CHANNELS];
436 if (packet->size != 12)
437 return AVERROR_INVALIDDATA;
438 ts = AV_RL64(packet->data);
/* Seek only when the request does not continue at the current position. */
439 if (ts != ws->cur_ts)
440 wavesynth_seek(ws, ts);
441 duration = AV_RL32(packet->data + 8);
443 return AVERROR(EINVAL);
444 ws->frame.nb_samples = duration;
445 r = avc->get_buffer(avc, &ws->frame);
448 pcm = (int16_t *)ws->frame.data[0];
449 for (s = 0; s < duration; s++, ts++) {
/* Only the first avc->channels accumulators are cleared, and only those
 * are read back below. */
450 memset(channels, 0, avc->channels * sizeof(*channels));
/* next_ts is the start of the next interval not yet examined. */
451 if (ts >= ws->next_ts)
452 wavesynth_enter_intervals(ws, ts);
453 wavesynth_synth_sample(ws, ts, channels);
/* Samples are written channel after channel for each timestamp, keeping
 * the upper 16 bits of each accumulator. */
454 for (c = 0; c < avc->channels; c++)
455 *(pcm++) = channels[c] >> 16;
457 ws->cur_ts += duration;
459 *(AVFrame *)rframe = ws->frame;
/**
 * Codec close callback, registered as .close in ff_ffwavesynth_decoder.
 *
 * @param avc  codec context whose priv_data is the wavesynth_context
 */
463 static av_cold int wavesynth_close(AVCodecContext *avc)
465 struct wavesynth_context *ws = avc->priv_data;
/* Codec registration entry: an audio decoder whose private data is a
 * struct wavesynth_context and whose init, close and decode callbacks are
 * the functions defined in this file. */
472 AVCodec ff_ffwavesynth_decoder = {
474 .type = AVMEDIA_TYPE_AUDIO,
475 .id = AV_CODEC_ID_FFWAVESYNTH,
476 .priv_data_size = sizeof(struct wavesynth_context),
477 .init = wavesynth_init,
478 .close = wavesynth_close,
479 .decode = wavesynth_decode,
480 .capabilities = CODEC_CAP_DR1,
481 .long_name = NULL_IF_CONFIG_SMALL("Wave synthesis pseudo-codec"),