From bd548bb6f007d0b106a241480676bbc18196bdc8 Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson"
Date: Thu, 5 Mar 2015 19:28:48 +0100
Subject: [PATCH] Use ffmpeg to read audio files, instead of assuming raw
 format.

---
 Makefile        |   5 +-
 audioreader.cpp | 236 ++++++++++++++++++++++++++++++++++++++++++++++++
 audioreader.h   |  13 +++
 decode.cpp      |  16 +-
 4 files changed, 260 insertions(+), 10 deletions(-)
 create mode 100644 audioreader.cpp
 create mode 100644 audioreader.h

diff --git a/Makefile b/Makefile
index a4eaae9..91e6354 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,5 @@
 CXXFLAGS=--std=gnu++0x -O2 -ffast-math -g -Wall
+LDLIBS=-lavcodec -lavformat -lavutil -lswresample
 
 all: synth decode sync level cleaner
 
@@ -10,8 +11,8 @@ OBJS=decode.o synth.o synth_main.o interpolate.o sync.o level.o
 DEPS=$(OBJS:.o=.d)
 -include $(DEPS)
 
-decode: interpolate.o decode.o
-	$(CXX) -o $@ $^ $(LDFLAGS)
+decode: interpolate.o audioreader.o decode.o
+	$(CXX) -o $@ $^ $(LDLIBS) $(LDFLAGS)
 
 synth: synth.o synth_main.o
 	$(CXX) -o $@ $^ $(LDFLAGS)
diff --git a/audioreader.cpp b/audioreader.cpp
new file mode 100644
index 0000000..b95dbf4
--- /dev/null
+++ b/audioreader.cpp
@@ -0,0 +1,236 @@
+#include <stdio.h>
+
+extern "C" {
+
+#include <libavcodec/avcodec.h>
+#include <libavformat/avformat.h>
+#include <libswresample/swresample.h>
+
+}
+
+#include <memory>
+#include <vector>
+
+namespace {
+
+// Deleters that let std::unique_ptr release the FFmpeg objects used below.
+
+struct AVFormatCloserAndDeleter {
+	void operator() (AVFormatContext *ctx) {
+		// avformat_close_input() frees the context as well as closing it,
+		// so no separate avformat_free_context() call is needed.
+		avformat_close_input(&ctx);
+	}
+};
+
+struct AVCodecContextDeleter {
+	void operator() (AVCodecContext *ctx) {
+		avcodec_free_context(&ctx);
+	}
+};
+
+struct SwrContextDeleter {
+	void operator() (SwrContext *swr) {
+		swr_free(&swr);
+	}
+};
+
+struct AVPacketDeleter {
+	void operator() (AVPacket *pkt) {
+		av_free_packet(pkt);
+	}
+};
+
+struct AVFrameDeleter {
+	void operator() (AVFrame *frame) {
+		av_frame_free(&frame);
+	}
+};
+
+struct AVSampleDeleter {
+	void operator() (uint8_t *data) {
+		av_freep(&data);
+	}
+};
+
+// Converts nb_samples samples from data (or, if data is nullptr, whatever
+// the resampler still has buffered) and appends the result to *samples.
+// Returns false if the output buffer can't be allocated or the conversion
+// fails.
+bool convert_samples(SwrContext *swr, int sample_rate, const uint8_t **data, int nb_samples, std::vector<int16_t> *samples)
+{
+	int max_out_samples = nb_samples + swr_get_delay(swr, sample_rate);
+	if (max_out_samples <= 0) {
+		return true;
+	}
+	uint8_t *output = nullptr;
+	if (av_samples_alloc(&output, nullptr, 1, max_out_samples, AV_SAMPLE_FMT_S16, 0) < 0) {
+		return false;
+	}
+	std::unique_ptr<uint8_t, AVSampleDeleter> output_deleter(output);
+
+	int out_samples = swr_convert(swr, &output, max_out_samples, data, nb_samples);
+	if (out_samples < 0) {
+		return false;
+	}
+	const int16_t *start = reinterpret_cast<const int16_t *>(output);
+	samples->insert(samples->end(), start, start + out_samples);
+	return true;
+}
+
+// Decodes at most one frame from *packet and appends its converted samples
+// to *samples. *got_frame is nonzero afterwards only if a frame was decoded
+// and converted. Returns the number of bytes consumed from the packet, or a
+// negative value on error.
+int decode_packet(const char *filename, AVCodecContext *codec_ctx, SwrContext *swr, AVFrame *audio_frame, AVPacket *packet, int *got_frame, std::vector<int16_t> *samples)
+{
+	*got_frame = 0;
+	int len1 = avcodec_decode_audio4(codec_ctx, audio_frame, got_frame, packet);
+	if (len1 < 0 || !*got_frame) {
+		return len1;
+	}
+
+	if (audio_frame->channel_layout != codec_ctx->channel_layout ||
+	    audio_frame->sample_rate != codec_ctx->sample_rate) {
+		fprintf(stderr, "%s: Channel layout or sample rate changed mid-file\n", filename);
+		*got_frame = 0;
+		return len1;
+	}
+	const uint8_t **frame_data = const_cast<const uint8_t **>(audio_frame->data);
+	if (!convert_samples(swr, codec_ctx->sample_rate, frame_data, audio_frame->nb_samples, samples)) {
+		fprintf(stderr, "%s: Couldn't convert audio samples\n", filename);
+		*got_frame = 0;
+		return -1;
+	}
+	return len1;
+}
+
+}  // namespace
+
+// Decodes the first audio stream in the file, downmixes it to mono and
+// appends it to *samples as signed 16-bit samples (the sample rate is left
+// unchanged). Returns false after printing a message to stderr if anything
+// fails; *samples may already have received part of the audio by then.
+bool read_audio_file(const char *filename, std::vector<int16_t> *samples)
+{
+	av_register_all();
+
+	AVFormatContext *format_ctx = nullptr;
+	if (avformat_open_input(&format_ctx, filename, nullptr, nullptr) != 0) {
+		fprintf(stderr, "Couldn't open %s\n", filename);
+		return false;
+	}
+	std::unique_ptr<AVFormatContext, AVFormatCloserAndDeleter> format_ctx_closer(format_ctx);
+
+	if (avformat_find_stream_info(format_ctx, nullptr) < 0) {
+		fprintf(stderr, "%s: Couldn't find stream information\n", filename);
+		return false;
+	}
+
+	// Find the first audio stream.
+	int audio_stream_index = -1;
+	for (unsigned i = 0; i < format_ctx->nb_streams; ++i) {
+		if (format_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
+			audio_stream_index = i;
+			break;
+		}
+	}
+	if (audio_stream_index == -1) {
+		fprintf(stderr, "%s: Couldn't find an audio stream\n", filename);
+		return false;
+	}
+
+	AVCodec *codec = avcodec_find_decoder(format_ctx->streams[audio_stream_index]->codec->codec_id);
+	if (codec == nullptr) {
+		fprintf(stderr, "%s: Unsupported codec\n", filename);
+		return false;
+	}
+
+	AVCodecContext *codec_ctx = avcodec_alloc_context3(codec);
+	if (codec_ctx == nullptr) {
+		fprintf(stderr, "%s: Couldn't allocate codec context\n", filename);
+		return false;
+	}
+	std::unique_ptr<AVCodecContext, AVCodecContextDeleter> codec_ctx_deleter(codec_ctx);
+	if (avcodec_copy_context(codec_ctx, format_ctx->streams[audio_stream_index]->codec) != 0) {
+		fprintf(stderr, "%s: Couldn't copy codec context\n", filename);
+		return false;
+	}
+
+	if (avcodec_open2(codec_ctx, codec, nullptr) < 0) {
+		fprintf(stderr, "%s: Couldn't open codec\n", filename);
+		return false;
+	}
+
+	// Init resampler (to downmix to mono and convert to s16).
+	if (codec_ctx->channel_layout == 0) {
+		codec_ctx->channel_layout = av_get_default_channel_layout(codec_ctx->channels);
+	}
+	SwrContext *swr = swr_alloc_set_opts(
+		nullptr,
+		AV_CH_LAYOUT_MONO, AV_SAMPLE_FMT_S16, codec_ctx->sample_rate,
+		codec_ctx->channel_layout, codec_ctx->sample_fmt, codec_ctx->sample_rate,
+		0, nullptr);
+	std::unique_ptr<SwrContext, SwrContextDeleter> swr_deleter(swr);
+	if (swr == nullptr || swr_init(swr) < 0) {
+		fprintf(stderr, "%s: Couldn't initialize resampler\n", filename);
+		return false;
+	}
+
+	AVFrame *audio_frame = av_frame_alloc();
+	if (audio_frame == nullptr) {
+		fprintf(stderr, "%s: Couldn't allocate audio frame\n", filename);
+		return false;
+	}
+	std::unique_ptr<AVFrame, AVFrameDeleter> audio_frame_deleter(audio_frame);
+
+	AVPacket packet;
+	while (av_read_frame(format_ctx, &packet) >= 0) {
+		std::unique_ptr<AVPacket, AVPacketDeleter> av_packet_deleter(&packet);
+
+		if (packet.stream_index != audio_stream_index) {
+			continue;
+		}
+
+		// Walk through the payload using a copy of the packet, so that
+		// the packet freed by the deleter still has the data pointer and
+		// size that av_read_frame() filled in.
+		AVPacket remaining = packet;
+		while (remaining.size > 0) {
+			int got_frame = 0;
+			int len1 = decode_packet(filename, codec_ctx, swr, audio_frame, &remaining, &got_frame, samples);
+			if (len1 < 0) {
+				fprintf(stderr, "%s: Couldn't decode audio\n", filename);
+				return false;
+			}
+			if (!got_frame) {
+				break;
+			}
+			remaining.data += len1;
+			remaining.size -= len1;
+		}
+	}
+
+	// Flush any delayed data from the end, using a freshly initialized
+	// empty packet instead of reusing whatever the read loop left behind.
+	AVPacket flush_packet;
+	av_init_packet(&flush_packet);
+	flush_packet.data = nullptr;
+	flush_packet.size = 0;
+	int got_frame = 0;
+	do {
+		int len1 = decode_packet(filename, codec_ctx, swr, audio_frame, &flush_packet, &got_frame, samples);
+		if (len1 < 0) {
+			fprintf(stderr, "%s: Couldn't decode audio\n", filename);
+			return false;
+		}
+	} while (got_frame);
+
+	// Convert any leftover samples from the converter.
+	if (!convert_samples(swr, codec_ctx->sample_rate, nullptr, 0, samples)) {
+		fprintf(stderr, "%s: Couldn't convert audio samples\n", filename);
+		return false;
+	}
+
+	return true;
+}
diff --git a/audioreader.h b/audioreader.h
new file mode 100644
index 0000000..273ebd2
--- /dev/null
+++ b/audioreader.h
@@ -0,0 +1,13 @@
+#ifndef _AUDIOREADER_H
+#define _AUDIOREADER_H 1
+
+#include <stdint.h>
+
+#include <vector>
+
+// Reads the first audio stream of the given file (any format FFmpeg can
+// decode), downmixes it to mono and appends it to *samples as signed 16-bit
+// samples. Returns false, after printing an error to stderr, on failure.
+bool read_audio_file(const char *filename, std::vector<int16_t> *samples);
+
+#endif // !defined(_AUDIOREADER_H)
diff --git a/decode.cpp b/decode.cpp
index 49be647..e85f579 100644
--- a/decode.cpp
+++ b/decode.cpp
@@ -8,4 +8,5 @@
 
+#include "audioreader.h"
 #include "interpolate.h"
 #include "tap.h"
 
@@ -58,11 +59,10 @@ int main(int argc, char **argv)
-
-	while (!feof(stdin)) {
-		short buf[BUFSIZE];
-		ssize_t ret = fread(buf, 2, BUFSIZE, stdin);
-		if (ret >= 0) {
-			pcm.insert(pcm.end(), buf, buf + ret);
-		}
-	}
+	if (argc < 2) {
+		fprintf(stderr, "Usage: %s AUDIOFILE\n", argv[0]);
+		exit(1);
+	}
+	if (!read_audio_file(argv[1], &pcm)) {
+		exit(1);
+	}
 
 #if 0
 	for (int i = 0; i < LEN; ++i) {
-- 
2.39.2