5 #define __STDC_CONSTANT_MACROS
7 #include <libavcodec/avcodec.h>
8 #include <libavformat/avformat.h>
9 #include <libswresample/swresample.h>
10 #include <libavutil/avutil.h>
// Deleter functor for AVFormatContext pointers; read_audio_file uses it as the
// std::unique_ptr deleter for the context returned by avformat_open_input.
19 struct AVFormatCloserAndDeleter {
20 void operator() (AVFormatContext *ctx) {
// Close the input. The address of the local pointer is passed, so the callee
// may modify `ctx`.
21 avformat_close_input(&ctx);
// NOTE(review): FFmpeg documents avformat_close_input as freeing the context
// and setting the pointer to NULL, which would make this second call a no-op
// -- confirm against the targeted FFmpeg version.
22 avformat_free_context(ctx);
// Deleter functor taking an AVCodecContext*; read_audio_file uses it as the
// std::unique_ptr deleter for the context from avcodec_alloc_context3.
// NOTE(review): the body is not visible in this excerpt -- confirm what it
// releases.
26 struct AVCodecContextDeleter {
27 void operator() (AVCodecContext *ctx) {
// Deleter functor taking a SwrContext*; read_audio_file uses it as the
// std::unique_ptr deleter for the resampler from swr_alloc_set_opts.
// NOTE(review): the body is not visible in this excerpt -- confirm what it
// releases.
33 struct SwrContextDeleter {
34 void operator() (SwrContext *swr) {
// Deleter functor taking an AVPacket*; read_audio_file attaches it to the
// address of its local `packet` inside the read loop.
// NOTE(review): the body is not visible in this excerpt. Because it is applied
// to a packet that is not heap-allocated by the caller, it presumably releases
// only the packet's payload rather than the AVPacket itself -- confirm.
39 struct AVPacketDeleter {
40 void operator() (AVPacket *pkt) {
// Deleter functor for AVFrame pointers; read_audio_file uses it as the
// std::unique_ptr deleter for the frame obtained from av_frame_alloc.
45 struct AVFrameDeleter {
46 void operator() (AVFrame *frame) {
// Releases the frame; the address of the local pointer is passed to the callee.
47 av_frame_free(&frame);
// Deleter functor taking a uint8_t*; convert_samples uses it as the
// std::unique_ptr deleter for the buffer filled in by av_samples_alloc.
// NOTE(review): the body is not visible in this excerpt -- confirm it uses the
// deallocation function that matches av_samples_alloc.
51 struct AVSampleDeleter {
52 void operator() (uint8_t *data) {
// Runs `nb_samples` input samples from `data` through the resampler `swr` and
// appends the converted output to `*samples` as floats.
//
//   swr         - resampler context passed to swr_get_delay and swr_convert.
//   sample_rate - time base handed to swr_get_delay.
//   data        - input sample planes; read_audio_file also calls this with
//                 nullptr and nb_samples == 0 to convert leftover samples.
//   nb_samples  - number of input samples in `data`.
//   samples     - destination vector; converted samples are appended to it.
57 void convert_samples(SwrContext *swr, int sample_rate, const uint8_t **data, int nb_samples, std::vector<float> *samples)
// Output capacity: the new input plus whatever delay the resampler reports.
// NOTE(review): FFmpeg declares swr_get_delay as returning int64_t; the sum is
// stored in an int here -- confirm truncation cannot matter.
59 int max_out_samples = nb_samples + swr_get_delay(swr, sample_rate);
// NOTE(review): the body of this branch is not visible in this excerpt;
// presumably it returns early because there is nothing to convert.
60 if (max_out_samples == 0) {
// Allocate a one-channel AV_SAMPLE_FMT_FLT buffer for max_out_samples samples.
// NOTE(review): the return value of av_samples_alloc is discarded, so an
// allocation failure is not detected here.
64 av_samples_alloc(&output, nullptr, 1, max_out_samples, AV_SAMPLE_FMT_FLT, 0);
// Hand the buffer to a unique_ptr so AVSampleDeleter runs on scope exit.
65 std::unique_ptr<uint8_t, AVSampleDeleter> output_deleter(output);
67 int out_samples = swr_convert(swr, &output, max_out_samples, data, nb_samples);
// Only a positive sample count is appended; zero or negative results from
// swr_convert add nothing to `*samples`.
68 if (out_samples > 0) {
// The byte buffer is reinterpreted as floats, matching the AV_SAMPLE_FMT_FLT
// format requested at allocation.
69 const float* start = reinterpret_cast<const float *>(output);
70 const float* end = start + out_samples;
71 samples->insert(samples->end(), start, end);
// Decodes audio from `packet` into `audio_frame` with avcodec_decode_audio4
// and passes the decoded frame's data to convert_samples, which appends to
// `*samples`.
//
//   filename    - used only in the stderr diagnostic below.
//   codec_ctx   - decoder context; also the reference for the expected channel
//                 layout and sample rate.
//   swr         - resampler forwarded to convert_samples.
//   audio_frame - frame that receives the decoded audio.
//   packet      - input packet to decode.
//   got_frame   - set by avcodec_decode_audio4.
//   samples     - destination vector forwarded to convert_samples.
//
// NOTE(review): the return statements are not visible in this excerpt. The
// caller stores the result in a variable named len1 and reports "Couldn't
// decode audio", so presumably the decoder's return value is propagated --
// confirm.
75 int decode_packet(const char *filename, AVCodecContext *codec_ctx, SwrContext *swr, AVFrame *audio_frame, AVPacket *packet, int *got_frame, std::vector<float> *samples)
// NOTE(review): avcodec_decode_audio4 is deprecated in later FFmpeg releases
// in favour of avcodec_send_packet / avcodec_receive_frame -- confirm the
// targeted library version.
78 int len1 = avcodec_decode_audio4(codec_ctx, audio_frame, got_frame, packet);
// Decoder error, or no frame produced. The body of this branch is not visible
// here.
79 if (len1 < 0 || !*got_frame) {
// The resampler was configured from codec_ctx, so a frame whose channel layout
// or sample rate differs from codec_ctx is reported as an error.
83 if (audio_frame->channel_layout != codec_ctx->channel_layout ||
84 audio_frame->sample_rate != codec_ctx->sample_rate) {
85 fprintf(stderr, "%s: Channel layout or sample rate changed mid-file\n", filename);
89 convert_samples(swr, codec_ctx->sample_rate, (const uint8_t **)audio_frame->data, audio_frame->nb_samples, samples);
// Opens `filename`, locates an audio stream, decodes it, and appends the audio
// to `*samples` after resampling to mono float at the codec's sample rate.
// The codec context's sample rate is written to `*sample_rate`.
//
//   filename    - path passed to avformat_open_input; also used in diagnostics.
//   samples     - destination vector; decoded samples are appended.
//   sample_rate - receives codec_ctx->sample_rate.
//
// Each failure prints a message to stderr.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably false is returned on every error path and true on success --
// confirm.
95 bool read_audio_file(const char *filename, std::vector<float> *samples, int *sample_rate)
99 AVFormatContext *format_ctx = nullptr;
100 if (avformat_open_input(&format_ctx, filename, nullptr, nullptr) != 0) {
101 fprintf(stderr, "Couldn't open %s\n", filename);
// From here on the format context is released by format_ctx_closer.
104 std::unique_ptr<AVFormatContext, AVFormatCloserAndDeleter> format_ctx_closer(format_ctx);
106 if (avformat_find_stream_info(format_ctx, nullptr) < 0) {
107 fprintf(stderr, "%s: Couldn't find stream information\n", filename);
111 // Find the first audio stream.
112 int audio_stream_index = -1;
113 for (unsigned i = 0; i < format_ctx->nb_streams; ++i) {
// NOTE(review): AVStream::codec is deprecated in later FFmpeg releases in
// favour of AVStream::codecpar -- confirm the targeted library version.
114 if (format_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
115 audio_stream_index = i;
// -1 is the initial value, so it means no stream matched in the loop above.
119 if (audio_stream_index == -1) {
120 fprintf(stderr, "%s: Couldn't find an audio stream\n", filename);
124 AVCodec *codec = avcodec_find_decoder(format_ctx->streams[audio_stream_index]->codec->codec_id);
125 if (codec == nullptr) {
126 fprintf(stderr, "%s: Unsupported codec\n", filename);
// Work on a private copy of the stream's codec context.
// NOTE(review): the result of avcodec_alloc_context3 is passed to
// avcodec_copy_context without a null check.
130 AVCodecContext *codec_ctx = avcodec_alloc_context3(codec);
131 std::unique_ptr<AVCodecContext, AVCodecContextDeleter> codec_ctx_deleter(codec_ctx);
132 if (avcodec_copy_context(codec_ctx, format_ctx->streams[audio_stream_index]->codec) != 0) {
133 fprintf(stderr, "%s: Couldn't copy codec context\n", filename);
137 if (avcodec_open2(codec_ctx, codec, nullptr) < 0) {
138 fprintf(stderr, "%s: Couldn't open codec\n", filename);
142 // Init resampler (to downmix to mono and convert to float samples).
// If the layout is unset (0), derive a default one from the channel count.
143 if (codec_ctx->channel_layout == 0) {
144 codec_ctx->channel_layout = av_get_default_channel_layout(codec_ctx->channels);
// Both lines below use codec_ctx->sample_rate; only the channel layout and
// sample format differ between them.
146 SwrContext *swr = swr_alloc_set_opts(
148 AV_CH_LAYOUT_MONO, AV_SAMPLE_FMT_FLT, codec_ctx->sample_rate,
149 codec_ctx->channel_layout, codec_ctx->sample_fmt, codec_ctx->sample_rate,
// NOTE(review): `swr` is passed to swr_init without a null check.
151 std::unique_ptr<SwrContext, SwrContextDeleter> swr_deleter(swr);
152 if (swr_init(swr) < 0) {
153 fprintf(stderr, "%s: Couldn't initialize resampler\n", filename);
// With libavcodec major version >= 55 the frame is allocated by av_frame_alloc
// and owned by a unique_ptr.
158 #if (LIBAVCODEC_VERSION_MAJOR >= 55)
159 AVFrame *audio_frame = av_frame_alloc();
160 std::unique_ptr<AVFrame, AVFrameDeleter> audio_frame_deleter(audio_frame);
// NOTE(review): the #else / #endif lines are not visible in this excerpt; the
// two declarations below presumably form the fallback for older libavcodec,
// using a value-initialized local AVFrame -- confirm.
162 AVFrame frame_holder {};
163 AVFrame *audio_frame = &frame_holder;
// Main demux loop: runs while av_read_frame returns a non-negative value.
165 while (av_read_frame(format_ctx, &packet) >= 0) {
// Scope guard so AVPacketDeleter runs on `packet` at the end of each iteration.
166 std::unique_ptr<AVPacket, AVPacketDeleter> av_packet_deleter(&packet);
// Packets whose stream index differs from the selected audio stream take this
// branch; its body is not visible here.
168 if (packet.stream_index != audio_stream_index) {
// Keep decoding until packet.size reaches zero.
// NOTE(review): the lines that advance the packet are not visible in this
// excerpt.
172 while (packet.size > 0) {
174 int len1 = decode_packet(filename, codec_ctx, swr, audio_frame, &packet, &got_frame, samples);
176 fprintf(stderr, "%s: Couldn't decode audio\n", filename);
187 // Flush any delayed data from the end.
188 packet.data = nullptr;
192 int len1 = decode_packet(filename, codec_ctx, swr, audio_frame, &packet, &got_frame, samples);
194 fprintf(stderr, "%s: Couldn't decode audio\n", filename);
199 // Convert any leftover samples from the converter.
200 convert_samples(swr, codec_ctx->sample_rate, nullptr, 0, samples);
202 *sample_rate = codec_ctx->sample_rate;