git.sesse.net Git - c64tapwav/blob - audioreader.cpp

   1 #include <stdio.h>
   2
   3 extern "C" {
   4
   5 #include <libavcodec/avcodec.h>
   6 #include <libavformat/avformat.h>
   7 #include <libswresample/swresample.h>
   8 #include <libavutil/avutil.h>
   9 #include <libavutil/frame.h>
  10
  11 }
  12
  13 #include <memory>
  14 #include <vector>
  15
  16 namespace {
  17
  18 struct AVFormatCloserAndDeleter {
  19         void operator() (AVFormatContext *ctx) {
  20                 avformat_close_input(&ctx);
  21                 avformat_free_context(ctx);
  22         }
  23 };
  24
  25 struct AVCodecContextDeleter {
  26         void operator() (AVCodecContext *ctx) {
  27                 avcodec_free_context(&ctx);
  28         }
  29 };
  30
  31 struct SwrContextDeleter {
  32         void operator() (SwrContext *swr) {
  33                 swr_free(&swr);
  34         }
  35 };
  36
  37 struct AVPacketDeleter {
  38         void operator() (AVPacket *pkt) {
  39                 av_free_packet(pkt);
  40         }
  41 };
  42
  43 struct AVFrameDeleter {
  44         void operator() (AVFrame *frame) {
  45                 av_frame_free(&frame);
  46         }
  47 };
  48
  49 struct AVSampleDeleter {
  50         void operator() (uint8_t *data) {
  51                 av_freep(&data);
  52         }
  53 };
  54
  55 void convert_samples(SwrContext *swr, int sample_rate, const uint8_t **data, int nb_samples, std::vector<float> *samples)
  56 {
  57         int max_out_samples = nb_samples + swr_get_delay(swr, sample_rate);
  58         if (max_out_samples == 0) {
  59                 return;
  60         }
  61         uint8_t *output;
  62         av_samples_alloc(&output, nullptr, 1, max_out_samples, AV_SAMPLE_FMT_FLT, 0);
  63         std::unique_ptr<uint8_t, AVSampleDeleter> output_deleter(output);
  64
  65         int out_samples = swr_convert(swr, &output, max_out_samples, data, nb_samples);
  66         if (out_samples > 0) {
  67                 const float* start = reinterpret_cast<const float *>(output);
  68                 const float* end = start + out_samples;
  69                 samples->insert(samples->end(), start, end);
  70         }
  71 }
  72
  73 int decode_packet(const char *filename, AVCodecContext *codec_ctx, SwrContext *swr, AVFrame *audio_frame, AVPacket *packet, int *got_frame, std::vector<float> *samples)
  74 {
  75         *got_frame = 0;
  76         int len1 = avcodec_decode_audio4(codec_ctx, audio_frame, got_frame, packet);
  77         if (len1 < 0 || !*got_frame) {
  78                 return len1;
  79         }
  80
  81         if (audio_frame->channel_layout != codec_ctx->channel_layout ||
  82             audio_frame->sample_rate != codec_ctx->sample_rate) {
  83                 fprintf(stderr, "%s: Channel layout or sample rate changed mid-file\n", filename);
  84                 *got_frame = false;
  85                 return len1;
  86         }
  87         convert_samples(swr, codec_ctx->sample_rate, (const uint8_t **)audio_frame->data, audio_frame->nb_samples, samples);
  88         return len1;
  89 }
  90
  91 }  // namespace
  92
  93 bool read_audio_file(const char *filename, std::vector<float> *samples, int *sample_rate)
  94 {
  95         av_register_all();
  96
  97         AVFormatContext *format_ctx = nullptr;
  98         if (avformat_open_input(&format_ctx, filename, nullptr, nullptr) != 0) {
  99                 fprintf(stderr, "Couldn't open %s\n", filename);
 100                 return false;
 101         }
 102         std::unique_ptr<AVFormatContext, AVFormatCloserAndDeleter> format_ctx_closer(format_ctx);
 103
 104         if (avformat_find_stream_info(format_ctx, nullptr) < 0) {
 105                 fprintf(stderr, "%s: Couldn't find stream information\n", filename);
 106                 return false;
 107         }
 108
 109         // Find the first audio stream.
 110         int audio_stream_index = -1;
 111         for (unsigned i = 0; i < format_ctx->nb_streams; ++i) {
 112                 if (format_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
 113                         audio_stream_index = i;
 114                         break;
 115                 }
 116         }
 117         if (audio_stream_index == -1) {
 118                 fprintf(stderr, "%s: Couldn't find an audio stream\n", filename);
 119                 return false;
 120         }
 121
 122         AVCodec *codec = avcodec_find_decoder(format_ctx->streams[audio_stream_index]->codec->codec_id);
 123         if (codec == nullptr) {
 124                 fprintf(stderr, "%s: Unsupported codec\n", filename);
 125                 return false;
 126         }
 127
 128         AVCodecContext *codec_ctx = avcodec_alloc_context3(codec);
 129         std::unique_ptr<AVCodecContext, AVCodecContextDeleter> codec_ctx_deleter(codec_ctx);
 130         if (avcodec_copy_context(codec_ctx, format_ctx->streams[audio_stream_index]->codec) != 0) {
 131                 fprintf(stderr, "%s: Couldn't copy codec context\n", filename);
 132                 return false;
 133         }
 134
 135         if (avcodec_open2(codec_ctx, codec, nullptr) < 0) {
 136                 fprintf(stderr, "%s: Couldn't open codec\n", filename);
 137                 return false;
 138         }
 139
 140         // Init resampler (to downmix to mono and convert to s16).
 141         if (codec_ctx->channel_layout == 0) {
 142                 codec_ctx->channel_layout = av_get_default_channel_layout(codec_ctx->channels);
 143         }
 144         SwrContext *swr = swr_alloc_set_opts(
 145                 nullptr,
 146                 AV_CH_LAYOUT_MONO, AV_SAMPLE_FMT_FLT, codec_ctx->sample_rate,
 147                 codec_ctx->channel_layout, codec_ctx->sample_fmt, codec_ctx->sample_rate,
 148                 0, nullptr);
 149         std::unique_ptr<SwrContext, SwrContextDeleter> swr_deleter(swr);
 150         if (swr_init(swr) < 0) {
 151                 fprintf(stderr, "%s: Couldn't initialize resampler\n", filename);
 152                 return false;
 153         }
 154
 155         AVPacket packet;
 156         AVFrame* audio_frame = av_frame_alloc();
 157         std::unique_ptr<AVFrame, AVFrameDeleter> audio_frame_deleter(audio_frame);
 158         while (av_read_frame(format_ctx, &packet) >= 0) {
 159                 std::unique_ptr<AVPacket, AVPacketDeleter> av_packet_deleter(&packet);
 160
 161                 if (packet.stream_index != audio_stream_index) {
 162                         continue;
 163                 }
 164
 165                 while (packet.size > 0) {
 166                         int got_frame = 0;
 167                         int len1 = decode_packet(filename, codec_ctx, swr, audio_frame, &packet, &got_frame, samples);
 168                         if (len1 < 0) {
 169                                 fprintf(stderr, "%s: Couldn't decode audio\n", filename);
 170                                 return false;
 171                         }
 172                         if (!got_frame) {
 173                                 break;
 174                         }
 175                         packet.data += len1;
 176                         packet.size -= len1;
 177                 }
 178         }
 179
 180         // Flush any delayed data from the end.
 181         packet.data = nullptr;
 182         packet.size = 0;
 183         int got_frame = 0;
 184         do {
 185                 int len1 = decode_packet(filename, codec_ctx, swr, audio_frame, &packet, &got_frame, samples);
 186                 if (len1 < 0) {
 187                         fprintf(stderr, "%s: Couldn't decode audio\n", filename);
 188                         return false;
 189                 }
 190         } while (got_frame);
 191
 192         // Convert any leftover samples from the converter.
 193         convert_samples(swr, codec_ctx->sample_rate, nullptr, 0, samples);
 194
 195         *sample_rate = codec_ctx->sample_rate;
 196
 197         return true;
 198 }