git.sesse.net Git - c64tapwav/blob - audioreader.cpp

   1 #include <stdio.h>
   2
   3 extern "C" {
   4
   5 #define __STDC_CONSTANT_MACROS
   6
   7 #include <libavcodec/avcodec.h>
   8 #include <libavformat/avformat.h>
   9 #include <libswresample/swresample.h>
  10 #include <libavutil/avutil.h>
  11
  12 }
  13
  14 #include <memory>
  15 #include <vector>
  16
  17 namespace {
  18
  19 struct AVFormatCloserAndDeleter {
  20         void operator() (AVFormatContext *ctx) {
  21                 avformat_close_input(&ctx);
  22                 avformat_free_context(ctx);
  23         }
  24 };
  25
  26 struct AVCodecContextDeleter {
  27         void operator() (AVCodecContext *ctx) {
  28                 avcodec_close(ctx);
  29                 av_freep(&ctx);
  30         }
  31 };
  32
  33 struct SwrContextDeleter {
  34         void operator() (SwrContext *swr) {
  35                 swr_free(&swr);
  36         }
  37 };
  38
  39 struct AVPacketDeleter {
  40         void operator() (AVPacket *pkt) {
  41                 av_free_packet(pkt);
  42         }
  43 };
  44
  45 struct AVFrameDeleter {
  46         void operator() (AVFrame *frame) {
  47                 av_frame_free(&frame);
  48         }
  49 };
  50
  51 struct AVSampleDeleter {
  52         void operator() (uint8_t *data) {
  53                 av_freep(&data);
  54         }
  55 };
  56
  57 void convert_samples(SwrContext *swr, int sample_rate, const uint8_t **data, int nb_samples, std::vector<float> *samples)
  58 {
  59         int max_out_samples = nb_samples + swr_get_delay(swr, sample_rate);
  60         if (max_out_samples == 0) {
  61                 return;
  62         }
  63         uint8_t *output;
  64         av_samples_alloc(&output, nullptr, 1, max_out_samples, AV_SAMPLE_FMT_FLT, 0);
  65         std::unique_ptr<uint8_t, AVSampleDeleter> output_deleter(output);
  66
  67         int out_samples = swr_convert(swr, &output, max_out_samples, data, nb_samples);
  68         if (out_samples > 0) {
  69                 const float* start = reinterpret_cast<const float *>(output);
  70                 const float* end = start + out_samples;
  71                 samples->insert(samples->end(), start, end);
  72         }
  73 }
  74
  75 int decode_packet(const char *filename, AVCodecContext *codec_ctx, SwrContext *swr, AVFrame *audio_frame, AVPacket *packet, int *got_frame, std::vector<float> *samples)
  76 {
  77         *got_frame = 0;
  78         int len1 = avcodec_decode_audio4(codec_ctx, audio_frame, got_frame, packet);
  79         if (len1 < 0 || !*got_frame) {
  80                 return len1;
  81         }
  82
  83         if (audio_frame->channel_layout != codec_ctx->channel_layout ||
  84             audio_frame->sample_rate != codec_ctx->sample_rate) {
  85                 fprintf(stderr, "%s: Channel layout or sample rate changed mid-file\n", filename);
  86                 *got_frame = false;
  87                 return len1;
  88         }
  89         convert_samples(swr, codec_ctx->sample_rate, (const uint8_t **)audio_frame->data, audio_frame->nb_samples, samples);
  90         return len1;
  91 }
  92
  93 }  // namespace
  94
  95 bool read_audio_file(const char *filename, std::vector<float> *samples, int *sample_rate)
  96 {
  97         av_register_all();
  98
  99         AVFormatContext *format_ctx = nullptr;
 100         if (avformat_open_input(&format_ctx, filename, nullptr, nullptr) != 0) {
 101                 fprintf(stderr, "Couldn't open %s\n", filename);
 102                 return false;
 103         }
 104         std::unique_ptr<AVFormatContext, AVFormatCloserAndDeleter> format_ctx_closer(format_ctx);
 105
 106         if (avformat_find_stream_info(format_ctx, nullptr) < 0) {
 107                 fprintf(stderr, "%s: Couldn't find stream information\n", filename);
 108                 return false;
 109         }
 110
 111         // Find the first audio stream.
 112         int audio_stream_index = -1;
 113         for (unsigned i = 0; i < format_ctx->nb_streams; ++i) {
 114                 if (format_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
 115                         audio_stream_index = i;
 116                         break;
 117                 }
 118         }
 119         if (audio_stream_index == -1) {
 120                 fprintf(stderr, "%s: Couldn't find an audio stream\n", filename);
 121                 return false;
 122         }
 123
 124         AVCodec *codec = avcodec_find_decoder(format_ctx->streams[audio_stream_index]->codec->codec_id);
 125         if (codec == nullptr) {
 126                 fprintf(stderr, "%s: Unsupported codec\n", filename);
 127                 return false;
 128         }
 129
 130         AVCodecContext *codec_ctx = avcodec_alloc_context3(codec);
 131         std::unique_ptr<AVCodecContext, AVCodecContextDeleter> codec_ctx_deleter(codec_ctx);
 132         if (avcodec_copy_context(codec_ctx, format_ctx->streams[audio_stream_index]->codec) != 0) {
 133                 fprintf(stderr, "%s: Couldn't copy codec context\n", filename);
 134                 return false;
 135         }
 136
 137         if (avcodec_open2(codec_ctx, codec, nullptr) < 0) {
 138                 fprintf(stderr, "%s: Couldn't open codec\n", filename);
 139                 return false;
 140         }
 141
 142         // Init resampler (to downmix to mono and convert to s16).
 143         if (codec_ctx->channel_layout == 0) {
 144                 codec_ctx->channel_layout = av_get_default_channel_layout(codec_ctx->channels);
 145         }
 146         SwrContext *swr = swr_alloc_set_opts(
 147                 nullptr,
 148                 AV_CH_LAYOUT_MONO, AV_SAMPLE_FMT_FLT, codec_ctx->sample_rate,
 149                 codec_ctx->channel_layout, codec_ctx->sample_fmt, codec_ctx->sample_rate,
 150                 0, nullptr);
 151         std::unique_ptr<SwrContext, SwrContextDeleter> swr_deleter(swr);
 152         if (swr_init(swr) < 0) {
 153                 fprintf(stderr, "%s: Couldn't initialize resampler\n", filename);
 154                 return false;
 155         }
 156
 157         AVPacket packet;
 158 #if (LIBAVCODEC_VERSION_MAJOR >= 55)
 159         AVFrame *audio_frame = av_frame_alloc();
 160         std::unique_ptr<AVFrame, AVFrameDeleter> audio_frame_deleter(audio_frame);
 161 #else
 162         AVFrame frame_holder {};
 163         AVFrame *audio_frame = &frame_holder;
 164 #endif
 165         while (av_read_frame(format_ctx, &packet) >= 0) {
 166                 std::unique_ptr<AVPacket, AVPacketDeleter> av_packet_deleter(&packet);
 167
 168                 if (packet.stream_index != audio_stream_index) {
 169                         continue;
 170                 }
 171
 172                 while (packet.size > 0) {
 173                         int got_frame = 0;
 174                         int len1 = decode_packet(filename, codec_ctx, swr, audio_frame, &packet, &got_frame, samples);
 175                         if (len1 < 0) {
 176                                 fprintf(stderr, "%s: Couldn't decode audio\n", filename);
 177                                 return false;
 178                         }
 179                         if (!got_frame) {
 180                                 break;
 181                         }
 182                         packet.data += len1;
 183                         packet.size -= len1;
 184                 }
 185         }
 186
 187         // Flush any delayed data from the end.
 188         packet.data = nullptr;
 189         packet.size = 0;
 190         int got_frame = 0;
 191         do {
 192                 int len1 = decode_packet(filename, codec_ctx, swr, audio_frame, &packet, &got_frame, samples);
 193                 if (len1 < 0) {
 194                         fprintf(stderr, "%s: Couldn't decode audio\n", filename);
 195                         return false;
 196                 }
 197         } while (got_frame);
 198
 199         // Convert any leftover samples from the converter.
 200         convert_samples(swr, codec_ctx->sample_rate, nullptr, 0, samples);
 201
 202         *sample_rate = codec_ctx->sample_rate;
 203
 204         return true;
 205 }