git.sesse.net Git - nageru/blob - audio_encoder.cpp

   1 #include "audio_encoder.h"
   2
   3 extern "C" {
   4 #include <libavcodec/avcodec.h>
   5 #include <libavformat/avformat.h>
   6 #include <libavresample/avresample.h>
   7 #include <libavutil/channel_layout.h>
   8 #include <libavutil/frame.h>
   9 #include <libavutil/rational.h>
  10 #include <libavutil/samplefmt.h>
  11 #include <libavutil/opt.h>
  12 }
  13
  14 #include <assert.h>
  15
  16 #include <string>
  17 #include <vector>
  18
  19 #include "defs.h"
  20 #include "timebase.h"
  21
  22 using namespace std;
  23
  24 AudioEncoder::AudioEncoder(const string &codec_name, int bit_rate, const vector<Mux *> &muxes)
  25         : muxes(muxes)
  26 {
  27         AVCodec *codec = avcodec_find_encoder_by_name(codec_name.c_str());
  28         if (codec == nullptr) {
  29                 fprintf(stderr, "ERROR: Could not find codec '%s'\n", codec_name.c_str());
  30                 exit(1);
  31         }
  32
  33         ctx = avcodec_alloc_context3(codec);
  34         ctx->bit_rate = bit_rate;
  35         ctx->sample_rate = OUTPUT_FREQUENCY;
  36         ctx->sample_fmt = codec->sample_fmts[0];
  37         ctx->channels = 2;
  38         ctx->channel_layout = AV_CH_LAYOUT_STEREO;
  39         ctx->time_base = AVRational{1, TIMEBASE};
  40         ctx->flags |= CODEC_FLAG_GLOBAL_HEADER;
  41         if (avcodec_open2(ctx, codec, NULL) < 0) {
  42                 fprintf(stderr, "Could not open codec '%s'\n", codec_name.c_str());
  43                 exit(1);
  44         }
  45
  46         resampler = avresample_alloc_context();
  47         if (resampler == nullptr) {
  48                 fprintf(stderr, "Allocating resampler failed.\n");
  49                 exit(1);
  50         }
  51
  52         av_opt_set_int(resampler, "in_channel_layout",  AV_CH_LAYOUT_STEREO,       0);
  53         av_opt_set_int(resampler, "out_channel_layout", AV_CH_LAYOUT_STEREO,       0);
  54         av_opt_set_int(resampler, "in_sample_rate",     OUTPUT_FREQUENCY,          0);
  55         av_opt_set_int(resampler, "out_sample_rate",    OUTPUT_FREQUENCY,          0);
  56         av_opt_set_int(resampler, "in_sample_fmt",      AV_SAMPLE_FMT_FLT,         0);
  57         av_opt_set_int(resampler, "out_sample_fmt",     ctx->sample_fmt, 0);
  58
  59         if (avresample_open(resampler) < 0) {
  60                 fprintf(stderr, "Could not open resample context.\n");
  61                 exit(1);
  62         }
  63
  64         audio_frame = av_frame_alloc();
  65 }
  66
  67 AudioEncoder::~AudioEncoder()
  68 {
  69         av_frame_free(&audio_frame);
  70         avresample_free(&resampler);
  71         avcodec_free_context(&ctx);
  72 }
  73
  74 void AudioEncoder::encode_audio(const vector<float> &audio, int64_t audio_pts)
  75 {
  76         if (ctx->frame_size == 0) {
  77                 // No queueing needed.
  78                 assert(audio_queue.empty());
  79                 assert(audio.size() % 2 == 0);
  80                 encode_audio_one_frame(&audio[0], audio.size() / 2, audio_pts);
  81                 return;
  82         }
  83
  84         int64_t sample_offset = audio_queue.size();
  85
  86         audio_queue.insert(audio_queue.end(), audio.begin(), audio.end());
  87         size_t sample_num;
  88         for (sample_num = 0;
  89              sample_num + ctx->frame_size * 2 <= audio_queue.size();
  90              sample_num += ctx->frame_size * 2) {
  91                 int64_t adjusted_audio_pts = audio_pts + (int64_t(sample_num) - sample_offset) * TIMEBASE / (OUTPUT_FREQUENCY * 2);
  92                 encode_audio_one_frame(&audio_queue[sample_num],
  93                                        ctx->frame_size,
  94                                        adjusted_audio_pts);
  95         }
  96         audio_queue.erase(audio_queue.begin(), audio_queue.begin() + sample_num);
  97
  98         last_pts = audio_pts + audio.size() * TIMEBASE / (OUTPUT_FREQUENCY * 2);
  99 }
 100
 101 void AudioEncoder::encode_audio_one_frame(const float *audio, size_t num_samples, int64_t audio_pts)
 102 {
 103         audio_frame->pts = audio_pts;
 104         audio_frame->nb_samples = num_samples;
 105         audio_frame->channel_layout = AV_CH_LAYOUT_STEREO;
 106         audio_frame->format = ctx->sample_fmt;
 107         audio_frame->sample_rate = OUTPUT_FREQUENCY;
 108
 109         if (av_samples_alloc(audio_frame->data, nullptr, 2, num_samples, ctx->sample_fmt, 0) < 0) {
 110                 fprintf(stderr, "Could not allocate %ld samples.\n", num_samples);
 111                 exit(1);
 112         }
 113
 114         if (avresample_convert(resampler, audio_frame->data, 0, num_samples,
 115                                (uint8_t **)&audio, 0, num_samples) < 0) {
 116                 fprintf(stderr, "Audio conversion failed.\n");
 117                 exit(1);
 118         }
 119
 120         AVPacket pkt;
 121         av_init_packet(&pkt);
 122         pkt.data = nullptr;
 123         pkt.size = 0;
 124         int got_output = 0;
 125         avcodec_encode_audio2(ctx, &pkt, audio_frame, &got_output);
 126         if (got_output) {
 127                 pkt.stream_index = 1;
 128                 pkt.flags = 0;
 129                 for (Mux *mux : muxes) {
 130                         mux->add_packet(pkt, pkt.pts, pkt.dts);
 131                 }
 132         }
 133
 134         av_freep(&audio_frame->data[0]);
 135
 136         av_frame_unref(audio_frame);
 137         av_free_packet(&pkt);
 138 }
 139
 140 void AudioEncoder::encode_last_audio()
 141 {
 142         if (!audio_queue.empty()) {
 143                 // Last frame can be whatever size we want.
 144                 assert(audio_queue.size() % 2 == 0);
 145                 encode_audio_one_frame(&audio_queue[0], audio_queue.size() / 2, last_pts);
 146                 audio_queue.clear();
 147         }
 148
 149         if (ctx->codec->capabilities & AV_CODEC_CAP_DELAY) {
 150                 // Collect any delayed frames.
 151                 for ( ;; ) {
 152                         int got_output = 0;
 153                         AVPacket pkt;
 154                         av_init_packet(&pkt);
 155                         pkt.data = nullptr;
 156                         pkt.size = 0;
 157                         avcodec_encode_audio2(ctx, &pkt, nullptr, &got_output);
 158                         if (!got_output) break;
 159
 160                         pkt.stream_index = 1;
 161                         pkt.flags = 0;
 162                         for (Mux *mux : muxes) {
 163                                 mux->add_packet(pkt, pkt.pts, pkt.dts);
 164                         }
 165                         av_free_packet(&pkt);
 166                 }
 167         }
 168 }