git.sesse.net Git - ffmpeg/blob - doc/examples/transcode_aac.c

   1 /*
   2  * This file is part of FFmpeg.
   3  *
   4  * FFmpeg is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Lesser General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 2.1 of the License, or (at your option) any later version.
   8  *
   9  * FFmpeg is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * Lesser General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Lesser General Public
  15  * License along with FFmpeg; if not, write to the Free Software
  16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17  */
  18
  19 /**
  20  * @file
  21  * simple audio converter
  22  *
  23  * @example transcode_aac.c
  24  * Convert an input audio file to AAC in an MP4 container using FFmpeg.
  25  * @author Andreas Unterweger (dustsigns@gmail.com)
  26  */
  27
  28 #include <stdio.h>
  29
  30 #include "libavformat/avformat.h"
  31 #include "libavformat/avio.h"
  32
  33 #include "libavcodec/avcodec.h"
  34
  35 #include "libavutil/audio_fifo.h"
  36 #include "libavutil/avassert.h"
  37 #include "libavutil/avstring.h"
  38 #include "libavutil/frame.h"
  39 #include "libavutil/opt.h"
  40
  41 #include "libswresample/swresample.h"
  42
  43 /** The output bit rate in bit/s */
  44 #define OUTPUT_BIT_RATE 96000
  45 /** The number of output channels */
  46 #define OUTPUT_CHANNELS 2
  47
  48 /**
  49  * Convert an error code into a text message.
  50  * @param error Error code to be converted
  51  * @return Corresponding error text (not thread-safe)
  52  */
  53 static const char *get_error_text(const int error)
  54 {
  55     static char error_buffer[255];
  56     av_strerror(error, error_buffer, sizeof(error_buffer));
  57     return error_buffer;
  58 }
  59
  60 /** Open an input file and the required decoder. */
  61 static int open_input_file(const char *filename,
  62                            AVFormatContext **input_format_context,
  63                            AVCodecContext **input_codec_context)
  64 {
  65     AVCodec *input_codec;
  66     int error;
  67
  68     /** Open the input file to read from it. */
  69     if ((error = avformat_open_input(input_format_context, filename, NULL,
  70                                      NULL)) < 0) {
  71         fprintf(stderr, "Could not open input file '%s' (error '%s')\n",
  72                 filename, get_error_text(error));
  73         *input_format_context = NULL;
  74         return error;
  75     }
  76
  77     /** Get information on the input file (number of streams etc.). */
  78     if ((error = avformat_find_stream_info(*input_format_context, NULL)) < 0) {
  79         fprintf(stderr, "Could not open find stream info (error '%s')\n",
  80                 get_error_text(error));
  81         avformat_close_input(input_format_context);
  82         return error;
  83     }
  84
  85     /** Make sure that there is only one stream in the input file. */
  86     if ((*input_format_context)->nb_streams != 1) {
  87         fprintf(stderr, "Expected one audio input stream, but found %d\n",
  88                 (*input_format_context)->nb_streams);
  89         avformat_close_input(input_format_context);
  90         return AVERROR_EXIT;
  91     }
  92
  93     /** Find a decoder for the audio stream. */
  94     if (!(input_codec = avcodec_find_decoder((*input_format_context)->streams[0]->codec->codec_id))) {
  95         fprintf(stderr, "Could not find input codec\n");
  96         avformat_close_input(input_format_context);
  97         return AVERROR_EXIT;
  98     }
  99
 100     /** Open the decoder for the audio stream to use it later. */
 101     if ((error = avcodec_open2((*input_format_context)->streams[0]->codec,
 102                                input_codec, NULL)) < 0) {
 103         fprintf(stderr, "Could not open input codec (error '%s')\n",
 104                 get_error_text(error));
 105         avformat_close_input(input_format_context);
 106         return error;
 107     }
 108
 109     /** Save the decoder context for easier access later. */
 110     *input_codec_context = (*input_format_context)->streams[0]->codec;
 111
 112     return 0;
 113 }
 114
 115 /**
 116  * Open an output file and the required encoder.
 117  * Also set some basic encoder parameters.
 118  * Some of these parameters are based on the input file's parameters.
 119  */
 120 static int open_output_file(const char *filename,
 121                             AVCodecContext *input_codec_context,
 122                             AVFormatContext **output_format_context,
 123                             AVCodecContext **output_codec_context)
 124 {
 125     AVIOContext *output_io_context = NULL;
 126     AVStream *stream               = NULL;
 127     AVCodec *output_codec          = NULL;
 128     int error;
 129
 130     /** Open the output file to write to it. */
 131     if ((error = avio_open(&output_io_context, filename,
 132                            AVIO_FLAG_WRITE)) < 0) {
 133         fprintf(stderr, "Could not open output file '%s' (error '%s')\n",
 134                 filename, get_error_text(error));
 135         return error;
 136     }
 137
 138     /** Create a new format context for the output container format. */
 139     if (!(*output_format_context = avformat_alloc_context())) {
 140         fprintf(stderr, "Could not allocate output format context\n");
 141         return AVERROR(ENOMEM);
 142     }
 143
 144     /** Associate the output file (pointer) with the container format context. */
 145     (*output_format_context)->pb = output_io_context;
 146
 147     /** Guess the desired container format based on the file extension. */
 148     if (!((*output_format_context)->oformat = av_guess_format(NULL, filename,
 149                                                               NULL))) {
 150         fprintf(stderr, "Could not find output file format\n");
 151         goto cleanup;
 152     }
 153
 154     av_strlcpy((*output_format_context)->filename, filename,
 155                sizeof((*output_format_context)->filename));
 156
 157     /** Find the encoder to be used by its name. */
 158     if (!(output_codec = avcodec_find_encoder(AV_CODEC_ID_AAC))) {
 159         fprintf(stderr, "Could not find an AAC encoder.\n");
 160         goto cleanup;
 161     }
 162
 163     /** Create a new audio stream in the output file container. */
 164     if (!(stream = avformat_new_stream(*output_format_context, output_codec))) {
 165         fprintf(stderr, "Could not create new stream\n");
 166         error = AVERROR(ENOMEM);
 167         goto cleanup;
 168     }
 169
 170     /** Save the encoder context for easier access later. */
 171     *output_codec_context = stream->codec;
 172
 173     /**
 174      * Set the basic encoder parameters.
 175      * The input file's sample rate is used to avoid a sample rate conversion.
 176      */
 177     (*output_codec_context)->channels       = OUTPUT_CHANNELS;
 178     (*output_codec_context)->channel_layout = av_get_default_channel_layout(OUTPUT_CHANNELS);
 179     (*output_codec_context)->sample_rate    = input_codec_context->sample_rate;
 180     (*output_codec_context)->sample_fmt     = output_codec->sample_fmts[0];
 181     (*output_codec_context)->bit_rate       = OUTPUT_BIT_RATE;
 182
 183     /** Allow the use of the experimental AAC encoder */
 184     (*output_codec_context)->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
 185
 186     /** Set the sample rate for the container. */
 187     stream->time_base.den = input_codec_context->sample_rate;
 188     stream->time_base.num = 1;
 189
 190     /**
 191      * Some container formats (like MP4) require global headers to be present
 192      * Mark the encoder so that it behaves accordingly.
 193      */
 194     if ((*output_format_context)->oformat->flags & AVFMT_GLOBALHEADER)
 195         (*output_codec_context)->flags |= CODEC_FLAG_GLOBAL_HEADER;
 196
 197     /** Open the encoder for the audio stream to use it later. */
 198     if ((error = avcodec_open2(*output_codec_context, output_codec, NULL)) < 0) {
 199         fprintf(stderr, "Could not open output codec (error '%s')\n",
 200                 get_error_text(error));
 201         goto cleanup;
 202     }
 203
 204     return 0;
 205
 206 cleanup:
 207     avio_closep(&(*output_format_context)->pb);
 208     avformat_free_context(*output_format_context);
 209     *output_format_context = NULL;
 210     return error < 0 ? error : AVERROR_EXIT;
 211 }
 212
 213 /** Initialize one data packet for reading or writing. */
 214 static void init_packet(AVPacket *packet)
 215 {
 216     av_init_packet(packet);
 217     /** Set the packet data and size so that it is recognized as being empty. */
 218     packet->data = NULL;
 219     packet->size = 0;
 220 }
 221
 222 /** Initialize one audio frame for reading from the input file */
 223 static int init_input_frame(AVFrame **frame)
 224 {
 225     if (!(*frame = av_frame_alloc())) {
 226         fprintf(stderr, "Could not allocate input frame\n");
 227         return AVERROR(ENOMEM);
 228     }
 229     return 0;
 230 }
 231
 232 /**
 233  * Initialize the audio resampler based on the input and output codec settings.
 234  * If the input and output sample formats differ, a conversion is required
 235  * libswresample takes care of this, but requires initialization.
 236  */
 237 static int init_resampler(AVCodecContext *input_codec_context,
 238                           AVCodecContext *output_codec_context,
 239                           SwrContext **resample_context)
 240 {
 241         int error;
 242
 243         /**
 244          * Create a resampler context for the conversion.
 245          * Set the conversion parameters.
 246          * Default channel layouts based on the number of channels
 247          * are assumed for simplicity (they are sometimes not detected
 248          * properly by the demuxer and/or decoder).
 249          */
 250         *resample_context = swr_alloc_set_opts(NULL,
 251                                               av_get_default_channel_layout(output_codec_context->channels),
 252                                               output_codec_context->sample_fmt,
 253                                               output_codec_context->sample_rate,
 254                                               av_get_default_channel_layout(input_codec_context->channels),
 255                                               input_codec_context->sample_fmt,
 256                                               input_codec_context->sample_rate,
 257                                               0, NULL);
 258         if (!*resample_context) {
 259             fprintf(stderr, "Could not allocate resample context\n");
 260             return AVERROR(ENOMEM);
 261         }
 262         /**
 263         * Perform a sanity check so that the number of converted samples is
 264         * not greater than the number of samples to be converted.
 265         * If the sample rates differ, this case has to be handled differently
 266         */
 267         av_assert0(output_codec_context->sample_rate == input_codec_context->sample_rate);
 268
 269         /** Open the resampler with the specified parameters. */
 270         if ((error = swr_init(*resample_context)) < 0) {
 271             fprintf(stderr, "Could not open resample context\n");
 272             swr_free(resample_context);
 273             return error;
 274         }
 275     return 0;
 276 }
 277
 278 /** Initialize a FIFO buffer for the audio samples to be encoded. */
 279 static int init_fifo(AVAudioFifo **fifo, AVCodecContext *output_codec_context)
 280 {
 281     /** Create the FIFO buffer based on the specified output sample format. */
 282     if (!(*fifo = av_audio_fifo_alloc(output_codec_context->sample_fmt,
 283                                       output_codec_context->channels, 1))) {
 284         fprintf(stderr, "Could not allocate FIFO\n");
 285         return AVERROR(ENOMEM);
 286     }
 287     return 0;
 288 }
 289
 290 /** Write the header of the output file container. */
 291 static int write_output_file_header(AVFormatContext *output_format_context)
 292 {
 293     int error;
 294     if ((error = avformat_write_header(output_format_context, NULL)) < 0) {
 295         fprintf(stderr, "Could not write output file header (error '%s')\n",
 296                 get_error_text(error));
 297         return error;
 298     }
 299     return 0;
 300 }
 301
 302 /** Decode one audio frame from the input file. */
 303 static int decode_audio_frame(AVFrame *frame,
 304                               AVFormatContext *input_format_context,
 305                               AVCodecContext *input_codec_context,
 306                               int *data_present, int *finished)
 307 {
 308     /** Packet used for temporary storage. */
 309     AVPacket input_packet;
 310     int error;
 311     init_packet(&input_packet);
 312
 313     /** Read one audio frame from the input file into a temporary packet. */
 314     if ((error = av_read_frame(input_format_context, &input_packet)) < 0) {
 315         /** If we are at the end of the file, flush the decoder below. */
 316         if (error == AVERROR_EOF)
 317             *finished = 1;
 318         else {
 319             fprintf(stderr, "Could not read frame (error '%s')\n",
 320                     get_error_text(error));
 321             return error;
 322         }
 323     }
 324
 325     /**
 326      * Decode the audio frame stored in the temporary packet.
 327      * The input audio stream decoder is used to do this.
 328      * If we are at the end of the file, pass an empty packet to the decoder
 329      * to flush it.
 330      */
 331     if ((error = avcodec_decode_audio4(input_codec_context, frame,
 332                                        data_present, &input_packet)) < 0) {
 333         fprintf(stderr, "Could not decode frame (error '%s')\n",
 334                 get_error_text(error));
 335         av_free_packet(&input_packet);
 336         return error;
 337     }
 338
 339     /**
 340      * If the decoder has not been flushed completely, we are not finished,
 341      * so that this function has to be called again.
 342      */
 343     if (*finished && *data_present)
 344         *finished = 0;
 345     av_free_packet(&input_packet);
 346     return 0;
 347 }
 348
 349 /**
 350  * Initialize a temporary storage for the specified number of audio samples.
 351  * The conversion requires temporary storage due to the different format.
 352  * The number of audio samples to be allocated is specified in frame_size.
 353  */
 354 static int init_converted_samples(uint8_t ***converted_input_samples,
 355                                   AVCodecContext *output_codec_context,
 356                                   int frame_size)
 357 {
 358     int error;
 359
 360     /**
 361      * Allocate as many pointers as there are audio channels.
 362      * Each pointer will later point to the audio samples of the corresponding
 363      * channels (although it may be NULL for interleaved formats).
 364      */
 365     if (!(*converted_input_samples = calloc(output_codec_context->channels,
 366                                             sizeof(**converted_input_samples)))) {
 367         fprintf(stderr, "Could not allocate converted input sample pointers\n");
 368         return AVERROR(ENOMEM);
 369     }
 370
 371     /**
 372      * Allocate memory for the samples of all channels in one consecutive
 373      * block for convenience.
 374      */
 375     if ((error = av_samples_alloc(*converted_input_samples, NULL,
 376                                   output_codec_context->channels,
 377                                   frame_size,
 378                                   output_codec_context->sample_fmt, 0)) < 0) {
 379         fprintf(stderr,
 380                 "Could not allocate converted input samples (error '%s')\n",
 381                 get_error_text(error));
 382         av_freep(&(*converted_input_samples)[0]);
 383         free(*converted_input_samples);
 384         return error;
 385     }
 386     return 0;
 387 }
 388
 389 /**
 390  * Convert the input audio samples into the output sample format.
 391  * The conversion happens on a per-frame basis, the size of which is specified
 392  * by frame_size.
 393  */
 394 static int convert_samples(const uint8_t **input_data,
 395                            uint8_t **converted_data, const int frame_size,
 396                            SwrContext *resample_context)
 397 {
 398     int error;
 399
 400     /** Convert the samples using the resampler. */
 401     if ((error = swr_convert(resample_context,
 402                              converted_data, frame_size,
 403                              input_data    , frame_size)) < 0) {
 404         fprintf(stderr, "Could not convert input samples (error '%s')\n",
 405                 get_error_text(error));
 406         return error;
 407     }
 408
 409     return 0;
 410 }
 411
 412 /** Add converted input audio samples to the FIFO buffer for later processing. */
 413 static int add_samples_to_fifo(AVAudioFifo *fifo,
 414                                uint8_t **converted_input_samples,
 415                                const int frame_size)
 416 {
 417     int error;
 418
 419     /**
 420      * Make the FIFO as large as it needs to be to hold both,
 421      * the old and the new samples.
 422      */
 423     if ((error = av_audio_fifo_realloc(fifo, av_audio_fifo_size(fifo) + frame_size)) < 0) {
 424         fprintf(stderr, "Could not reallocate FIFO\n");
 425         return error;
 426     }
 427
 428     /** Store the new samples in the FIFO buffer. */
 429     if (av_audio_fifo_write(fifo, (void **)converted_input_samples,
 430                             frame_size) < frame_size) {
 431         fprintf(stderr, "Could not write data to FIFO\n");
 432         return AVERROR_EXIT;
 433     }
 434     return 0;
 435 }
 436
 437 /**
 438  * Read one audio frame from the input file, decodes, converts and stores
 439  * it in the FIFO buffer.
 440  */
 441 static int read_decode_convert_and_store(AVAudioFifo *fifo,
 442                                          AVFormatContext *input_format_context,
 443                                          AVCodecContext *input_codec_context,
 444                                          AVCodecContext *output_codec_context,
 445                                          SwrContext *resampler_context,
 446                                          int *finished)
 447 {
 448     /** Temporary storage of the input samples of the frame read from the file. */
 449     AVFrame *input_frame = NULL;
 450     /** Temporary storage for the converted input samples. */
 451     uint8_t **converted_input_samples = NULL;
 452     int data_present;
 453     int ret = AVERROR_EXIT;
 454
 455     /** Initialize temporary storage for one input frame. */
 456     if (init_input_frame(&input_frame))
 457         goto cleanup;
 458     /** Decode one frame worth of audio samples. */
 459     if (decode_audio_frame(input_frame, input_format_context,
 460                            input_codec_context, &data_present, finished))
 461         goto cleanup;
 462     /**
 463      * If we are at the end of the file and there are no more samples
 464      * in the decoder which are delayed, we are actually finished.
 465      * This must not be treated as an error.
 466      */
 467     if (*finished && !data_present) {
 468         ret = 0;
 469         goto cleanup;
 470     }
 471     /** If there is decoded data, convert and store it */
 472     if (data_present) {
 473         /** Initialize the temporary storage for the converted input samples. */
 474         if (init_converted_samples(&converted_input_samples, output_codec_context,
 475                                    input_frame->nb_samples))
 476             goto cleanup;
 477
 478         /**
 479          * Convert the input samples to the desired output sample format.
 480          * This requires a temporary storage provided by converted_input_samples.
 481          */
 482         if (convert_samples((const uint8_t**)input_frame->extended_data, converted_input_samples,
 483                             input_frame->nb_samples, resampler_context))
 484             goto cleanup;
 485
 486         /** Add the converted input samples to the FIFO buffer for later processing. */
 487         if (add_samples_to_fifo(fifo, converted_input_samples,
 488                                 input_frame->nb_samples))
 489             goto cleanup;
 490         ret = 0;
 491     }
 492     ret = 0;
 493
 494 cleanup:
 495     if (converted_input_samples) {
 496         av_freep(&converted_input_samples[0]);
 497         free(converted_input_samples);
 498     }
 499     av_frame_free(&input_frame);
 500
 501     return ret;
 502 }
 503
 504 /**
 505  * Initialize one input frame for writing to the output file.
 506  * The frame will be exactly frame_size samples large.
 507  */
 508 static int init_output_frame(AVFrame **frame,
 509                              AVCodecContext *output_codec_context,
 510                              int frame_size)
 511 {
 512     int error;
 513
 514     /** Create a new frame to store the audio samples. */
 515     if (!(*frame = av_frame_alloc())) {
 516         fprintf(stderr, "Could not allocate output frame\n");
 517         return AVERROR_EXIT;
 518     }
 519
 520     /**
 521      * Set the frame's parameters, especially its size and format.
 522      * av_frame_get_buffer needs this to allocate memory for the
 523      * audio samples of the frame.
 524      * Default channel layouts based on the number of channels
 525      * are assumed for simplicity.
 526      */
 527     (*frame)->nb_samples     = frame_size;
 528     (*frame)->channel_layout = output_codec_context->channel_layout;
 529     (*frame)->format         = output_codec_context->sample_fmt;
 530     (*frame)->sample_rate    = output_codec_context->sample_rate;
 531
 532     /**
 533      * Allocate the samples of the created frame. This call will make
 534      * sure that the audio frame can hold as many samples as specified.
 535      */
 536     if ((error = av_frame_get_buffer(*frame, 0)) < 0) {
 537         fprintf(stderr, "Could allocate output frame samples (error '%s')\n",
 538                 get_error_text(error));
 539         av_frame_free(frame);
 540         return error;
 541     }
 542
 543     return 0;
 544 }
 545
 546 /** Global timestamp for the audio frames */
 547 static int64_t pts = 0;
 548
 549 /** Encode one frame worth of audio to the output file. */
 550 static int encode_audio_frame(AVFrame *frame,
 551                               AVFormatContext *output_format_context,
 552                               AVCodecContext *output_codec_context,
 553                               int *data_present)
 554 {
 555     /** Packet used for temporary storage. */
 556     AVPacket output_packet;
 557     int error;
 558     init_packet(&output_packet);
 559
 560     /** Set a timestamp based on the sample rate for the container. */
 561     if (frame) {
 562         frame->pts = pts;
 563         pts += frame->nb_samples;
 564     }
 565
 566     /**
 567      * Encode the audio frame and store it in the temporary packet.
 568      * The output audio stream encoder is used to do this.
 569      */
 570     if ((error = avcodec_encode_audio2(output_codec_context, &output_packet,
 571                                        frame, data_present)) < 0) {
 572         fprintf(stderr, "Could not encode frame (error '%s')\n",
 573                 get_error_text(error));
 574         av_free_packet(&output_packet);
 575         return error;
 576     }
 577
 578     /** Write one audio frame from the temporary packet to the output file. */
 579     if (*data_present) {
 580         if ((error = av_write_frame(output_format_context, &output_packet)) < 0) {
 581             fprintf(stderr, "Could not write frame (error '%s')\n",
 582                     get_error_text(error));
 583             av_free_packet(&output_packet);
 584             return error;
 585         }
 586
 587         av_free_packet(&output_packet);
 588     }
 589
 590     return 0;
 591 }
 592
 593 /**
 594  * Load one audio frame from the FIFO buffer, encode and write it to the
 595  * output file.
 596  */
 597 static int load_encode_and_write(AVAudioFifo *fifo,
 598                                  AVFormatContext *output_format_context,
 599                                  AVCodecContext *output_codec_context)
 600 {
 601     /** Temporary storage of the output samples of the frame written to the file. */
 602     AVFrame *output_frame;
 603     /**
 604      * Use the maximum number of possible samples per frame.
 605      * If there is less than the maximum possible frame size in the FIFO
 606      * buffer use this number. Otherwise, use the maximum possible frame size
 607      */
 608     const int frame_size = FFMIN(av_audio_fifo_size(fifo),
 609                                  output_codec_context->frame_size);
 610     int data_written;
 611
 612     /** Initialize temporary storage for one output frame. */
 613     if (init_output_frame(&output_frame, output_codec_context, frame_size))
 614         return AVERROR_EXIT;
 615
 616     /**
 617      * Read as many samples from the FIFO buffer as required to fill the frame.
 618      * The samples are stored in the frame temporarily.
 619      */
 620     if (av_audio_fifo_read(fifo, (void **)output_frame->data, frame_size) < frame_size) {
 621         fprintf(stderr, "Could not read data from FIFO\n");
 622         av_frame_free(&output_frame);
 623         return AVERROR_EXIT;
 624     }
 625
 626     /** Encode one frame worth of audio samples. */
 627     if (encode_audio_frame(output_frame, output_format_context,
 628                            output_codec_context, &data_written)) {
 629         av_frame_free(&output_frame);
 630         return AVERROR_EXIT;
 631     }
 632     av_frame_free(&output_frame);
 633     return 0;
 634 }
 635
 636 /** Write the trailer of the output file container. */
 637 static int write_output_file_trailer(AVFormatContext *output_format_context)
 638 {
 639     int error;
 640     if ((error = av_write_trailer(output_format_context)) < 0) {
 641         fprintf(stderr, "Could not write output file trailer (error '%s')\n",
 642                 get_error_text(error));
 643         return error;
 644     }
 645     return 0;
 646 }
 647
 648 /** Convert an audio file to an AAC file in an MP4 container. */
 649 int main(int argc, char **argv)
 650 {
 651     AVFormatContext *input_format_context = NULL, *output_format_context = NULL;
 652     AVCodecContext *input_codec_context = NULL, *output_codec_context = NULL;
 653     SwrContext *resample_context = NULL;
 654     AVAudioFifo *fifo = NULL;
 655     int ret = AVERROR_EXIT;
 656
 657     if (argc < 3) {
 658         fprintf(stderr, "Usage: %s <input file> <output file>\n", argv[0]);
 659         exit(1);
 660     }
 661
 662     /** Register all codecs and formats so that they can be used. */
 663     av_register_all();
 664     /** Open the input file for reading. */
 665     if (open_input_file(argv[1], &input_format_context,
 666                         &input_codec_context))
 667         goto cleanup;
 668     /** Open the output file for writing. */
 669     if (open_output_file(argv[2], input_codec_context,
 670                          &output_format_context, &output_codec_context))
 671         goto cleanup;
 672     /** Initialize the resampler to be able to convert audio sample formats. */
 673     if (init_resampler(input_codec_context, output_codec_context,
 674                        &resample_context))
 675         goto cleanup;
 676     /** Initialize the FIFO buffer to store audio samples to be encoded. */
 677     if (init_fifo(&fifo, output_codec_context))
 678         goto cleanup;
 679     /** Write the header of the output file container. */
 680     if (write_output_file_header(output_format_context))
 681         goto cleanup;
 682
 683     /**
 684      * Loop as long as we have input samples to read or output samples
 685      * to write; abort as soon as we have neither.
 686      */
 687     while (1) {
 688         /** Use the encoder's desired frame size for processing. */
 689         const int output_frame_size = output_codec_context->frame_size;
 690         int finished                = 0;
 691
 692         /**
 693          * Make sure that there is one frame worth of samples in the FIFO
 694          * buffer so that the encoder can do its work.
 695          * Since the decoder's and the encoder's frame size may differ, we
 696          * need to FIFO buffer to store as many frames worth of input samples
 697          * that they make up at least one frame worth of output samples.
 698          */
 699         while (av_audio_fifo_size(fifo) < output_frame_size) {
 700             /**
 701              * Decode one frame worth of audio samples, convert it to the
 702              * output sample format and put it into the FIFO buffer.
 703              */
 704             if (read_decode_convert_and_store(fifo, input_format_context,
 705                                               input_codec_context,
 706                                               output_codec_context,
 707                                               resample_context, &finished))
 708                 goto cleanup;
 709
 710             /**
 711              * If we are at the end of the input file, we continue
 712              * encoding the remaining audio samples to the output file.
 713              */
 714             if (finished)
 715                 break;
 716         }
 717
 718         /**
 719          * If we have enough samples for the encoder, we encode them.
 720          * At the end of the file, we pass the remaining samples to
 721          * the encoder.
 722          */
 723         while (av_audio_fifo_size(fifo) >= output_frame_size ||
 724                (finished && av_audio_fifo_size(fifo) > 0))
 725             /**
 726              * Take one frame worth of audio samples from the FIFO buffer,
 727              * encode it and write it to the output file.
 728              */
 729             if (load_encode_and_write(fifo, output_format_context,
 730                                       output_codec_context))
 731                 goto cleanup;
 732
 733         /**
 734          * If we are at the end of the input file and have encoded
 735          * all remaining samples, we can exit this loop and finish.
 736          */
 737         if (finished) {
 738             int data_written;
 739             /** Flush the encoder as it may have delayed frames. */
 740             do {
 741                 if (encode_audio_frame(NULL, output_format_context,
 742                                        output_codec_context, &data_written))
 743                     goto cleanup;
 744             } while (data_written);
 745             break;
 746         }
 747     }
 748
 749     /** Write the trailer of the output file container. */
 750     if (write_output_file_trailer(output_format_context))
 751         goto cleanup;
 752     ret = 0;
 753
 754 cleanup:
 755     if (fifo)
 756         av_audio_fifo_free(fifo);
 757     swr_free(&resample_context);
 758     if (output_codec_context)
 759         avcodec_close(output_codec_context);
 760     if (output_format_context) {
 761         avio_closep(&output_format_context->pb);
 762         avformat_free_context(output_format_context);
 763     }
 764     if (input_codec_context)
 765         avcodec_close(input_codec_context);
 766     if (input_format_context)
 767         avformat_close_input(&input_format_context);
 768
 769     return ret;
 770 }