lavc: Add flag to allow profile mismatch with hardware decoding

[ffmpeg] / libavcodec / vorbisenc.c
diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c

index 2974ca2cf95a600b3275b5b4f43c8af2d74c6441..afded40da0195e317b9f23803f375afd77e781d7 100644 (file)
--- a/libavcodec/vorbisenc.c
+++ b/libavcodec/vorbisenc.c
@@ -25,6 +25,7 @@
   */
  
  #include <float.h>
+#include "libavutil/float_dsp.h"
  
  #include "avcodec.h"
  #include "internal.h"
@@ -33,6 +34,9 @@
  #include "vorbis.h"
  #include "vorbis_enc_data.h"
  
+#include "audio_frame_queue.h"
+#include "libavfilter/bufferqueue.h"
+
  #define BITSTREAM_WRITER_LE
  #include "put_bits.h"
  
@@ -108,8 +112,12 @@ typedef struct vorbis_enc_context {
      float *samples;
      float *floor;  // also used for tmp values for mdct
      float *coeffs; // also used for residue after floor
+    float *scratch; // used for tmp values for psy model
      float quality;
  
+    AudioFrameQueue afq;
+    struct FFBufQueue bufqueue;
+
      int ncodebooks;
      vorbis_enc_codebook *codebooks;
  
@@ -126,6 +134,8 @@ typedef struct vorbis_enc_context {
      vorbis_enc_mode *modes;
  
      int64_t next_pts;
+
+    AVFloatDSPContext *fdsp;
  } vorbis_enc_context;
  
  #define MAX_CHANNELS     2
@@ -236,6 +246,26 @@ static int ready_residue(vorbis_enc_residue *rc, vorbis_enc_context *venc)
      return 0;
  }
  
+static av_cold int dsp_init(AVCodecContext *avctx, vorbis_enc_context *venc)
+{
+    int ret = 0;
+
+    venc->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
+    if (!venc->fdsp)
+        return AVERROR(ENOMEM);
+
+    // init windows
+    venc->win[0] = ff_vorbis_vwin[venc->log2_blocksize[0] - 6];
+    venc->win[1] = ff_vorbis_vwin[venc->log2_blocksize[1] - 6];
+
+    if ((ret = ff_mdct_init(&venc->mdct[0], venc->log2_blocksize[0], 0, 1.0)) < 0)
+        return ret;
+    if ((ret = ff_mdct_init(&venc->mdct[1], venc->log2_blocksize[1], 0, 1.0)) < 0)
+        return ret;
+
+    return 0;
+}
+
  static int create_vorbis_context(vorbis_enc_context *venc,
                                   AVCodecContext *avctx)
  {
@@ -423,15 +453,12 @@ static int create_vorbis_context(vorbis_enc_context *venc,
      venc->samples    = av_malloc_array(sizeof(float) * venc->channels, (1 << venc->log2_blocksize[1]));
      venc->floor      = av_malloc_array(sizeof(float) * venc->channels, (1 << venc->log2_blocksize[1]) / 2);
      venc->coeffs     = av_malloc_array(sizeof(float) * venc->channels, (1 << venc->log2_blocksize[1]) / 2);
-    if (!venc->saved || !venc->samples || !venc->floor || !venc->coeffs)
-        return AVERROR(ENOMEM);
+    venc->scratch    = av_malloc_array(sizeof(float) * venc->channels, (1 << venc->log2_blocksize[1]) / 2);
  
-    venc->win[0] = ff_vorbis_vwin[venc->log2_blocksize[0] - 6];
-    venc->win[1] = ff_vorbis_vwin[venc->log2_blocksize[1] - 6];
+    if (!venc->saved || !venc->samples || !venc->floor || !venc->coeffs || !venc->scratch)
+        return AVERROR(ENOMEM);
  
-    if ((ret = ff_mdct_init(&venc->mdct[0], venc->log2_blocksize[0], 0, 1.0)) < 0)
-        return ret;
-    if ((ret = ff_mdct_init(&venc->mdct[1], venc->log2_blocksize[1], 0, 1.0)) < 0)
+    if ((ret = dsp_init(avctx, venc)) < 0)
          return ret;
  
      return 0;
@@ -968,13 +995,13 @@ static int residue_encode(vorbis_enc_context *venc, vorbis_enc_residue *rc,
  }
  
  static int apply_window_and_mdct(vorbis_enc_context *venc,
-                                 float **audio, int samples)
+                                 float *audio, int samples)
  {
-    int i, channel;
+    int channel;
      const float * win = venc->win[0];
      int window_len = 1 << (venc->log2_blocksize[0] - 1);
      float n = (float)(1 << venc->log2_blocksize[0]) / 4.0;
-    // FIXME use dsp
+    AVFloatDSPContext *fdsp = venc->fdsp;
  
      if (!venc->have_saved && !samples)
          return 0;
@@ -991,9 +1018,10 @@ static int apply_window_and_mdct(vorbis_enc_context *venc,
  
      if (samples) {
          for (channel = 0; channel < venc->channels; channel++) {
-            float * offset = venc->samples + channel*window_len*2 + window_len;
-            for (i = 0; i < samples; i++)
-                offset[i] = audio[channel][i] / n * win[window_len - i - 1];
+            float *offset = venc->samples + channel * window_len * 2 + window_len;
+
+            fdsp->vector_fmul_reverse(offset, audio + channel * window_len, win, samples);
+            fdsp->vector_fmul_scalar(offset, offset, 1/n, samples);
          }
      } else {
          for (channel = 0; channel < venc->channels; channel++)
@@ -1008,8 +1036,9 @@ static int apply_window_and_mdct(vorbis_enc_context *venc,
      if (samples) {
          for (channel = 0; channel < venc->channels; channel++) {
              float *offset = venc->saved + channel * window_len;
-            for (i = 0; i < samples; i++)
-                offset[i] = audio[channel][i] / n * win[i];
+
+            fdsp->vector_fmul(offset, audio + channel * window_len, win, samples);
+            fdsp->vector_fmul_scalar(offset, offset, 1/n, samples);
          }
          venc->have_saved = 1;
      } else {
@@ -1018,20 +1047,92 @@ static int apply_window_and_mdct(vorbis_enc_context *venc,
      return 1;
  }
  
+/* Used for padding the last encoded packet */
+static AVFrame *spawn_empty_frame(AVCodecContext *avctx, int channels)
+{
+    AVFrame *f = av_frame_alloc();
+
+    if (!f)
+        return NULL;
+
+    f->format = avctx->sample_fmt;
+    f->nb_samples = avctx->frame_size;
+    f->channel_layout = avctx->channel_layout;
+
+    if (av_frame_get_buffer(f, 4)) {
+        av_frame_free(&f);
+        return NULL;
+    }
+
+    for (int ch = 0; ch < channels; ch++) {
+        size_t bps = av_get_bytes_per_sample(f->format);
+        memset(f->extended_data[ch], 0, bps * f->nb_samples);
+    }
+    return f;
+}
+
+/* Concatenate audio frames into an appropriately sized array of samples */
+static void move_audio(vorbis_enc_context *venc, float *audio, int *samples, int sf_size)
+{
+    AVFrame *cur = NULL;
+    int frame_size = 1 << (venc->log2_blocksize[1] - 1);
+    int subframes = frame_size / sf_size;
+
+    for (int sf = 0; sf < subframes; sf++) {
+        cur = ff_bufqueue_get(&venc->bufqueue);
+        *samples += cur->nb_samples;
+
+        for (int ch = 0; ch < venc->channels; ch++) {
+            const float *input = (float *) cur->extended_data[ch];
+            const size_t len  = cur->nb_samples * sizeof(float);
+            memcpy(audio + ch*frame_size + sf*sf_size, input, len);
+        }
+        av_frame_free(&cur);
+    }
+}
+
  static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                                 const AVFrame *frame, int *got_packet_ptr)
  {
      vorbis_enc_context *venc = avctx->priv_data;
-    float **audio = frame ? (float **)frame->extended_data : NULL;
-    int samples = frame ? frame->nb_samples : 0;
+    int i, ret, need_more;
+    int samples = 0, frame_size = 1 << (venc->log2_blocksize[1] - 1);
      vorbis_enc_mode *mode;
      vorbis_enc_mapping *mapping;
      PutBitContext pb;
-    int i, ret;
  
-    if (!apply_window_and_mdct(venc, audio, samples))
+    if (frame) {
+        if ((ret = ff_af_queue_add(&venc->afq, frame)) < 0)
+            return ret;
+        ff_bufqueue_add(avctx, &venc->bufqueue, av_frame_clone(frame));
+    } else
+        if (!venc->afq.remaining_samples)
+            return 0;
+
+    need_more = venc->bufqueue.available * avctx->frame_size < frame_size;
+    need_more = frame && need_more;
+    if (need_more)
+        return 0;
+
+    /* Pad the bufqueue with empty frames for encoding the last packet. */
+    if (!frame) {
+        if (venc->bufqueue.available * avctx->frame_size < frame_size) {
+            int frames_needed = (frame_size/avctx->frame_size) - venc->bufqueue.available;
+
+            for (int i = 0; i < frames_needed; i++) {
+               AVFrame *empty = spawn_empty_frame(avctx, venc->channels);
+               if (!empty)
+                   return AVERROR(ENOMEM);
+
+               ff_bufqueue_add(avctx, &venc->bufqueue, empty);
+            }
+        }
+    }
+
+    move_audio(venc, venc->scratch, &samples, avctx->frame_size);
+
+    if (!apply_window_and_mdct(venc, venc->scratch, samples))
          return 0;
-    samples = 1 << (venc->log2_blocksize[0] - 1);
  
      if ((ret = ff_alloc_packet2(avctx, avpkt, 8192, 0)) < 0)
          return ret;
@@ -1090,15 +1191,14 @@ static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
      flush_put_bits(&pb);
      avpkt->size = put_bits_count(&pb) >> 3;
  
-    avpkt->duration = ff_samples_to_time_base(avctx, avctx->frame_size);
-    if (frame) {
-        if (frame->pts != AV_NOPTS_VALUE)
-            avpkt->pts = ff_samples_to_time_base(avctx, frame->pts);
-    } else {
-        avpkt->pts = venc->next_pts;
+    ff_af_queue_remove(&venc->afq, frame_size, &avpkt->pts, &avpkt->duration);
+
+    if (frame_size > avpkt->duration) {
+        uint8_t *side = av_packet_new_side_data(avpkt, AV_PKT_DATA_SKIP_SAMPLES, 10);
+        if (!side)
+            return AVERROR(ENOMEM);
+        AV_WL32(&side[4], frame_size - avpkt->duration);
      }
-    if (avpkt->pts != AV_NOPTS_VALUE)
-        venc->next_pts = avpkt->pts + avpkt->duration;
  
      *got_packet_ptr = 1;
      return 0;
@@ -1155,9 +1255,13 @@ static av_cold int vorbis_encode_close(AVCodecContext *avctx)
      av_freep(&venc->samples);
      av_freep(&venc->floor);
      av_freep(&venc->coeffs);
+    av_freep(&venc->scratch);
+    av_freep(&venc->fdsp);
  
      ff_mdct_end(&venc->mdct[0]);
      ff_mdct_end(&venc->mdct[1]);
+    ff_af_queue_close(&venc->afq);
+    ff_bufqueue_discard_all(&venc->bufqueue);
  
      av_freep(&avctx->extradata);
  
@@ -1188,7 +1292,9 @@ static av_cold int vorbis_encode_init(AVCodecContext *avctx)
          goto error;
      avctx->extradata_size = ret;
  
-    avctx->frame_size = 1 << (venc->log2_blocksize[0] - 1);
+    avctx->frame_size = 64;
+
+    ff_af_queue_init(avctx, &venc->afq);
  
      return 0;
  error: