ARM: NEON 16x16 and 8x8 avg qpel MC

[ffmpeg] / libavcodec / h264.c
diff --git a/libavcodec/h264.c b/libavcodec/h264.c

index d1e9da5cd4b40e2c87af087b2646c28574b92a88..1bfb1e2f258bdce2adc1581a426265c006cf8d83 100644 (file)
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -1999,6 +1999,8 @@ static void free_tables(H264Context *h){
          av_freep(&hx->s.obmc_scratchpad);
          av_freep(&hx->rbsp_buffer[1]);
          av_freep(&hx->rbsp_buffer[0]);
+        hx->rbsp_buffer_size[0] = 0;
+        hx->rbsp_buffer_size[1] = 0;
          if (i) av_freep(&h->thread_context[i]);
      }
  }
@@ -2196,11 +2198,6 @@ static av_cold int decode_init(AVCodecContext *avctx){
      if(!avctx->has_b_frames)
      s->low_delay= 1;
  
-    if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
-        avctx->pix_fmt= PIX_FMT_VDPAU_H264;
-    else
-        avctx->pix_fmt= avctx->get_format(avctx, avctx->codec->pix_fmts);
-    avctx->hwaccel = ff_find_hwaccel(avctx->codec->id, avctx->pix_fmt);
      avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
  
      decode_init_vlc();
@@ -3810,6 +3807,22 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
      if (!s->context_initialized) {
          if(h != h0)
              return -1;  // we cant (re-)initialize context during parallel decoding
+
+        avcodec_set_dimensions(s->avctx, s->width, s->height);
+        s->avctx->sample_aspect_ratio= h->sps.sar;
+        if(!s->avctx->sample_aspect_ratio.den)
+            s->avctx->sample_aspect_ratio.den = 1;
+
+        if(h->sps.timing_info_present_flag){
+            s->avctx->time_base= (AVRational){h->sps.num_units_in_tick, h->sps.time_scale};
+            if(h->x264_build > 0 && h->x264_build < 44)
+                s->avctx->time_base.den *= 2;
+            av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
+                      s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
+        }
+        s->avctx->pix_fmt = s->avctx->get_format(s->avctx, s->avctx->codec->pix_fmts);
+        s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
+
          if (MPV_common_init(s) < 0)
              return -1;
          s->first_field = 0;
@@ -3832,20 +3845,6 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
          for(i = 0; i < s->avctx->thread_count; i++)
              if(context_init(h->thread_context[i]) < 0)
                  return -1;
-
-        s->avctx->width = s->width;
-        s->avctx->height = s->height;
-        s->avctx->sample_aspect_ratio= h->sps.sar;
-        if(!s->avctx->sample_aspect_ratio.den)
-            s->avctx->sample_aspect_ratio.den = 1;
-
-        if(h->sps.timing_info_present_flag){
-            s->avctx->time_base= (AVRational){h->sps.num_units_in_tick, h->sps.time_scale};
-            if(h->x264_build > 0 && h->x264_build < 44)
-                s->avctx->time_base.den *= 2;
-            av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
-                      s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
-        }
      }
  
      h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
@@ -6693,7 +6692,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
          ff_init_cabac_states( &h->cabac);
          ff_init_cabac_decoder( &h->cabac,
                                 s->gb.buffer + get_bits_count(&s->gb)/8,
-                               ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
+                               (get_bits_left(&s->gb) + 7)/8);
          /* calculate pre-state */
          for( i= 0; i < 460; i++ ) {
              int pre;
@@ -7461,7 +7460,7 @@ static void execute_decode_slices(H264Context *h, int context_count){
          }
  
          avctx->execute(avctx, (void *)decode_slice,
-                       (void **)h->thread_context, NULL, context_count, sizeof(void*));
+                       h->thread_context, NULL, context_count, sizeof(void*));
  
          /* pull back stuff from slices to master context */
          hx = h->thread_context[context_count - 1];
@@ -7795,7 +7794,7 @@ static int decode_frame(AVCodecContext *avctx,
      if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
          Picture *out = s->current_picture_ptr;
          Picture *cur = s->current_picture_ptr;
-        int i, pics, cross_idr, out_of_order, out_idx;
+        int i, pics, out_of_order, out_idx;
  
          field_end(h);
  
@@ -7899,15 +7898,15 @@ static int decode_frame(AVCodecContext *avctx,
                      out = h->delayed_pic[i];
                      out_idx = i;
                  }
-            cross_idr = !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset;
-
-            out_of_order = !cross_idr && out->poc < h->outputed_poc;
+            if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset))
+                h->outputed_poc= INT_MIN;
+            out_of_order = out->poc < h->outputed_poc;
  
              if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
                  { }
              else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
                 || (s->low_delay &&
-                ((!cross_idr && out->poc > h->outputed_poc + 2)
+                ((h->outputed_poc != INT_MIN && out->poc > h->outputed_poc + 2)
                   || cur->pict_type == FF_B_TYPE)))
              {
                  s->low_delay = 0;
@@ -7922,7 +7921,10 @@ static int decode_frame(AVCodecContext *avctx,
              if(!out_of_order && pics > s->avctx->has_b_frames){
                  *data_size = sizeof(AVFrame);
  
-                h->outputed_poc = out->poc;
+                if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) {
+                    h->outputed_poc = INT_MIN;
+                } else
+                    h->outputed_poc = out->poc;
                  *pict= *(AVFrame*)out;
              }else{
                  av_log(avctx, AV_LOG_DEBUG, "no picture\n");
@@ -8188,6 +8190,7 @@ AVCodec h264_vdpau_decoder = {
      CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
      .flush= flush_dpb,
      .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
+    .pix_fmts = (const enum PixelFormat[]){PIX_FMT_VDPAU_H264, PIX_FMT_NONE},
  };
  #endif