Implement a SIMD version of emulated_edge_mc() for x86.

[ffmpeg] / libavcodec / snow.c
diff --git a/libavcodec/snow.c b/libavcodec/snow.c

index a4758b69ebb0a4761ce54b8c854dca436208524e..9ab9fed71f22955a869d66f57d55824e4b011fbc 100644 (file)
--- a/libavcodec/snow.c
+++ b/libavcodec/snow.c
@@ -920,7 +920,7 @@ static void decode_blocks(SnowContext *s){
      }
  }
  
-static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
+static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
      static const uint8_t weight[64]={
      8,7,6,5,4,3,2,1,
      7,7,0,0,0,0,0,1,
@@ -1106,9 +1106,8 @@ static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, i
  
  #define mca(dx,dy,b_w)\
  static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
-    uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
      assert(h==b_w);\
-    mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
+    mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
  }
  
  mca( 0, 0,16)
@@ -1172,7 +1171,7 @@ static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, i
          src += sx + sy*stride;
          if(   (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
             || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
-            ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
+            s->dsp.emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
              src= tmp + MB_SIZE;
          }
  //        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
@@ -1180,7 +1179,7 @@ static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, i
          assert(b_w>1 && b_h>1);
          assert((tab_index>=0 && tab_index<4) || b_w==32);
          if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
-            mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
+            mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
          else if(b_w==32){
              int y;
              for(y=0; y<b_h; y+=16){
@@ -2081,8 +2080,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPac
      common_init_after_header(avctx);
  
      // realloc slice buffer for the case that spatial_decomposition_count changed
-    slice_buffer_destroy(&s->sb);
-    slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
+    ff_slice_buffer_destroy(&s->sb);
+    ff_slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
  
      for(plane_index=0; plane_index<3; plane_index++){
          Plane *p= &s->plane[plane_index];
@@ -2199,10 +2198,10 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPac
              y = FFMIN(p->height, slice_starty);
              end_y = FFMIN(p->height, slice_h);
              while(y < end_y)
-                slice_buffer_release(&s->sb, y++);
+                ff_slice_buffer_release(&s->sb, y++);
          }
  
-        slice_buffer_flush(&s->sb);
+        ff_slice_buffer_flush(&s->sb);
          }
  
      }
@@ -2228,16 +2227,16 @@ static av_cold int decode_end(AVCodecContext *avctx)
  {
      SnowContext *s = avctx->priv_data;
  
-    slice_buffer_destroy(&s->sb);
+    ff_slice_buffer_destroy(&s->sb);
  
      common_end(s);
  
      return 0;
  }
  
-AVCodec snow_decoder = {
+AVCodec ff_snow_decoder = {
      "snow",
-    CODEC_TYPE_VIDEO,
+    AVMEDIA_TYPE_VIDEO,
      CODEC_ID_SNOW,
      sizeof(SnowContext),
      decode_init,
@@ -2829,9 +2828,9 @@ static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, con
           * to improve the score of the whole frame, thus iterative motion
           * estimation does not always converge. */
          if(s->avctx->me_cmp == FF_CMP_W97)
-            distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
+            distortion = ff_w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
          else if(s->avctx->me_cmp == FF_CMP_W53)
-            distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
+            distortion = ff_w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
          else{
              distortion = 0;
              for(i=0; i<4; i++){
@@ -3986,9 +3985,9 @@ static av_cold int encode_end(AVCodecContext *avctx)
      return 0;
  }
  
-AVCodec snow_encoder = {
+AVCodec ff_snow_encoder = {
      "snow",
-    CODEC_TYPE_VIDEO,
+    AVMEDIA_TYPE_VIDEO,
      CODEC_ID_SNOW,
      sizeof(SnowContext),
      encode_init,