cosmetics in dsp init

[ffmpeg] / libavcodec / dsputil.c
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c

index 055486d493b2857c344e5fab2979f4ff73d8d0ef..a9661eb2b5ef65a486a581d6c57811c90f66d8b3 100644 (file)
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -29,7 +29,6 @@
  
  #include "avcodec.h"
  #include "dsputil.h"
-#include "mpegvideo.h"
  #include "simple_idct.h"
  #include "faandct.h"
  #include "faanidct.h"
@@ -152,6 +151,32 @@ static const uint8_t simple_mmx_permutation[64]={
          0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
  };
  
+static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
+
+void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
+    int i;
+    int end;
+
+    st->scantable= src_scantable;
+
+    for(i=0; i<64; i++){
+        int j;
+        j = src_scantable[i];
+        st->permutated[i] = permutation[j];
+#ifdef ARCH_POWERPC
+        st->inverse[j] = i;
+#endif
+    }
+
+    end=-1;
+    for(i=0; i<64; i++){
+        int j;
+        j = st->permutated[i];
+        if(j>end) end=j;
+        st->raster_end[i]= end;
+    }
+}
+
  static int pix_sum_c(uint8_t * pix, int line_size)
  {
      int s, i, j;
@@ -404,6 +429,106 @@ int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
  }
  #endif
  
+/* draw the edges of width 'w' of an image of size width, height */
+//FIXME check that this is ok for mpeg4 interlaced
+static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
+{
+    uint8_t *ptr, *last_line;
+    int i;
+
+    last_line = buf + (height - 1) * wrap;
+    for(i=0;i<w;i++) {
+        /* top and bottom */
+        memcpy(buf - (i + 1) * wrap, buf, width);
+        memcpy(last_line + (i + 1) * wrap, last_line, width);
+    }
+    /* left and right */
+    ptr = buf;
+    for(i=0;i<height;i++) {
+        memset(ptr - w, ptr[0], w);
+        memset(ptr + width, ptr[width-1], w);
+        ptr += wrap;
+    }
+    /* corners */
+    for(i=0;i<w;i++) {
+        memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
+        memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
+        memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* top left */
+        memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* top right */
+    }
+}
+
+/**
+ * Copies a rectangular area of samples to a temporary buffer and replicates the boarder samples.
+ * @param buf destination buffer
+ * @param src source buffer
+ * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
+ * @param block_w width of block
+ * @param block_h height of block
+ * @param src_x x coordinate of the top left sample of the block in the source buffer
+ * @param src_y y coordinate of the top left sample of the block in the source buffer
+ * @param w width of the source buffer
+ * @param h height of the source buffer
+ */
+void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
+                                    int src_x, int src_y, int w, int h){
+    int x, y;
+    int start_y, start_x, end_y, end_x;
+
+    if(src_y>= h){
+        src+= (h-1-src_y)*linesize;
+        src_y=h-1;
+    }else if(src_y<=-block_h){
+        src+= (1-block_h-src_y)*linesize;
+        src_y=1-block_h;
+    }
+    if(src_x>= w){
+        src+= (w-1-src_x);
+        src_x=w-1;
+    }else if(src_x<=-block_w){
+        src+= (1-block_w-src_x);
+        src_x=1-block_w;
+    }
+
+    start_y= FFMAX(0, -src_y);
+    start_x= FFMAX(0, -src_x);
+    end_y= FFMIN(block_h, h-src_y);
+    end_x= FFMIN(block_w, w-src_x);
+
+    // copy existing part
+    for(y=start_y; y<end_y; y++){
+        for(x=start_x; x<end_x; x++){
+            buf[x + y*linesize]= src[x + y*linesize];
+        }
+    }
+
+    //top
+    for(y=0; y<start_y; y++){
+        for(x=start_x; x<end_x; x++){
+            buf[x + y*linesize]= buf[x + start_y*linesize];
+        }
+    }
+
+    //bottom
+    for(y=end_y; y<block_h; y++){
+        for(x=start_x; x<end_x; x++){
+            buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
+        }
+    }
+
+    for(y=0; y<block_h; y++){
+       //left
+        for(x=0; x<start_x; x++){
+            buf[x + y*linesize]= buf[start_x + y*linesize];
+        }
+
+       //right
+        for(x=end_x; x<block_w; x++){
+            buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
+        }
+    }
+}
+
  static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
  {
      int i;
@@ -3573,7 +3698,7 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int
      DECLARE_ALIGNED_8 (uint64_t, aligned_bak[stride]);
      DCTELEM * const temp= (DCTELEM*)aligned_temp;
      uint8_t * const bak= (uint8_t*)aligned_bak;
-    int i, last, run, bits, level, distoration, start_i;
+    int i, last, run, bits, level, distortion, start_i;
      const int esc_length= s->ac_esc_length;
      uint8_t * length;
      uint8_t * last_length;
@@ -3640,9 +3765,9 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int
  
      s->dsp.idct_add(bak, stride, temp);
  
-    distoration= s->dsp.sse[1](NULL, bak, src1, stride, 8);
+    distortion= s->dsp.sse[1](NULL, bak, src1, stride, 8);
  
-    return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7);
+    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
  }
  
  static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
@@ -3805,20 +3930,74 @@ void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1,
          dst[i*step] = src0[i] * src1[i] + src2[i] + src3;
  }
  
-void ff_float_to_int16_c(int16_t *dst, const float *src, int len){
+void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len){
+    int i,j;
+    dst += len;
+    win += len;
+    src0+= len;
+    for(i=-len, j=len-1; i<0; i++, j--) {
+        float s0 = src0[i];
+        float s1 = src1[j];
+        float wi = win[i];
+        float wj = win[j];
+        dst[i] = s0*wj - s1*wi + add_bias;
+        dst[j] = s0*wi + s1*wj + add_bias;
+    }
+}
+
+static av_always_inline int float_to_int16_one(const float *src){
+    int_fast32_t tmp = *(const int32_t*)src;
+    if(tmp & 0xf0000){
+        tmp = (0x43c0ffff - tmp)>>31;
+        // is this faster on some gcc/cpu combinations?
+//      if(tmp > 0x43c0ffff) tmp = 0xFFFF;
+//      else                 tmp = 0;
+    }
+    return tmp - 0x8000;
+}
+
+void ff_float_to_int16_c(int16_t *dst, const float *src, long len){
      int i;
-    for(i=0; i<len; i++) {
-        int_fast32_t tmp = ((const int32_t*)src)[i];
-        if(tmp & 0xf0000){
-            tmp = (0x43c0ffff - tmp)>>31;
-            // is this faster on some gcc/cpu combinations?
-//          if(tmp > 0x43c0ffff) tmp = 0xFFFF;
-//          else                 tmp = 0;
+    for(i=0; i<len; i++)
+        dst[i] = float_to_int16_one(src+i);
+}
+
+void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels){
+    int i,j,c;
+    if(channels==2){
+        for(i=0; i<len; i++){
+            dst[2*i]   = float_to_int16_one(src[0]+i);
+            dst[2*i+1] = float_to_int16_one(src[1]+i);
          }
-        dst[i] = tmp - 0x8000;
+    }else{
+        for(c=0; c<channels; c++)
+            for(i=0, j=c; i<len; i++, j+=channels)
+                dst[j] = float_to_int16_one(src[c]+i);
      }
  }
  
+static void add_int16_c(int16_t * v1, int16_t * v2, int order)
+{
+    while (order--)
+       *v1++ += *v2++;
+}
+
+static void sub_int16_c(int16_t * v1, int16_t * v2, int order)
+{
+    while (order--)
+        *v1++ -= *v2++;
+}
+
+static int32_t scalarproduct_int16_c(int16_t * v1, int16_t * v2, int order, int shift)
+{
+    int res = 0;
+
+    while (order--)
+        res += (*v1++ * *v2++) >> shift;
+
+    return res;
+}
+
  #define W0 2048
  #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
  #define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
@@ -4203,6 +4382,8 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
      c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c;
      c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c;
  
+    c->draw_edges = draw_edges_c;
+
  #ifdef CONFIG_CAVS_DECODER
      ff_cavsdsp_init(c,avctx);
  #endif
@@ -4301,7 +4482,12 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
      c->vector_fmul = vector_fmul_c;
      c->vector_fmul_reverse = vector_fmul_reverse_c;
      c->vector_fmul_add_add = ff_vector_fmul_add_add_c;
+    c->vector_fmul_window = ff_vector_fmul_window_c;
      c->float_to_int16 = ff_float_to_int16_c;
+    c->float_to_int16_interleave = ff_float_to_int16_interleave_c;
+    c->add_int16 = add_int16_c;
+    c->sub_int16 = sub_int16_c;
+    c->scalarproduct_int16 = scalarproduct_int16_c;
  
      c->shrink[0]= ff_img_copy_plane;
      c->shrink[1]= ff_shrink22;
@@ -4351,6 +4537,10 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
          for(i=0; i<64; i++)
              c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
          break;
+    case FF_SSE2_IDCT_PERM:
+        for(i=0; i<64; i++)
+            c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
+        break;
      default:
          av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
      }