]> git.sesse.net Git - ffmpeg/blobdiff - libavcodec/dsputil.c
x86: ff_get_cpu_flags_x86(): Avoid a pointless variable indirection
[ffmpeg] / libavcodec / dsputil.c
index 5c1039b028a801bfe3431a6518c37c623c9b6bc1..5c705ea169af3d1dcd7dd909efb1c2818e6852f1 100644 (file)
@@ -36,9 +36,7 @@
 #include "mathops.h"
 #include "mpegvideo.h"
 #include "config.h"
-#include "ac3dec.h"
 #include "vorbis.h"
-#include "png.h"
 
 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
 uint32_t ff_squareTbl[512] = {0, };
@@ -83,7 +81,7 @@ const uint8_t ff_zigzag248_direct[64] = {
 };
 
 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
-DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
+DECLARE_ALIGNED(16, uint16_t, ff_inv_zigzag_direct16)[64];
 
 const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,   2,  3,  8,  9, 16, 17,
@@ -131,9 +129,6 @@ void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_s
         int j;
         j = src_scantable[i];
         st->permutated[i] = permutation[j];
-#if ARCH_PPC
-        st->inverse[j] = i;
-#endif
     }
 
     end=-1;
@@ -363,65 +358,30 @@ static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
 }
 
 
-void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
-                             int line_size)
-{
-    int i;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-
-    /* read the pixels */
-    for(i=0;i<8;i++) {
-        pixels[0] = cm[block[0]];
-        pixels[1] = cm[block[1]];
-        pixels[2] = cm[block[2]];
-        pixels[3] = cm[block[3]];
-        pixels[4] = cm[block[4]];
-        pixels[5] = cm[block[5]];
-        pixels[6] = cm[block[6]];
-        pixels[7] = cm[block[7]];
-
-        pixels += line_size;
-        block += 8;
-    }
-}
-
-static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
-                                 int line_size)
-{
-    int i;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-
-    /* read the pixels */
-    for(i=0;i<4;i++) {
-        pixels[0] = cm[block[0]];
-        pixels[1] = cm[block[1]];
-        pixels[2] = cm[block[2]];
-        pixels[3] = cm[block[3]];
-
-        pixels += line_size;
-        block += 8;
-    }
-}
-
-static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
+static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
                                  int line_size)
 {
     int i;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
 
     /* read the pixels */
-    for(i=0;i<2;i++) {
-        pixels[0] = cm[block[0]];
-        pixels[1] = cm[block[1]];
+    for(i=0;i<8;i++) {
+        pixels[0] = av_clip_uint8(block[0]);
+        pixels[1] = av_clip_uint8(block[1]);
+        pixels[2] = av_clip_uint8(block[2]);
+        pixels[3] = av_clip_uint8(block[3]);
+        pixels[4] = av_clip_uint8(block[4]);
+        pixels[5] = av_clip_uint8(block[5]);
+        pixels[6] = av_clip_uint8(block[6]);
+        pixels[7] = av_clip_uint8(block[7]);
 
         pixels += line_size;
         block += 8;
     }
 }
 
-void ff_put_signed_pixels_clamped_c(const DCTELEM *block,
-                                    uint8_t *restrict pixels,
-                                    int line_size)
+static void put_signed_pixels_clamped_c(const DCTELEM *block,
+                                        uint8_t *restrict pixels,
+                                        int line_size)
 {
     int i, j;
 
@@ -440,54 +400,21 @@ void ff_put_signed_pixels_clamped_c(const DCTELEM *block,
     }
 }
 
-void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
-                             int line_size)
+static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
+                                 int line_size)
 {
     int i;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
 
     /* read the pixels */
     for(i=0;i<8;i++) {
-        pixels[0] = cm[pixels[0] + block[0]];
-        pixels[1] = cm[pixels[1] + block[1]];
-        pixels[2] = cm[pixels[2] + block[2]];
-        pixels[3] = cm[pixels[3] + block[3]];
-        pixels[4] = cm[pixels[4] + block[4]];
-        pixels[5] = cm[pixels[5] + block[5]];
-        pixels[6] = cm[pixels[6] + block[6]];
-        pixels[7] = cm[pixels[7] + block[7]];
-        pixels += line_size;
-        block += 8;
-    }
-}
-
-static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
-                          int line_size)
-{
-    int i;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-
-    /* read the pixels */
-    for(i=0;i<4;i++) {
-        pixels[0] = cm[pixels[0] + block[0]];
-        pixels[1] = cm[pixels[1] + block[1]];
-        pixels[2] = cm[pixels[2] + block[2]];
-        pixels[3] = cm[pixels[3] + block[3]];
-        pixels += line_size;
-        block += 8;
-    }
-}
-
-static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
-                          int line_size)
-{
-    int i;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-
-    /* read the pixels */
-    for(i=0;i<2;i++) {
-        pixels[0] = cm[pixels[0] + block[0]];
-        pixels[1] = cm[pixels[1] + block[1]];
+        pixels[0] = av_clip_uint8(pixels[0] + block[0]);
+        pixels[1] = av_clip_uint8(pixels[1] + block[1]);
+        pixels[2] = av_clip_uint8(pixels[2] + block[2]);
+        pixels[3] = av_clip_uint8(pixels[3] + block[3]);
+        pixels[4] = av_clip_uint8(pixels[4] + block[4]);
+        pixels[5] = av_clip_uint8(pixels[5] + block[5]);
+        pixels[6] = av_clip_uint8(pixels[6] + block[6]);
+        pixels[7] = av_clip_uint8(pixels[7] + block[7]);
         pixels += line_size;
         block += 8;
     }
@@ -1882,17 +1809,6 @@ static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
         dst[i+0] += src[i+0];
 }
 
-static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
-    long i;
-    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
-        long a = *(long*)(src1+i);
-        long b = *(long*)(src2+i);
-        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
-    }
-    for(; i<w; i++)
-        dst[i] = src1[i]+src2[i];
-}
-
 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
     long i;
 #if !HAVE_FAST_UNALIGNED
@@ -2446,12 +2362,6 @@ WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
 WRAPPER8_16_SQ(rd8x8_c, rd16_c)
 WRAPPER8_16_SQ(bit8x8_c, bit16_c)
 
-static void vector_fmul_c(float *dst, const float *src0, const float *src1, int len){
-    int i;
-    for(i=0; i<len; i++)
-        dst[i] = src0[i] * src1[i];
-}
-
 static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
     int i;
     src1 += len-1;
@@ -2490,14 +2400,6 @@ static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
         dst[i] = src[i] * mul;
 }
 
-static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
-                                 int len)
-{
-    int i;
-    for (i = 0; i < len; i++)
-        dst[i] += src[i] * mul;
-}
-
 static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                 int len)
 {
@@ -2521,7 +2423,7 @@ static void butterflies_float_interleave_c(float *dst, const float *src0,
     }
 }
 
-static float scalarproduct_float_c(const float *v1, const float *v2, int len)
+float ff_scalarproduct_float_c(const float *v1, const float *v2, int len)
 {
     float p = 0.0;
     int i;
@@ -2577,12 +2479,12 @@ static void vector_clipf_c(float *dst, const float *src, float min, float max, i
     }
 }
 
-static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift)
+static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order)
 {
     int res = 0;
 
     while (order--)
-        res += (*v1++ * *v2++) >> shift;
+        res += *v1++ * *v2++;
 
     return res;
 }
@@ -2703,63 +2605,28 @@ void ff_wmv2_idct_c(short * block){
 static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
 {
     ff_wmv2_idct_c(block);
-    ff_put_pixels_clamped_c(block, dest, line_size);
+    put_pixels_clamped_c(block, dest, line_size);
 }
 static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
 {
     ff_wmv2_idct_c(block);
-    ff_add_pixels_clamped_c(block, dest, line_size);
+    add_pixels_clamped_c(block, dest, line_size);
 }
 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
 {
-    j_rev_dct (block);
-    ff_put_pixels_clamped_c(block, dest, line_size);
+    ff_j_rev_dct (block);
+    put_pixels_clamped_c(block, dest, line_size);
 }
 static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
 {
-    j_rev_dct (block);
-    ff_add_pixels_clamped_c(block, dest, line_size);
-}
-
-static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
-{
-    j_rev_dct4 (block);
-    put_pixels_clamped4_c(block, dest, line_size);
-}
-static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
-{
-    j_rev_dct4 (block);
-    add_pixels_clamped4_c(block, dest, line_size);
-}
-
-static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
-{
-    j_rev_dct2 (block);
-    put_pixels_clamped2_c(block, dest, line_size);
-}
-static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
-{
-    j_rev_dct2 (block);
-    add_pixels_clamped2_c(block, dest, line_size);
-}
-
-static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
-{
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-
-    dest[0] = cm[(block[0] + 4)>>3];
-}
-static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
-{
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-
-    dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
+    ff_j_rev_dct (block);
+    add_pixels_clamped_c(block, dest, line_size);
 }
 
 static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
 
 /* init static data */
-av_cold void dsputil_static_init(void)
+av_cold void ff_dsputil_static_init(void)
 {
     int i;
 
@@ -2773,7 +2640,7 @@ av_cold void dsputil_static_init(void)
         ff_squareTbl[i] = (i - 256) * (i - 256);
     }
 
-    for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
+    for(i=0; i<64; i++) ff_inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
 }
 
 int ff_check_alignment(void){
@@ -2796,9 +2663,9 @@ int ff_check_alignment(void){
     return 0;
 }
 
-av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
+av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
 {
-    int i;
+    int i, j;
 
     ff_check_alignment();
 
@@ -2808,8 +2675,8 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
         c->fdct248 = ff_fdct248_islow_10;
     } else {
         if(avctx->dct_algo==FF_DCT_FASTINT) {
-            c->fdct    = fdct_ifast;
-            c->fdct248 = fdct_ifast248;
+            c->fdct    = ff_fdct_ifast;
+            c->fdct248 = ff_fdct_ifast248;
         }
         else if(avctx->dct_algo==FF_DCT_FAAN) {
             c->fdct    = ff_faandct;
@@ -2822,39 +2689,17 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     }
 #endif //CONFIG_ENCODERS
 
-    if(avctx->lowres==1){
-        c->idct_put= ff_jref_idct4_put;
-        c->idct_add= ff_jref_idct4_add;
-        c->idct    = j_rev_dct4;
-        c->idct_permutation_type= FF_NO_IDCT_PERM;
-    }else if(avctx->lowres==2){
-        c->idct_put= ff_jref_idct2_put;
-        c->idct_add= ff_jref_idct2_add;
-        c->idct    = j_rev_dct2;
-        c->idct_permutation_type= FF_NO_IDCT_PERM;
-    }else if(avctx->lowres==3){
-        c->idct_put= ff_jref_idct1_put;
-        c->idct_add= ff_jref_idct1_add;
-        c->idct    = j_rev_dct1;
-        c->idct_permutation_type= FF_NO_IDCT_PERM;
-    }else{
-        if (avctx->bits_per_raw_sample == 10) {
-            c->idct_put              = ff_simple_idct_put_10;
-            c->idct_add              = ff_simple_idct_add_10;
-            c->idct                  = ff_simple_idct_10;
-            c->idct_permutation_type = FF_NO_IDCT_PERM;
-        } else {
+    if (avctx->bits_per_raw_sample == 10) {
+        c->idct_put              = ff_simple_idct_put_10;
+        c->idct_add              = ff_simple_idct_add_10;
+        c->idct                  = ff_simple_idct_10;
+        c->idct_permutation_type = FF_NO_IDCT_PERM;
+    } else {
         if(avctx->idct_algo==FF_IDCT_INT){
             c->idct_put= ff_jref_idct_put;
             c->idct_add= ff_jref_idct_add;
-            c->idct    = j_rev_dct;
+            c->idct    = ff_j_rev_dct;
             c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
-        }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
-                avctx->idct_algo==FF_IDCT_VP3){
-            c->idct_put= ff_vp3_idct_put_c;
-            c->idct_add= ff_vp3_idct_add_c;
-            c->idct    = ff_vp3_idct_c;
-            c->idct_permutation_type= FF_NO_IDCT_PERM;
         }else if(avctx->idct_algo==FF_IDCT_WMV2){
             c->idct_put= ff_wmv2_idct_put_c;
             c->idct_add= ff_wmv2_idct_add_c;
@@ -2865,22 +2710,18 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
             c->idct_add= ff_faanidct_add;
             c->idct    = ff_faanidct;
             c->idct_permutation_type= FF_NO_IDCT_PERM;
-        }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
-            c->idct_put= ff_ea_idct_put_c;
-            c->idct_permutation_type= FF_NO_IDCT_PERM;
         }else{ //accurate/default
             c->idct_put = ff_simple_idct_put_8;
             c->idct_add = ff_simple_idct_add_8;
             c->idct     = ff_simple_idct_8;
             c->idct_permutation_type= FF_NO_IDCT_PERM;
         }
-        }
     }
 
     c->diff_pixels = diff_pixels_c;
-    c->put_pixels_clamped = ff_put_pixels_clamped_c;
-    c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
-    c->add_pixels_clamped = ff_add_pixels_clamped_c;
+    c->put_pixels_clamped = put_pixels_clamped_c;
+    c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
+    c->add_pixels_clamped = add_pixels_clamped_c;
     c->sum_abs_dctelem = sum_abs_dctelem_c;
     c->gmc1 = gmc1_c;
     c->gmc = ff_gmc_c;
@@ -2955,9 +2796,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
     ff_mlp_init(c, avctx);
 #endif
-#if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
-    ff_intrax8dsp_init(c,avctx);
-#endif
 
     c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
     c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
@@ -3003,7 +2841,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
 
     c->add_bytes= add_bytes_c;
-    c->add_bytes_l2= add_bytes_l2_c;
     c->diff_bytes= diff_bytes_c;
     c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
     c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
@@ -3011,33 +2848,20 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
     c->bswap_buf= bswap_buf;
     c->bswap16_buf = bswap16_buf;
-#if CONFIG_PNG_DECODER
-    c->add_png_paeth_prediction= ff_add_png_paeth_prediction;
-#endif
 
     if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
         c->h263_h_loop_filter= h263_h_loop_filter_c;
         c->h263_v_loop_filter= h263_v_loop_filter_c;
     }
 
-    if (CONFIG_VP3_DECODER) {
-        c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
-        c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
-        c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
-    }
-
     c->h261_loop_filter= h261_loop_filter_c;
 
     c->try_8x8basis= try_8x8basis_c;
     c->add_8x8basis= add_8x8basis_c;
 
 #if CONFIG_VORBIS_DECODER
-    c->vorbis_inverse_coupling = vorbis_inverse_coupling;
-#endif
-#if CONFIG_AC3_DECODER
-    c->ac3_downmix = ff_ac3_downmix_c;
+    c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling;
 #endif
-    c->vector_fmul = vector_fmul_c;
     c->vector_fmul_reverse = vector_fmul_reverse_c;
     c->vector_fmul_add = vector_fmul_add_c;
     c->vector_fmul_window = vector_fmul_window_c;
@@ -3046,11 +2870,10 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
     c->apply_window_int16 = apply_window_int16_c;
     c->vector_clip_int32 = vector_clip_int32_c;
-    c->scalarproduct_float = scalarproduct_float_c;
+    c->scalarproduct_float = ff_scalarproduct_float_c;
     c->butterflies_float = butterflies_float_c;
     c->butterflies_float_interleave = butterflies_float_interleave_c;
     c->vector_fmul_scalar = vector_fmul_scalar_c;
-    c->vector_fmac_scalar = vector_fmac_scalar_c;
 
     c->shrink[0]= av_image_copy_plane;
     c->shrink[1]= ff_shrink22;
@@ -3147,28 +2970,29 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
         }
         break;
     default:
-        av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample);
-    case 8:
         BIT_DEPTH_FUNCS(8, _16);
         break;
     }
 
 
-    if (HAVE_MMX)        dsputil_init_mmx   (c, avctx);
-    if (ARCH_ARM)        dsputil_init_arm   (c, avctx);
-    if (CONFIG_MLIB)     dsputil_init_mlib  (c, avctx);
-    if (HAVE_VIS)        dsputil_init_vis   (c, avctx);
-    if (ARCH_ALPHA)      dsputil_init_alpha (c, avctx);
-    if (ARCH_PPC)        dsputil_init_ppc   (c, avctx);
-    if (HAVE_MMI)        dsputil_init_mmi   (c, avctx);
-    if (ARCH_SH4)        dsputil_init_sh4   (c, avctx);
-    if (ARCH_BFIN)       dsputil_init_bfin  (c, avctx);
-
-    for(i=0; i<64; i++){
-        if(!c->put_2tap_qpel_pixels_tab[0][i])
-            c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
-        if(!c->avg_2tap_qpel_pixels_tab[0][i])
-            c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
+    if (HAVE_MMX)        ff_dsputil_init_mmx   (c, avctx);
+    if (ARCH_ARM)        ff_dsputil_init_arm   (c, avctx);
+    if (HAVE_VIS)        ff_dsputil_init_vis   (c, avctx);
+    if (ARCH_ALPHA)      ff_dsputil_init_alpha (c, avctx);
+    if (ARCH_PPC)        ff_dsputil_init_ppc   (c, avctx);
+    if (HAVE_MMI)        ff_dsputil_init_mmi   (c, avctx);
+    if (ARCH_SH4)        ff_dsputil_init_sh4   (c, avctx);
+    if (ARCH_BFIN)       ff_dsputil_init_bfin  (c, avctx);
+
+    for (i = 0; i < 4; i++) {
+        for (j = 0; j < 16; j++) {
+            if(!c->put_2tap_qpel_pixels_tab[i][j])
+                c->put_2tap_qpel_pixels_tab[i][j] =
+                    c->put_h264_qpel_pixels_tab[i][j];
+            if(!c->avg_2tap_qpel_pixels_tab[i][j])
+                c->avg_2tap_qpel_pixels_tab[i][j] =
+                    c->avg_h264_qpel_pixels_tab[i][j];
+        }
     }
 
     ff_init_scantable_permutation(c->idct_permutation,