git.sesse.net Git - ffmpeg/blobdiff - libavcodec/rv40dsp.c
Merge remote-tracking branch 'qatar/master'
[ffmpeg] / libavcodec / rv40dsp.c
index 77f2002684649f3558ed026394ef32c785be3ce2..625ce9ebbdfa340b62e3417526a56eed397604a2 100644 (file)
@@ -2,20 +2,20 @@
  * RV40 decoder motion compensation functions
  * Copyright (c) 2008 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,6 +26,7 @@
 
 #include "avcodec.h"
 #include "dsputil.h"
+#include "rv34dsp.h"
 
 #define RV40_LOWPASS(OPNAME, OP) \
 static av_unused void OPNAME ## rv40_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
@@ -105,10 +106,6 @@ static void OPNAME ## rv40_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, i
     OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 52, 20, 6);\
 }\
 \
-static void OPNAME ## rv40_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
-    OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 20, 20, 5);\
-}\
-\
 static void OPNAME ## rv40_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
     OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 20, 52, 6);\
 }\
@@ -138,10 +135,6 @@ static void OPNAME ## rv40_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, i
     OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
 }\
 \
-static void OPNAME ## rv40_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
-    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, src, stride, stride, SIZE, 20, 20, 5);\
-}\
-\
 static void OPNAME ## rv40_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
     uint8_t full[SIZE*(SIZE+5)];\
     uint8_t * const full_mid = full + SIZE*2;\
@@ -284,70 +277,97 @@ static void OPNAME ## rv40_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*a
 RV40_CHROMA_MC(put_, op_put)
 RV40_CHROMA_MC(avg_, op_avg)
 
-void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx) {
-    c->put_rv40_qpel_pixels_tab[0][ 0] = c->put_h264_qpel_pixels_tab[0][0];
-    c->put_rv40_qpel_pixels_tab[0][ 1] = put_rv40_qpel16_mc10_c;
-    c->put_rv40_qpel_pixels_tab[0][ 2] = put_rv40_qpel16_mc20_c;
-    c->put_rv40_qpel_pixels_tab[0][ 3] = put_rv40_qpel16_mc30_c;
-    c->put_rv40_qpel_pixels_tab[0][ 4] = put_rv40_qpel16_mc01_c;
-    c->put_rv40_qpel_pixels_tab[0][ 5] = put_rv40_qpel16_mc11_c;
-    c->put_rv40_qpel_pixels_tab[0][ 6] = put_rv40_qpel16_mc21_c;
-    c->put_rv40_qpel_pixels_tab[0][ 7] = put_rv40_qpel16_mc31_c;
-    c->put_rv40_qpel_pixels_tab[0][ 8] = put_rv40_qpel16_mc02_c;
-    c->put_rv40_qpel_pixels_tab[0][ 9] = put_rv40_qpel16_mc12_c;
-    c->put_rv40_qpel_pixels_tab[0][10] = put_rv40_qpel16_mc22_c;
-    c->put_rv40_qpel_pixels_tab[0][11] = put_rv40_qpel16_mc32_c;
-    c->put_rv40_qpel_pixels_tab[0][12] = put_rv40_qpel16_mc03_c;
-    c->put_rv40_qpel_pixels_tab[0][13] = put_rv40_qpel16_mc13_c;
-    c->put_rv40_qpel_pixels_tab[0][14] = put_rv40_qpel16_mc23_c;
-    c->avg_rv40_qpel_pixels_tab[0][ 0] = c->avg_h264_qpel_pixels_tab[0][0];
-    c->avg_rv40_qpel_pixels_tab[0][ 1] = avg_rv40_qpel16_mc10_c;
-    c->avg_rv40_qpel_pixels_tab[0][ 2] = avg_rv40_qpel16_mc20_c;
-    c->avg_rv40_qpel_pixels_tab[0][ 3] = avg_rv40_qpel16_mc30_c;
-    c->avg_rv40_qpel_pixels_tab[0][ 4] = avg_rv40_qpel16_mc01_c;
-    c->avg_rv40_qpel_pixels_tab[0][ 5] = avg_rv40_qpel16_mc11_c;
-    c->avg_rv40_qpel_pixels_tab[0][ 6] = avg_rv40_qpel16_mc21_c;
-    c->avg_rv40_qpel_pixels_tab[0][ 7] = avg_rv40_qpel16_mc31_c;
-    c->avg_rv40_qpel_pixels_tab[0][ 8] = avg_rv40_qpel16_mc02_c;
-    c->avg_rv40_qpel_pixels_tab[0][ 9] = avg_rv40_qpel16_mc12_c;
-    c->avg_rv40_qpel_pixels_tab[0][10] = avg_rv40_qpel16_mc22_c;
-    c->avg_rv40_qpel_pixels_tab[0][11] = avg_rv40_qpel16_mc32_c;
-    c->avg_rv40_qpel_pixels_tab[0][12] = avg_rv40_qpel16_mc03_c;
-    c->avg_rv40_qpel_pixels_tab[0][13] = avg_rv40_qpel16_mc13_c;
-    c->avg_rv40_qpel_pixels_tab[0][14] = avg_rv40_qpel16_mc23_c;
-    c->put_rv40_qpel_pixels_tab[1][ 0] = c->put_h264_qpel_pixels_tab[1][0];
-    c->put_rv40_qpel_pixels_tab[1][ 1] = put_rv40_qpel8_mc10_c;
-    c->put_rv40_qpel_pixels_tab[1][ 2] = put_rv40_qpel8_mc20_c;
-    c->put_rv40_qpel_pixels_tab[1][ 3] = put_rv40_qpel8_mc30_c;
-    c->put_rv40_qpel_pixels_tab[1][ 4] = put_rv40_qpel8_mc01_c;
-    c->put_rv40_qpel_pixels_tab[1][ 5] = put_rv40_qpel8_mc11_c;
-    c->put_rv40_qpel_pixels_tab[1][ 6] = put_rv40_qpel8_mc21_c;
-    c->put_rv40_qpel_pixels_tab[1][ 7] = put_rv40_qpel8_mc31_c;
-    c->put_rv40_qpel_pixels_tab[1][ 8] = put_rv40_qpel8_mc02_c;
-    c->put_rv40_qpel_pixels_tab[1][ 9] = put_rv40_qpel8_mc12_c;
-    c->put_rv40_qpel_pixels_tab[1][10] = put_rv40_qpel8_mc22_c;
-    c->put_rv40_qpel_pixels_tab[1][11] = put_rv40_qpel8_mc32_c;
-    c->put_rv40_qpel_pixels_tab[1][12] = put_rv40_qpel8_mc03_c;
-    c->put_rv40_qpel_pixels_tab[1][13] = put_rv40_qpel8_mc13_c;
-    c->put_rv40_qpel_pixels_tab[1][14] = put_rv40_qpel8_mc23_c;
-    c->avg_rv40_qpel_pixels_tab[1][ 0] = c->avg_h264_qpel_pixels_tab[1][0];
-    c->avg_rv40_qpel_pixels_tab[1][ 1] = avg_rv40_qpel8_mc10_c;
-    c->avg_rv40_qpel_pixels_tab[1][ 2] = avg_rv40_qpel8_mc20_c;
-    c->avg_rv40_qpel_pixels_tab[1][ 3] = avg_rv40_qpel8_mc30_c;
-    c->avg_rv40_qpel_pixels_tab[1][ 4] = avg_rv40_qpel8_mc01_c;
-    c->avg_rv40_qpel_pixels_tab[1][ 5] = avg_rv40_qpel8_mc11_c;
-    c->avg_rv40_qpel_pixels_tab[1][ 6] = avg_rv40_qpel8_mc21_c;
-    c->avg_rv40_qpel_pixels_tab[1][ 7] = avg_rv40_qpel8_mc31_c;
-    c->avg_rv40_qpel_pixels_tab[1][ 8] = avg_rv40_qpel8_mc02_c;
-    c->avg_rv40_qpel_pixels_tab[1][ 9] = avg_rv40_qpel8_mc12_c;
-    c->avg_rv40_qpel_pixels_tab[1][10] = avg_rv40_qpel8_mc22_c;
-    c->avg_rv40_qpel_pixels_tab[1][11] = avg_rv40_qpel8_mc32_c;
-    c->avg_rv40_qpel_pixels_tab[1][12] = avg_rv40_qpel8_mc03_c;
-    c->avg_rv40_qpel_pixels_tab[1][13] = avg_rv40_qpel8_mc13_c;
-    c->avg_rv40_qpel_pixels_tab[1][14] = avg_rv40_qpel8_mc23_c;
+#define RV40_WEIGHT_FUNC(size) /* emits rv40_weight_func_<size>, which blends two size x size pixel blocks into dst */ \
+static void rv40_weight_func_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, int stride)\
+{\
+    int i, j;\
+\
+    for (j = 0; j < size; j++) { /* one row per outer iteration */ \
+        for (i = 0; i < size; i++) /* note the crossed weights: w2 scales src1, w1 scales src2 */ \
+            dst[i] = (((w2 * src1[i]) >> 9) + ((w1 * src2[i]) >> 9) + 0x10) >> 5; /* each product is shifted down by 9 before summing; 0x10 is half of 1 << 5, added ahead of the final >> 5 */ \
+        src1 += stride; /* all three pointers advance by the same stride */ \
+        src2 += stride;\
+        dst  += stride;\
+    }\
+}
+
+RV40_WEIGHT_FUNC(16) /* instantiates rv40_weight_func_16 */
+RV40_WEIGHT_FUNC(8) /* instantiates rv40_weight_func_8 */
+
+av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp) { /* fills c's function tables with the C routines of this file, reusing a few entries from dsp */
+    c->put_pixels_tab[0][ 0] = dsp->put_h264_qpel_pixels_tab[0][0]; /* row 0 holds the qpel16 variants; entry 0 is reused from dsp's H.264 qpel table */
+    c->put_pixels_tab[0][ 1] = put_rv40_qpel16_mc10_c;
+    c->put_pixels_tab[0][ 2] = dsp->put_h264_qpel_pixels_tab[0][2]; /* entry 2 (previously put_rv40_qpel16_mc20_c) now comes from dsp's H.264 qpel table */
+    c->put_pixels_tab[0][ 3] = put_rv40_qpel16_mc30_c;
+    c->put_pixels_tab[0][ 4] = put_rv40_qpel16_mc01_c;
+    c->put_pixels_tab[0][ 5] = put_rv40_qpel16_mc11_c;
+    c->put_pixels_tab[0][ 6] = put_rv40_qpel16_mc21_c;
+    c->put_pixels_tab[0][ 7] = put_rv40_qpel16_mc31_c;
+    c->put_pixels_tab[0][ 8] = dsp->put_h264_qpel_pixels_tab[0][8]; /* entry 8 (previously put_rv40_qpel16_mc02_c) likewise comes from dsp */
+    c->put_pixels_tab[0][ 9] = put_rv40_qpel16_mc12_c;
+    c->put_pixels_tab[0][10] = put_rv40_qpel16_mc22_c;
+    c->put_pixels_tab[0][11] = put_rv40_qpel16_mc32_c;
+    c->put_pixels_tab[0][12] = put_rv40_qpel16_mc03_c;
+    c->put_pixels_tab[0][13] = put_rv40_qpel16_mc13_c;
+    c->put_pixels_tab[0][14] = put_rv40_qpel16_mc23_c;
+    c->put_pixels_tab[0][15] = ff_put_rv40_qpel16_mc33_c; /* entry 15 is new in this diff; the ff_-prefixed routine is not defined in the visible hunks */
+    c->avg_pixels_tab[0][ 0] = dsp->avg_h264_qpel_pixels_tab[0][0]; /* avg table mirrors the put table layout */
+    c->avg_pixels_tab[0][ 1] = avg_rv40_qpel16_mc10_c;
+    c->avg_pixels_tab[0][ 2] = dsp->avg_h264_qpel_pixels_tab[0][2];
+    c->avg_pixels_tab[0][ 3] = avg_rv40_qpel16_mc30_c;
+    c->avg_pixels_tab[0][ 4] = avg_rv40_qpel16_mc01_c;
+    c->avg_pixels_tab[0][ 5] = avg_rv40_qpel16_mc11_c;
+    c->avg_pixels_tab[0][ 6] = avg_rv40_qpel16_mc21_c;
+    c->avg_pixels_tab[0][ 7] = avg_rv40_qpel16_mc31_c;
+    c->avg_pixels_tab[0][ 8] = dsp->avg_h264_qpel_pixels_tab[0][8];
+    c->avg_pixels_tab[0][ 9] = avg_rv40_qpel16_mc12_c;
+    c->avg_pixels_tab[0][10] = avg_rv40_qpel16_mc22_c;
+    c->avg_pixels_tab[0][11] = avg_rv40_qpel16_mc32_c;
+    c->avg_pixels_tab[0][12] = avg_rv40_qpel16_mc03_c;
+    c->avg_pixels_tab[0][13] = avg_rv40_qpel16_mc13_c;
+    c->avg_pixels_tab[0][14] = avg_rv40_qpel16_mc23_c;
+    c->avg_pixels_tab[0][15] = ff_avg_rv40_qpel16_mc33_c;
+    c->put_pixels_tab[1][ 0] = dsp->put_h264_qpel_pixels_tab[1][0]; /* row 1 holds the qpel8 variants, same layout as row 0 */
+    c->put_pixels_tab[1][ 1] = put_rv40_qpel8_mc10_c;
+    c->put_pixels_tab[1][ 2] = dsp->put_h264_qpel_pixels_tab[1][2];
+    c->put_pixels_tab[1][ 3] = put_rv40_qpel8_mc30_c;
+    c->put_pixels_tab[1][ 4] = put_rv40_qpel8_mc01_c;
+    c->put_pixels_tab[1][ 5] = put_rv40_qpel8_mc11_c;
+    c->put_pixels_tab[1][ 6] = put_rv40_qpel8_mc21_c;
+    c->put_pixels_tab[1][ 7] = put_rv40_qpel8_mc31_c;
+    c->put_pixels_tab[1][ 8] = dsp->put_h264_qpel_pixels_tab[1][8];
+    c->put_pixels_tab[1][ 9] = put_rv40_qpel8_mc12_c;
+    c->put_pixels_tab[1][10] = put_rv40_qpel8_mc22_c;
+    c->put_pixels_tab[1][11] = put_rv40_qpel8_mc32_c;
+    c->put_pixels_tab[1][12] = put_rv40_qpel8_mc03_c;
+    c->put_pixels_tab[1][13] = put_rv40_qpel8_mc13_c;
+    c->put_pixels_tab[1][14] = put_rv40_qpel8_mc23_c;
+    c->put_pixels_tab[1][15] = ff_put_rv40_qpel8_mc33_c;
+    c->avg_pixels_tab[1][ 0] = dsp->avg_h264_qpel_pixels_tab[1][0];
+    c->avg_pixels_tab[1][ 1] = avg_rv40_qpel8_mc10_c;
+    c->avg_pixels_tab[1][ 2] = dsp->avg_h264_qpel_pixels_tab[1][2];
+    c->avg_pixels_tab[1][ 3] = avg_rv40_qpel8_mc30_c;
+    c->avg_pixels_tab[1][ 4] = avg_rv40_qpel8_mc01_c;
+    c->avg_pixels_tab[1][ 5] = avg_rv40_qpel8_mc11_c;
+    c->avg_pixels_tab[1][ 6] = avg_rv40_qpel8_mc21_c;
+    c->avg_pixels_tab[1][ 7] = avg_rv40_qpel8_mc31_c;
+    c->avg_pixels_tab[1][ 8] = dsp->avg_h264_qpel_pixels_tab[1][8];
+    c->avg_pixels_tab[1][ 9] = avg_rv40_qpel8_mc12_c;
+    c->avg_pixels_tab[1][10] = avg_rv40_qpel8_mc22_c;
+    c->avg_pixels_tab[1][11] = avg_rv40_qpel8_mc32_c;
+    c->avg_pixels_tab[1][12] = avg_rv40_qpel8_mc03_c;
+    c->avg_pixels_tab[1][13] = avg_rv40_qpel8_mc13_c;
+    c->avg_pixels_tab[1][14] = avg_rv40_qpel8_mc23_c;
+    c->avg_pixels_tab[1][15] = ff_avg_rv40_qpel8_mc33_c;
+
+    c->put_chroma_pixels_tab[0] = put_rv40_chroma_mc8_c; /* chroma tables: [0] is the mc8 routine, [1] the mc4 routine */
+    c->put_chroma_pixels_tab[1] = put_rv40_chroma_mc4_c;
+    c->avg_chroma_pixels_tab[0] = avg_rv40_chroma_mc8_c;
+    c->avg_chroma_pixels_tab[1] = avg_rv40_chroma_mc4_c;
+
+    c->rv40_weight_pixels_tab[0] = rv40_weight_func_16; /* weighting routines generated by RV40_WEIGHT_FUNC: [0] = 16x16, [1] = 8x8 */
+    c->rv40_weight_pixels_tab[1] = rv40_weight_func_8;
 
-    c->put_rv40_chroma_pixels_tab[0] = put_rv40_chroma_mc8_c;
-    c->put_rv40_chroma_pixels_tab[1] = put_rv40_chroma_mc4_c;
-    c->avg_rv40_chroma_pixels_tab[0] = avg_rv40_chroma_mc8_c;
-    c->avg_rv40_chroma_pixels_tab[1] = avg_rv40_chroma_mc4_c;
+    if (HAVE_MMX) /* NOTE(review): presumably lets the x86 init replace the C entries assigned above - confirm in ff_rv40dsp_init_x86 */
+        ff_rv40dsp_init_x86(c, dsp);
 }