]> git.sesse.net Git - ffmpeg/blob - libavcodec/x86/dsputil_x86.h
Merge commit 'e1eaaec765d2e726618633fcbd2e06fded7647a8'
[ffmpeg] / libavcodec / x86 / dsputil_x86.h
1 /*
2  * MMX optimized DSP utils
3  * Copyright (c) 2007  Aurelien Jacobs <aurel@gnuage.org>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 #ifndef AVCODEC_X86_DSPUTIL_MMX_H
23 #define AVCODEC_X86_DSPUTIL_MMX_H
24
25 #include <stddef.h>
26 #include <stdint.h>
27
28 #include "libavcodec/dsputil.h"
29 #include "libavutil/x86/asm.h"
30 #include "constants.h"
31
/*
 * MOVQ_WONE(regd): set every 16-bit word of regd to the value 1.
 *
 * regd is the bare register name (presumably an MMX register such as mm7);
 * the "%%" prefix is added by the macro itself.
 * pcmpeqd of a register with itself sets all bits, and the logical word
 * shift right by 15 then leaves only bit 0 of each word set.
 *
 * The asm statement has empty operand and clobber lists, so the compiler is
 * not told that regd is modified.
 */
32 #define MOVQ_WONE(regd) \
33     __asm__ volatile ( \
34     "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
35     "psrlw $15, %%" #regd ::)
36
/* Emit a ".p2align 3" assembler directive at this point, i.e. request
 * alignment of what follows to a 2^3 = 8 byte boundary. */
37 #define JUMPALIGN()     __asm__ volatile (".p2align 3"::)
/* Clear regd by XORing it with itself. regd is the bare register name; the
 * "%%" prefix is added here. No clobber list is declared. */
38 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%"#regd", %%"#regd ::)
39
/*
 * MOVQ_BFE(regd): set every byte of regd to 0xfe.
 *
 * pcmpeqd of the register with itself yields all ones (0xff in each byte);
 * adding the register to itself bytewise wraps 0xff + 0xff to 0xfe.
 * The result is the mask the averaging macros in this header expect in
 * their regfe argument / in mm6.
 *
 * regd is the bare register name; no clobber list is declared.
 */
40 #define MOVQ_BFE(regd)                                  \
41     __asm__ volatile (                                  \
42         "pcmpeqd %%"#regd", %%"#regd"   \n\t"           \
43         "paddb   %%"#regd", %%"#regd"   \n\t" ::)
44
/*
 * MOVQ_WTWO(regd): set every 16-bit word of regd to the value 2.
 *
 * Two implementations are selected on PIC:
 *  - without PIC the value is loaded from the memory constant ff_wtwo
 *    (not declared in this header; presumably provided via constants.h,
 *    TODO confirm);
 *  - with PIC the value is built in the register, avoiding the memory
 *    reference.
 * regd is the bare register name; no clobber list is declared.
 */
45 #ifndef PIC
46 #define MOVQ_WTWO(regd) __asm__ volatile ("movq %0, %%"#regd" \n\t" :: "m"(ff_wtwo))
47 #else
48 // for shared libraries it is better to build the constant this way than to load it from memory
49 // pcmpeqd -> -1 (all bits set); psrlw $15 -> 1 in each word; psllw $1 -> 2 in each word
50 #define MOVQ_WTWO(regd)                                 \
51     __asm__ volatile (                                  \
52         "pcmpeqd %%"#regd", %%"#regd"   \n\t"           \
53         "psrlw         $15, %%"#regd"   \n\t"           \
54         "psllw          $1, %%"#regd"   \n\t"::)
55
56 #endif
57
/*
 * PAVGB_MMX_NO_RND: bytewise average of rega and regb, rounding down.
 *
 * Expands to a sequence of string literals (instruction text), not to a
 * statement, so it has to be placed inside an asm block by the user.
 * Unlike the MOVQ_* macros, the register arguments are stringified as
 * given, without a "%%" prefix being added.
 *
 * Computes regr = (rega & regb) + (((rega ^ regb) & regfe) >> 1).
 * The mask is applied before the 64-bit shift so that the low bit of one
 * byte cannot move into the top bit of the byte below it.
 */
58 // regr is used as temporary and receives the output result
59 // first argument is unmodified and second is trashed
60 // regfe is supposed to contain 0xfefefefefefefefe
61 #define PAVGB_MMX_NO_RND(rega, regb, regr, regfe)                \
62     "movq   "#rega", "#regr"            \n\t"                    \
63     "pand   "#regb", "#regr"            \n\t"                    \
64     "pxor   "#rega", "#regb"            \n\t"                    \
65     "pand  "#regfe", "#regb"            \n\t"                    \
66     "psrlq       $1, "#regb"            \n\t"                    \
67     "paddb  "#regb", "#regr"            \n\t"
68
/*
 * PAVGB_MMX: bytewise average of rega and regb, rounding up.
 *
 * Expands to instruction text (string literals) for use inside an asm
 * block; register arguments are stringified as given.
 *
 * Computes regr = (rega | regb) - (((rega ^ regb) & regfe) >> 1).
 * rega is left unmodified, regb is overwritten, regr receives the result.
 * The instruction sequence only yields a per-byte average if regfe holds
 * 0xfe in every byte.
 */
69 #define PAVGB_MMX(rega, regb, regr, regfe)                       \
70     "movq   "#rega", "#regr"            \n\t"                    \
71     "por    "#regb", "#regr"            \n\t"                    \
72     "pxor   "#rega", "#regb"            \n\t"                    \
73     "pand  "#regfe", "#regb"            \n\t"                    \
74     "psrlq       $1, "#regb"            \n\t"                    \
75     "psubb  "#regb", "#regr"            \n\t"
76
/*
 * PAVGBP_MMX_NO_RND: two interleaved bytewise averages, rounding down.
 *
 *   regr = (rega & regb) + (((rega ^ regb) & mm6) >> 1)
 *   regp = (regc & regd) + (((regc ^ regd) & mm6) >> 1)
 *
 * rega and regc are left unmodified; regb and regd are overwritten.
 * Expands to instruction text (string literals) for use inside an asm
 * block; register arguments are stringified as given, while the mask
 * register is hard-coded as %%mm6.
 */
77 // mm6 is supposed to contain 0xfefefefefefefefe
78 #define PAVGBP_MMX_NO_RND(rega, regb, regr,  regc, regd, regp)   \
79     "movq  "#rega", "#regr"             \n\t"                    \
80     "movq  "#regc", "#regp"             \n\t"                    \
81     "pand  "#regb", "#regr"             \n\t"                    \
82     "pand  "#regd", "#regp"             \n\t"                    \
83     "pxor  "#rega", "#regb"             \n\t"                    \
84     "pxor  "#regc", "#regd"             \n\t"                    \
85     "pand    %%mm6, "#regb"             \n\t"                    \
86     "pand    %%mm6, "#regd"             \n\t"                    \
87     "psrlq      $1, "#regb"             \n\t"                    \
88     "psrlq      $1, "#regd"             \n\t"                    \
89     "paddb "#regb", "#regr"             \n\t"                    \
90     "paddb "#regd", "#regp"             \n\t"
91
/*
 * PAVGBP_MMX: two interleaved bytewise averages, rounding up.
 *
 *   regr = (rega | regb) - (((rega ^ regb) & mm6) >> 1)
 *   regp = (regc | regd) - (((regc ^ regd) & mm6) >> 1)
 *
 * rega and regc are left unmodified; regb and regd are overwritten.
 * The mask register is hard-coded as %%mm6; the sequence only yields a
 * per-byte average if mm6 holds 0xfe in every byte.
 * Expands to instruction text (string literals) for use inside an asm
 * block; the other register arguments are stringified as given.
 */
92 #define PAVGBP_MMX(rega, regb, regr, regc, regd, regp)           \
93     "movq  "#rega", "#regr"             \n\t"                    \
94     "movq  "#regc", "#regp"             \n\t"                    \
95     "por   "#regb", "#regr"             \n\t"                    \
96     "por   "#regd", "#regp"             \n\t"                    \
97     "pxor  "#rega", "#regb"             \n\t"                    \
98     "pxor  "#regc", "#regd"             \n\t"                    \
99     "pand    %%mm6, "#regb"             \n\t"                    \
100     "pand    %%mm6, "#regd"             \n\t"                    \
101     "psrlq      $1, "#regd"             \n\t"                    \
102     "psrlq      $1, "#regb"             \n\t"                    \
103     "psubb "#regb", "#regr"             \n\t"                    \
104     "psubb "#regd", "#regp"             \n\t"
105
/*
 * DSPContext initialisation entry points; both take the DSPContext and the
 * codec context. The definitions are not part of this header.
 * NOTE(review): presumably these install the x86 encoder / pixel routines
 * into *c - confirm against the implementations.
 */
106 void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx);
107 void ff_dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);
108
/*
 * Transfers from a block of int16_t values to uint8_t pixels. All three
 * share the signature (block, pixels, line_size); only pixels is writable.
 * NOTE(review): the names suggest add = accumulate onto existing pixels,
 * put = overwrite, signed = signed input, each with clamping to the byte
 * range - confirm against the implementations.
 */
109 void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);
110 void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);
111 void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);
112
/*
 * Block clearing routines in MMX and SSE flavours, each taking a writable
 * int16_t pointer.
 * NOTE(review): presumably zero one block / a group of blocks; the number
 * of elements written is not visible from this header - confirm.
 */
113 void ff_clear_block_mmx(int16_t *block);
114 void ff_clear_block_sse(int16_t *block);
115 void ff_clear_blocks_mmx(int16_t *blocks);
116 void ff_clear_blocks_sse(int16_t *blocks);
117
/* NOTE(review): presumably adds w bytes of src onto dst; src is not
 * const-qualified here - confirm whether it is written. */
118 void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, int w);
119
/*
 * Median prediction helper (hfyu presumably refers to HuffYUV - confirm).
 * dst is the only writable byte buffer; left and left_top are non-const int
 * pointers, so they may be updated by the call.
 * NOTE(review): the _cmov suffix suggests the implementation relies on the
 * CMOV instruction - confirm.
 */
120 void ff_add_hfyu_median_prediction_cmov(uint8_t *dst, const uint8_t *top,
121                                         const uint8_t *diff, int w,
122                                         int *left, int *left_top);
123
/* NOTE(review): presumably replicates picture borders of buf; the exact
 * meaning of w, h and sides is not visible from this header - confirm. */
124 void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
125                        int w, int h, int sides);
126
/*
 * ff_gmc_mmx and ff_gmc_sse have identical parameter lists.
 * NOTE(review): gmc presumably stands for global motion compensation;
 * parameter semantics are defined by the implementations - confirm.
 */
127 void ff_gmc_mmx(uint8_t *dst, uint8_t *src,
128                 int stride, int h, int ox, int oy,
129                 int dxx, int dxy, int dyx, int dyy,
130                 int shift, int r, int width, int height);
131
132 void ff_gmc_sse(uint8_t *dst, uint8_t *src,
133                 int stride, int h, int ox, int oy,
134                 int dxx, int dxy, int dyx, int dyy,
135                 int shift, int r, int width, int height);
136
/* NOTE(review): presumably writes len floats from src, limited to the range
 * [min, max], into dst; alignment and length requirements are not visible
 * from this header - confirm. */
137 void ff_vector_clipf_sse(float *dst, const float *src,
138                          float min, float max, int len);
139
/*
 * Pixel block routines. Every function below has the same signature:
 * a writable destination block, a read-only source pixels, a ptrdiff_t
 * line_size and an int h.
 * NOTE(review): by naming, put/avg presumably select store vs. average with
 * the destination, 8/16 the block width, _x2/_xy2 a half-pel interpolation
 * variant, and the suffix the required CPU extension - confirm against the
 * implementations.
 */
140 void ff_avg_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
141                         ptrdiff_t line_size, int h);
142 void ff_avg_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
143                          ptrdiff_t line_size, int h);
144 void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
145                         ptrdiff_t line_size, int h);
146 void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
147                          ptrdiff_t line_size, int h);
148 void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
149                            ptrdiff_t line_size, int h);
150 void ff_put_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
151                            ptrdiff_t line_size, int h);
152 void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
153                           ptrdiff_t line_size, int h);
154 void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
155                           ptrdiff_t line_size, int h);
156
157 void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels,
158                            ptrdiff_t line_size, int h);
159
160 void ff_avg_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,
161                             ptrdiff_t line_size, int h);
162 void ff_avg_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,
163                              ptrdiff_t line_size, int h);
164
165 void ff_put_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,
166                             ptrdiff_t line_size, int h);
167 void ff_put_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,
168                              ptrdiff_t line_size, int h);
169
170
/* IDCT entry points taking a single writable int16_t block.
 * NOTE(review): presumably the transform is done in place on block; block
 * size and alignment requirements are not visible here - confirm. */
171 void ff_mmx_idct(int16_t *block);
172 void ff_mmxext_idct(int16_t *block);
173
/*
 * Deinterlacing of one line from five read-only input lines
 * (lum_m4 .. lum_m1 and lum); the result goes to dst.
 * NOTE(review): the unit of size (bytes/pixels) is not visible here.
 */
174 void ff_deinterlace_line_mmx(uint8_t *dst,
175                              const uint8_t *lum_m4, const uint8_t *lum_m3,
176                              const uint8_t *lum_m2, const uint8_t *lum_m1,
177                              const uint8_t *lum,
178                              int size);
179
/*
 * Variant without a separate destination pointer.
 * NOTE(review): all five line pointers are const-qualified in this
 * declaration although the name says "inplace"; which buffer is written is
 * not visible here - verify against the implementation.
 */
180 void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4,
181                                      const uint8_t *lum_m3,
182                                      const uint8_t *lum_m2,
183                                      const uint8_t *lum_m1,
184                                      const uint8_t *lum, int size);
185
/*
 * PIXELS16: generate a 16-wide pixel function from an existing 8-wide one.
 *
 * Defines, with storage class STATIC (may be empty), the function
 *     PFX1_pixels16<TYPE><CPUEXT>(block, pixels, line_size, h)
 * which calls
 *     <PFX2>PFX1_pixels8<TYPE><CPUEXT>
 * twice: once on block/pixels and once on block + 8 / pixels + 8, with the
 * same line_size and h.
 *
 * Note that PFX2 is pasted only onto the callee name, not onto the name of
 * the generated function. For example PIXELS16(static, put, ff_, _xy2, _mmx)
 * would define static put_pixels16_xy2_mmx() in terms of
 * ff_put_pixels8_xy2_mmx().
 */
186 #define PIXELS16(STATIC, PFX1, PFX2, TYPE, CPUEXT)                      \
187 STATIC void PFX1 ## _pixels16 ## TYPE ## CPUEXT(uint8_t *block,         \
188                                                 const uint8_t *pixels,  \
189                                                 ptrdiff_t line_size,    \
190                                                 int h)                  \
191 {                                                                       \
192     PFX2 ## PFX1 ## _pixels8 ## TYPE ## CPUEXT(block,      pixels,      \
193                                                line_size, h);           \
194     PFX2 ## PFX1 ## _pixels8 ## TYPE ## CPUEXT(block + 8,  pixels + 8,  \
195                                                line_size, h);           \
196 }
197
198 #endif /* AVCODEC_X86_DSPUTIL_MMX_H */