]> git.sesse.net Git - ffmpeg/blob - libavcodec/x86/dsputil_x86.h
sbrdsp: move #if to disable all educational code
[ffmpeg] / libavcodec / x86 / dsputil_x86.h
1 /*
2  * MMX optimized DSP utils
3  * Copyright (c) 2007  Aurelien Jacobs <aurel@gnuage.org>
4  *
5  * This file is part of Libav.
6  *
7  * Libav is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * Libav is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with Libav; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 #ifndef AVCODEC_X86_DSPUTIL_X86_H
23 #define AVCODEC_X86_DSPUTIL_X86_H
24
25 #include <stddef.h>
26 #include <stdint.h>
27
28 #include "libavcodec/avcodec.h"
29 #include "libavcodec/dsputil.h"
30 #include "libavutil/x86/asm.h"
31 #include "constants.h"
32
/*
 * Set every packed 16-bit word of register `regd` to 0x0001.
 *
 * pcmpeqd of a register with itself sets all bits; psrlw $15 then shifts
 * each word right by 15, leaving only the lowest bit of each word set.
 * `regd` is stringized and prefixed with "%%", so it must be a bare
 * register name (presumably an MMX register such as mm7 -- confirm at the
 * call sites).
 * NOTE(review): the asm statement lists no outputs, inputs or clobbers,
 * so the compiler is not told that `regd` is modified.
 */
33 #define MOVQ_WONE(regd) \
34     __asm__ volatile ( \
35     "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
36     "psrlw $15, %%" #regd ::)
37
/*
 * JUMPALIGN(): emit a ".p2align 3" assembler directive at the point of use,
 * i.e. pad the location counter to a multiple of 2^3 = 8 bytes.
 *
 * MOVQ_ZERO(regd): clear register `regd` by XOR-ing it with itself.
 * `regd` is a bare register name; the macro adds the "%%" prefix.
 * NOTE(review): neither asm statement declares operands or clobbers.
 */
38 #define JUMPALIGN()     __asm__ volatile (".p2align 3"::)
39 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%"#regd", %%"#regd ::)
40
/*
 * Set every byte of register `regd` to 0xFE.
 *
 * pcmpeqd of a register with itself yields all bits set (0xFF per byte);
 * paddb then adds the register to itself byte-wise with wrap-around,
 * giving 0xFF + 0xFF = 0xFE in each byte. The result is the "regfe" mask
 * value that the PAVGB* macros in this header expect.
 * `regd` is a bare register name; the macro adds the "%%" prefix.
 */
41 #define MOVQ_BFE(regd)                                  \
42     __asm__ volatile (                                  \
43         "pcmpeqd %%"#regd", %%"#regd"   \n\t"           \
44         "paddb   %%"#regd", %%"#regd"   \n\t" ::)
45
/*
 * MOVQ_WTWO(regd): load the "word two" constant into register `regd`.
 *
 * Without PIC the value is loaded with movq from the memory operand
 * ff_wtwo (not declared in this header; presumably provided by
 * constants.h -- confirm). With PIC the constant is built in the register
 * so no memory reference is needed.
 */
46 #ifndef PIC
47 #define MOVQ_WTWO(regd) __asm__ volatile ("movq %0, %%"#regd" \n\t" :: "m"(ff_wtwo))
48 #else
49 // For a shared library it is better to build the constant in the register
50 // pcmpeqd -> all bits set (-1); psrlw $15 -> 0x0001 per word; psllw $1 -> 0x0002 per word
51 #define MOVQ_WTWO(regd)                                 \
52     __asm__ volatile (                                  \
53         "pcmpeqd %%"#regd", %%"#regd"   \n\t"           \
54         "psrlw         $15, %%"#regd"   \n\t"           \
55         "psllw          $1, %%"#regd"   \n\t"::)
56
57 #endif
58
59 // Byte-wise average without rounding: regr = (rega & regb) + (((rega ^ regb) & regfe) >> 1).
60 // regr is used as temporary and receives the result; the first argument
60 // (rega) is unmodified and the second (regb) is trashed.
61 // regfe is supposed to contain 0xfefefefefefefefe; masking with it clears the
61 // low bit of every byte so the 64-bit psrlq cannot shift bits across bytes.
// Expands to a sequence of string literals (not a statement) for use inside
// an asm block; arguments are stringized verbatim, so callers presumably
// pass them already in operand form (e.g. %%mm0) -- confirm at call sites.
62 #define PAVGB_MMX_NO_RND(rega, regb, regr, regfe)                \
63     "movq   "#rega", "#regr"            \n\t"                    \
64     "pand   "#regb", "#regr"            \n\t"                    \
65     "pxor   "#rega", "#regb"            \n\t"                    \
66     "pand  "#regfe", "#regb"            \n\t"                    \
67     "psrlq       $1, "#regb"            \n\t"                    \
68     "paddb  "#regb", "#regr"            \n\t"
69
// Byte-wise average with rounding up: regr = (rega | regb) - (((rega ^ regb) & regfe) >> 1).
// regr receives the result, rega is left unmodified and regb is overwritten.
// regfe must hold 0xfe in every byte so that the 64-bit psrlq does not move
// bits from one byte into the next.
// Expands to string literals for use inside an asm block; arguments are
// stringized verbatim.
70 #define PAVGB_MMX(rega, regb, regr, regfe)                       \
71     "movq   "#rega", "#regr"            \n\t"                    \
72     "por    "#regb", "#regr"            \n\t"                    \
73     "pxor   "#rega", "#regb"            \n\t"                    \
74     "pand  "#regfe", "#regb"            \n\t"                    \
75     "psrlq       $1, "#regb"            \n\t"                    \
76     "psubb  "#regb", "#regr"            \n\t"
77
// Paired variant of the no-rounding byte average: computes two averages with
// interleaved instructions.
//   regr = (rega & regb) + (((rega ^ regb) & mm6) >> 1)
//   regp = (regc & regd) + (((regc ^ regd) & mm6) >> 1)
// rega and regc are left unmodified; regb and regd are overwritten.
78 // mm6 is supposed to contain 0xfefefefefefefefe (the mask register is hard-coded here)
79 #define PAVGBP_MMX_NO_RND(rega, regb, regr,  regc, regd, regp)   \
80     "movq  "#rega", "#regr"             \n\t"                    \
81     "movq  "#regc", "#regp"             \n\t"                    \
82     "pand  "#regb", "#regr"             \n\t"                    \
83     "pand  "#regd", "#regp"             \n\t"                    \
84     "pxor  "#rega", "#regb"             \n\t"                    \
85     "pxor  "#regc", "#regd"             \n\t"                    \
86     "pand    %%mm6, "#regb"             \n\t"                    \
87     "pand    %%mm6, "#regd"             \n\t"                    \
88     "psrlq      $1, "#regb"             \n\t"                    \
89     "psrlq      $1, "#regd"             \n\t"                    \
90     "paddb "#regb", "#regr"             \n\t"                    \
91     "paddb "#regd", "#regp"             \n\t"
92
// Paired variant of the rounding byte average: computes two averages with
// interleaved instructions.
//   regr = (rega | regb) - (((rega ^ regb) & mm6) >> 1)
//   regp = (regc | regd) - (((regc ^ regd) & mm6) >> 1)
// rega and regc are left unmodified; regb and regd are overwritten.
// mm6 is used as the mask register and must hold 0xfe in every byte.
93 #define PAVGBP_MMX(rega, regb, regr, regc, regd, regp)           \
94     "movq  "#rega", "#regr"             \n\t"                    \
95     "movq  "#regc", "#regp"             \n\t"                    \
96     "por   "#regb", "#regr"             \n\t"                    \
97     "por   "#regd", "#regp"             \n\t"                    \
98     "pxor  "#rega", "#regb"             \n\t"                    \
99     "pxor  "#regc", "#regd"             \n\t"                    \
100     "pand    %%mm6, "#regb"             \n\t"                    \
101     "pand    %%mm6, "#regd"             \n\t"                    \
102     "psrlq      $1, "#regd"             \n\t"                    \
103     "psrlq      $1, "#regb"             \n\t"                    \
104     "psubb "#regb", "#regr"             \n\t"                    \
105     "psubb "#regd", "#regp"             \n\t"
106
/*
 * Initialisation entry points; declarations only, the bodies live outside
 * this header. Both receive the DSPContext and the codec context.
 * Presumably they install x86-optimised function pointers into *c --
 * confirm against the implementations.
 */
107 void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx);
108 void ff_dsputil_init_pix_mmx(DSPContext *c, AVCodecContext *avctx);
109
/*
 * Clamped block-to-pixel routines (declarations only). Each takes a
 * read-only block of int16_t values, a writable uint8_t pixel buffer and
 * the line size of that buffer.
 * NOTE(review): line_size is an int here, whereas the put/avg pixels
 * prototypes further down use ptrdiff_t.
 */
110 void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
111                                int line_size);
112 void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
113                                int line_size);
114 void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
115                                       int line_size);
116
/*
 * Block-clearing routines in MMX and SSE flavours (declarations only).
 * Each takes a writable pointer to int16_t coefficients; the number of
 * elements cleared is not visible from this header.
 */
117 void ff_clear_block_mmx(int16_t *block);
118 void ff_clear_block_sse(int16_t *block);
119 void ff_clear_blocks_mmx(int16_t *blocks);
120 void ff_clear_blocks_sse(int16_t *blocks);
121
/*
 * Byte-addition and HuffYUV median-prediction helpers (declarations only).
 * `w` is presumably the number of bytes to process -- confirm.
 * NOTE(review): `src` of ff_add_bytes_mmx is not const-qualified, unlike
 * the read-only inputs of the neighbouring prototypes.
 */
122 void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, int w);
123
124 void ff_add_hfyu_median_prediction_cmov(uint8_t *dst, const uint8_t *top,
125                                         const uint8_t *diff, int w,
126                                         int *left, int *left_top);
127
/*
 * Miscellaneous optimised routines (declarations only): edge drawing,
 * global motion compensation (going by the name "gmc" -- confirm) and a
 * float vector clip that takes min/max bounds and an element count `len`.
 * Parameter semantics are defined by the implementations, which are not
 * part of this header.
 */
128 void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
129                        int w, int h, int sides);
130
131 void ff_gmc_mmx(uint8_t *dst, uint8_t *src,
132                 int stride, int h, int ox, int oy,
133                 int dxx, int dxy, int dyx, int dyy,
134                 int shift, int r, int width, int height);
135
136 void ff_vector_clipf_sse(float *dst, const float *src,
137                          float min, float max, int len);
138
/*
 * put/avg pixel routines (declarations only). All share one signature:
 * a writable destination `block`, a read-only source `pixels`, the
 * `line_size` as ptrdiff_t and a row count `h`. The name suffix gives the
 * block width (8 or 16), an optional _x2/_xy2 variant and the instruction
 * set (mmx, mmxext, sse2). The _x2/_xy2 suffixes presumably denote
 * half-pel interpolation in x resp. x and y -- confirm.
 */
139 void ff_avg_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
140                         ptrdiff_t line_size, int h);
141 void ff_avg_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
142                          ptrdiff_t line_size, int h);
143 void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
144                         ptrdiff_t line_size, int h);
145 void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
146                          ptrdiff_t line_size, int h);
147 void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
148                            ptrdiff_t line_size, int h);
149 void ff_put_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
150                            ptrdiff_t line_size, int h);
151 void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
152                           ptrdiff_t line_size, int h);
153 void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
154                           ptrdiff_t line_size, int h);
155
156 void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels,
157                            ptrdiff_t line_size, int h);
158
159 void ff_avg_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,
160                             ptrdiff_t line_size, int h);
161 void ff_avg_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,
162                              ptrdiff_t line_size, int h);
163
164 void ff_put_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,
165                             ptrdiff_t line_size, int h);
166 void ff_put_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,
167                              ptrdiff_t line_size, int h);
168
/*
 * Deinterlacing line filters (declarations only). Both take five input
 * line pointers (lum_m4 .. lum_m1 and lum) plus a `size`; the first
 * variant additionally writes to a separate `dst`.
 * NOTE(review): every pointer of the "inplace" variant is const-qualified
 * although the name suggests that one of the lines is modified -- confirm
 * against the implementation.
 */
169 void ff_deinterlace_line_mmx(uint8_t *dst,
170                              const uint8_t *lum_m4, const uint8_t *lum_m3,
171                              const uint8_t *lum_m2, const uint8_t *lum_m1,
172                              const uint8_t *lum,
173                              int size);
174
175 void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4,
176                                      const uint8_t *lum_m3,
177                                      const uint8_t *lum_m2,
178                                      const uint8_t *lum_m1,
179                                      const uint8_t *lum, int size);
180
/*
 * Generate a 16-pixel-wide function from an existing 8-pixel-wide one.
 *
 * The expansion defines
 *     STATIC void PFX1_pixels16<TYPE><CPUEXT>(block, pixels, line_size, h)
 * which calls <PFX2><PFX1>_pixels8<TYPE><CPUEXT> twice with the same
 * line_size and h: once on (block, pixels) and once on
 * (block + 8, pixels + 8).
 *
 * STATIC is placed in front of the return type (e.g. `static`, or left
 * empty for external linkage). PFX2 is pasted only onto the callee name,
 * so the generated function and the 8-wide function it calls may carry
 * different prefixes.
 */
181 #define PIXELS16(STATIC, PFX1, PFX2, TYPE, CPUEXT)                      \
182 STATIC void PFX1 ## _pixels16 ## TYPE ## CPUEXT(uint8_t *block,         \
183                                                 const uint8_t *pixels,  \
184                                                 ptrdiff_t line_size,    \
185                                                 int h)                  \
186 {                                                                       \
187     PFX2 ## PFX1 ## _pixels8 ## TYPE ## CPUEXT(block, pixels,           \
188                                                line_size, h);           \
189     PFX2 ## PFX1 ## _pixels8 ## TYPE ## CPUEXT(block + 8, pixels + 8,   \
190                                                line_size, h);           \
191 }
192
193 #endif /* AVCODEC_X86_DSPUTIL_X86_H */