]> git.sesse.net Git - ffmpeg/blob - libavcodec/x86/dsputil_mmx.h
configure: Rename cmov processor capability to i686
[ffmpeg] / libavcodec / x86 / dsputil_mmx.h
1 /*
2  * MMX optimized DSP utils
3  * Copyright (c) 2007  Aurelien Jacobs <aurel@gnuage.org>
4  *
5  * This file is part of Libav.
6  *
7  * Libav is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * Libav is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with Libav; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 #ifndef AVCODEC_X86_DSPUTIL_MMX_H
23 #define AVCODEC_X86_DSPUTIL_MMX_H
24
25 #include <stddef.h>
26 #include <stdint.h>
27
28 #include "libavcodec/dsputil.h"
29 #include "libavutil/x86/asm.h"
30 #include "constants.h"
31
// MOVQ_WONE(regd): set every 16-bit word of register regd to 1.
// regd is given as a bare register name; the macro adds the '%' prefix itself.
//   pcmpeqd regd, regd  -> all bits set (a register always equals itself)
//   psrlw   $15, regd   -> each word shifted right by 15, leaving 0x0001
// NOTE(review): the asm statement has empty output/input/clobber lists, so
// the compiler is not told that regd is modified.
32 #define MOVQ_WONE(regd) \
33     __asm__ volatile ( \
34     "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
35     "psrlw $15, %%" #regd ::)
36
// JUMPALIGN(): emit the assembler directive ".p2align 3" at this point, i.e.
// pad the instruction stream to the next 2^3 = 8 byte boundary.
37 #define JUMPALIGN()     __asm__ volatile (".p2align 3"::)
// MOVQ_ZERO(regd): clear register regd by XOR-ing it with itself.
// regd is given as a bare register name; the macro adds the '%' prefix itself.
38 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%"#regd", %%"#regd ::)
39
// MOVQ_BFE(regd): set every byte of register regd to 0xfe.
// regd is given as a bare register name; the macro adds the '%' prefix itself.
//   pcmpeqd regd, regd  -> all bits set (0xff in every byte)
//   paddb   regd, regd  -> 0xff + 0xff wraps to 0xfe in every byte
// The result is the low-bit-clearing mask the PAVGB*_MMX macros expect.
40 #define MOVQ_BFE(regd)                                  \
41     __asm__ volatile (                                  \
42         "pcmpeqd %%"#regd", %%"#regd"   \n\t"           \
43         "paddb   %%"#regd", %%"#regd"   \n\t" ::)
44
// MOVQ_WTWO(regd): load the "word two" constant into register regd.
// regd is given as a bare register name; the macro adds the '%' prefix itself.
// Two variants are selected by whether PIC is defined:
//  - non-PIC: a single movq from the memory operand ff_wtwo.
//    NOTE(review): ff_wtwo is not defined in this header; presumably it holds
//    2 in each 16-bit word, matching the PIC variant - confirm.
//  - PIC: the value is computed in the register, with no memory reference.
45 #ifndef PIC
46 #define MOVQ_WTWO(regd) __asm__ volatile ("movq %0, %%"#regd" \n\t" :: "m"(ff_wtwo))
47 #else
48 // for shared library it's better to use this way for accessing constants
49 // pcmpeqd -> -1
// psrlw $15 -> 1 in every 16-bit word, psllw $1 -> 2 in every 16-bit word
50 #define MOVQ_WTWO(regd)                                 \
51     __asm__ volatile (                                  \
52         "pcmpeqd %%"#regd", %%"#regd"   \n\t"           \
53         "psrlw         $15, %%"#regd"   \n\t"           \
54         "psllw          $1, %%"#regd"   \n\t"::)
55
56 #endif
57
58 // using regr as temporary and for the output result
59 // first argument is unmodified and second is trashed
60 // regfe is supposed to contain 0xfefefefefefefefe
// Expands to a string-literal fragment (no __asm__ wrapper) to be pasted into
// a larger asm template. Register arguments are stringified verbatim, so the
// caller supplies the '%' prefix.
// Computes regr = (rega & regb) + (((rega ^ regb) & regfe) >> 1).
// Masking with 0xfe before the 64-bit shift keeps bits from crossing byte
// boundaries, so each byte of regr is (a + b) >> 1, i.e. rounded down.
61 #define PAVGB_MMX_NO_RND(rega, regb, regr, regfe)                \
62     "movq   "#rega", "#regr"            \n\t"                    \
63     "pand   "#regb", "#regr"            \n\t"                    \
64     "pxor   "#rega", "#regb"            \n\t"                    \
65     "pand  "#regfe", "#regb"            \n\t"                    \
66     "psrlq       $1, "#regb"            \n\t"                    \
67     "paddb  "#regb", "#regr"            \n\t"
68
// PAVGB_MMX(rega, regb, regr, regfe): rounding counterpart of the no-round
// byte average. Expands to a string-literal fragment (no __asm__ wrapper);
// register arguments are stringified verbatim, so the caller supplies the
// '%' prefix. rega is left unmodified, regb is overwritten, regr receives
// the result.
// Computes regr = (rega | regb) - (((rega ^ regb) & regfe) >> 1).
// With 0xfe in every byte of regfe, each byte of regr is (a + b + 1) >> 1,
// i.e. rounded up.
69 #define PAVGB_MMX(rega, regb, regr, regfe)                       \
70     "movq   "#rega", "#regr"            \n\t"                    \
71     "por    "#regb", "#regr"            \n\t"                    \
72     "pxor   "#rega", "#regb"            \n\t"                    \
73     "pand  "#regfe", "#regb"            \n\t"                    \
74     "psrlq       $1, "#regb"            \n\t"                    \
75     "psubb  "#regb", "#regr"            \n\t"
76
77 // mm6 is supposed to contain 0xfefefefefefefefe
// Two interleaved round-down byte averages in one fragment:
//   regr = (rega & regb) + (((rega ^ regb) & mm6) >> 1)
//   regp = (regc & regd) + (((regc ^ regd) & mm6) >> 1)
// rega and regc are left unmodified; regb and regd are overwritten.
// Unlike the single-pair macros, the mask register is hard-coded as %%mm6.
// Expands to a string-literal fragment (no __asm__ wrapper); register
// arguments are stringified verbatim, so the caller supplies the '%' prefix.
78 #define PAVGBP_MMX_NO_RND(rega, regb, regr,  regc, regd, regp)   \
79     "movq  "#rega", "#regr"             \n\t"                    \
80     "movq  "#regc", "#regp"             \n\t"                    \
81     "pand  "#regb", "#regr"             \n\t"                    \
82     "pand  "#regd", "#regp"             \n\t"                    \
83     "pxor  "#rega", "#regb"             \n\t"                    \
84     "pxor  "#regc", "#regd"             \n\t"                    \
85     "pand    %%mm6, "#regb"             \n\t"                    \
86     "pand    %%mm6, "#regd"             \n\t"                    \
87     "psrlq      $1, "#regb"             \n\t"                    \
88     "psrlq      $1, "#regd"             \n\t"                    \
89     "paddb "#regb", "#regr"             \n\t"                    \
90     "paddb "#regd", "#regp"             \n\t"
91
// PAVGBP_MMX: two interleaved round-up byte averages in one fragment:
//   regr = (rega | regb) - (((rega ^ regb) & mm6) >> 1)
//   regp = (regc | regd) - (((regc ^ regd) & mm6) >> 1)
// rega and regc are left unmodified; regb and regd are overwritten.
// The mask register is hard-coded as %%mm6 and must hold 0xfe in every byte
// for the per-byte result to be (a + b + 1) >> 1.
// Expands to a string-literal fragment (no __asm__ wrapper); register
// arguments are stringified verbatim, so the caller supplies the '%' prefix.
92 #define PAVGBP_MMX(rega, regb, regr, regc, regd, regp)           \
93     "movq  "#rega", "#regr"             \n\t"                    \
94     "movq  "#regc", "#regp"             \n\t"                    \
95     "por   "#regb", "#regr"             \n\t"                    \
96     "por   "#regd", "#regp"             \n\t"                    \
97     "pxor  "#rega", "#regb"             \n\t"                    \
98     "pxor  "#regc", "#regd"             \n\t"                    \
99     "pand    %%mm6, "#regb"             \n\t"                    \
100     "pand    %%mm6, "#regd"             \n\t"                    \
101     "psrlq      $1, "#regd"             \n\t"                    \
102     "psrlq      $1, "#regb"             \n\t"                    \
103     "psubb "#regb", "#regr"             \n\t"                    \
104     "psubb "#regd", "#regp"             \n\t"
105
// Initialization entry points; declared here, defined outside this header.
// NOTE(review): presumably these fill in function pointers of *c with the
// x86-optimized routines appropriate for avctx - confirm against the
// definitions.
106 void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx);
107 void ff_dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);
108
// Clamped pixel routines; declared here, defined outside this header.
// All three take a read-only int16_t block, a non-const uint8_t pixel pointer
// and an int line_size (the put/avg pixel declarations in this header use
// ptrdiff_t for the same-named parameter).
// NOTE(review): going by the names, block values are added to / stored into
// pixels with saturation to the uint8_t range - confirm against the
// definitions.
109 void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);
110 void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);
111 void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);
112
113
// put/avg pixel routines for 8- and 16-wide blocks, with _mmx, _mmxext and
// _sse2 name suffixes; declared here, defined outside this header.
// All share one signature: non-const block, read-only pixels,
// ptrdiff_t line_size and int h.
// NOTE(review): presumably block is the destination, pixels the source,
// line_size the distance between rows in bytes and h the number of rows;
// "put" would copy and "avg" would average with the existing destination -
// confirm against the definitions.
114 void ff_avg_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
115                         ptrdiff_t line_size, int h);
116 void ff_avg_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
117                          ptrdiff_t line_size, int h);
118 void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
119                         ptrdiff_t line_size, int h);
120 void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
121                          ptrdiff_t line_size, int h);
122 void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
123                            ptrdiff_t line_size, int h);
124 void ff_put_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
125                            ptrdiff_t line_size, int h);
126 void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
127                           ptrdiff_t line_size, int h);
128 void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
129                           ptrdiff_t line_size, int h);
130
// Same signature as the plain put/avg pixel routines; defined outside this
// header.
// NOTE(review): the _x2 suffix presumably denotes horizontal half-pixel
// interpolation before averaging into block - confirm against the definition.
131 void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels,
132                            ptrdiff_t line_size, int h);
133
// avg/put routines with an _xy2 suffix for 8- and 16-wide blocks; same
// signature as the plain put/avg pixel routines, defined outside this header.
// NOTE(review): _xy2 presumably denotes half-pixel interpolation in both the
// horizontal and vertical direction - confirm against the definitions.
134 void ff_avg_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,
135                             ptrdiff_t line_size, int h);
136 void ff_avg_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,
137                              ptrdiff_t line_size, int h);
138
139 void ff_put_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,
140                             ptrdiff_t line_size, int h);
141 void ff_put_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,
142                              ptrdiff_t line_size, int h);
143
// Deinterlacing helper; declared here, defined outside this header.
// Takes a non-const destination pointer, five read-only line pointers
// (lum_m4, lum_m3, lum_m2, lum_m1, lum) and an int size.
// NOTE(review): presumably the five pointers are consecutive source lines and
// size is the number of pixels to process per line - confirm against the
// definition.
144 void ff_deinterlace_line_mmx(uint8_t *dst,
145                              const uint8_t *lum_m4, const uint8_t *lum_m3,
146                              const uint8_t *lum_m2, const uint8_t *lum_m1,
147                              const uint8_t *lum,
148                              int size);
149
// In-place variant of the deinterlacing helper; declared here, defined
// outside this header. There is no separate destination parameter.
// NOTE(review): all five line pointers are const-qualified even though the
// name says "inplace"; check whether the implementation writes through any
// of them, in which case the const qualifiers here are misleading.
150 void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4,
151                                      const uint8_t *lum_m3,
152                                      const uint8_t *lum_m2,
153                                      const uint8_t *lum_m1,
154                                      const uint8_t *lum, int size);
155
156 #endif /* AVCODEC_X86_DSPUTIL_MMX_H */