2 * This file is part of FFmpeg.
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 #include "libavutil/attributes.h"
20 #include "libavutil/cpu.h"
21 #include "libavutil/x86/cpu.h"
22 #include "libavcodec/avcodec.h"
23 #include "libavcodec/mpegvideoencdsp.h"
/*
 * Prototypes for the per-instruction-set pix_sum and pix_norm1 routines that
 * ff_mpegvideoencdsp_init_x86() stores into the DSP context. They are only
 * declared in this file; the definitions live elsewhere (presumably in
 * external assembly, since the init function gates them with EXTERNAL_*()
 * checks -- confirm). Each takes a pixel pointer and a line size and returns
 * an int. Note that pix_sum has an XOP variant while pix_norm1 does not.
 */
25 int ff_pix_sum16_mmx(uint8_t *pix, int line_size);
26 int ff_pix_sum16_sse2(uint8_t *pix, int line_size);
27 int ff_pix_sum16_xop(uint8_t *pix, int line_size);
28 int ff_pix_norm1_mmx(uint8_t *pix, int line_size);
29 int ff_pix_norm1_sse2(uint8_t *pix, int line_size);
/*
 * PHADDD(a, t): inline-asm text that folds the two dwords of the 64-bit
 * operand a together, using t as scratch. Operands are written source first
 * (AT&T order, as the "$32" immediate shows):
 *   movq  a -> t        keep a copy of a
 *   psrlq $32, a        logical shift of the whole quadword: high dword
 *                       moves to the low dword, high dword becomes zero
 *   paddd t -> a        per-dword add
 * Afterwards the low dword of a holds (low + high) of the original value.
 * The arguments are pasted into the string with #, so they must be spelled
 * exactly as the assembler expects them.
 * NOTE(review): a and t are presumably MMX registers supplied by the
 * template -- confirm there.
 */
33 #define PHADDD(a, t) \
34 "movq " #a ", " #t " \n\t" \
35 "psrlq $32, " #a " \n\t" \
36 "paddd " #t ", " #a " \n\t"
39 * pmulhw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15])[16 - 31]
40 * pmulhrw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x8000)[16 - 31]
41 * pmulhrsw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x4000)[15 - 30]
/*
 * PMULHRW(x, y, s, o), plain-MMX form: there is no rounding multiply here,
 * so it is built from three steps applied to both x and y:
 *   pmulhw s  -> keep the high 16 bits of each signed 16x16 product with s
 *   paddw  o  -> add o to each word
 *   psraw  $1 -> arithmetic shift right by one
 * x and y are updated in place; s and o are only read.
 * NOTE(review): o is presumably the rounding constant prepared through
 * SET_RND inside the template -- confirm there.
 */
43 #define PMULHRW(x, y, s, o) \
44 "pmulhw " #s ", " #x " \n\t" \
45 "pmulhw " #s ", " #y " \n\t" \
46 "paddw " #o ", " #x " \n\t" \
47 "paddw " #o ", " #y " \n\t" \
48 "psraw $1, " #x " \n\t" \
49 "psraw $1, " #y " \n\t"
/*
 * First inclusion of the template, configured for MMX: DEF() appends the
 * _mmx suffix to a name, SET_RND maps to MOVQ_WONE (defined outside this
 * file's visible text) and SCALE_OFFSET is 1. The template is pulled in as
 * source text, so it also sees the PHADDD and PMULHRW definitions in effect
 * at this point.
 * NOTE(review): try_8x8basis_mmx and add_8x8basis_mmx, which
 * ff_mpegvideoencdsp_init_x86() references, presumably come from this
 * inclusion via DEF() -- confirm in the template.
 */
50 #define DEF(x) x ## _mmx
51 #define SET_RND MOVQ_WONE
52 #define SCALE_OFFSET 1
54 #include "mpegvideoenc_qns_template.c"
/*
 * Second inclusion of the template: names get the _3dnow suffix and
 * SCALE_OFFSET is 0.
 */
61 #define DEF(x) x ## _3dnow
63 #define SCALE_OFFSET 0
/*
 * In this variant PMULHRW is a single pmulhrw per operand. The o argument is
 * still accepted so the call sites stay the same, but it is not used.
 */
64 #define PMULHRW(x, y, s, o) \
65 "pmulhrw " #s ", " #x " \n\t" \
66 "pmulhrw " #s ", " #y " \n\t"
68 #include "mpegvideoenc_qns_template.c"
/*
 * Third inclusion of the template: names get the _ssse3 suffix and
 * SCALE_OFFSET is -1.
 */
77 #define DEF(x) x ## _ssse3
79 #define SCALE_OFFSET -1
/*
 * PHADDD for this variant: pshufw with selector 0x0E copies the upper dword
 * of a into the lower dword of t, then paddd adds t into a, so the low dword
 * of a ends up holding (low + high) of the original value.
 */
81 #define PHADDD(a, t) \
82 "pshufw $0x0E, " #a ", " #t " \n\t" \
83 /* faster than phaddd on core2 */ \
84 "paddd " #t ", " #a " \n\t"
/*
 * PMULHRW for this variant is a single pmulhrsw per operand; the o argument
 * is accepted but not used.
 */
86 #define PMULHRW(x, y, s, o) \
87 "pmulhrsw " #s ", " #x " \n\t" \
88 "pmulhrsw " #s ", " #y " \n\t"
90 #include "mpegvideoenc_qns_template.c"
97 #endif /* HAVE_SSSE3_INLINE */
99 #endif /* HAVE_INLINE_ASM */
/*
 * Install x86-specific implementations into the DSP context.
 *
 * The CPU flags are read once with av_get_cpu_flags(); each check below then
 * overwrites function pointers in c when the corresponding instruction set
 * is reported. When several checks succeed, the assignment that executes
 * last is the one that remains in effect.
 *
 * c     - DSP context; pix_sum, pix_norm1, try_8x8basis and add_8x8basis
 *         may be overwritten.
 * avctx - codec context; only avctx->flags is read, to test
 *         CODEC_FLAG_BITEXACT.
 *
 * NOTE(review): brace-only lines and some preprocessor guards are not
 * visible in this text, so the exact nesting of the statements below should
 * be confirmed against the complete file.
 */
101 av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
102 AVCodecContext *avctx)
104 int cpu_flags = av_get_cpu_flags();
/* pix_sum / pix_norm1: routines declared as prototypes at the top of this file. */
106 if (EXTERNAL_MMX(cpu_flags)) {
107 c->pix_sum = ff_pix_sum16_mmx;
108 c->pix_norm1 = ff_pix_norm1_mmx;
111 if (EXTERNAL_SSE2(cpu_flags)) {
112 c->pix_sum = ff_pix_sum16_sse2;
113 c->pix_norm1 = ff_pix_norm1_sse2;
/* XOP only provides pix_sum; no XOP pix_norm1 is declared in this file. */
116 if (EXTERNAL_XOP(cpu_flags)) {
117 c->pix_sum = ff_pix_sum16_xop;
/*
 * try_8x8basis / add_8x8basis: the _mmx, _3dnow and _ssse3 names are built
 * with DEF(), presumably by the template included three times above.
 * In each group the try_8x8basis assignment follows a test that
 * CODEC_FLAG_BITEXACT is not set in avctx->flags.
 */
122 if (INLINE_MMX(cpu_flags)) {
123 if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
124 c->try_8x8basis = try_8x8basis_mmx;
126 c->add_8x8basis = add_8x8basis_mmx;
129 if (INLINE_AMD3DNOW(cpu_flags)) {
130 if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
131 c->try_8x8basis = try_8x8basis_3dnow;
133 c->add_8x8basis = add_8x8basis_3dnow;
/* The SSSE3 variants only exist when the build has HAVE_SSSE3_INLINE. */
136 #if HAVE_SSSE3_INLINE
137 if (INLINE_SSSE3(cpu_flags)) {
138 if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
139 c->try_8x8basis = try_8x8basis_ssse3;
141 c->add_8x8basis = add_8x8basis_ssse3;
143 #endif /* HAVE_SSSE3_INLINE */
145 #endif /* HAVE_INLINE_ASM */