OBJS-$(CONFIG_MPEG4_DECODER) += x86/xvididct_init.o
OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp_init.o
OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp_init.o
+OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp_init.o
OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp_init.o
-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o
-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
+OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_init.o
+OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o
+OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp_init.o
+OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp_init.o
+OBJS-$(CONFIG_TTA_ENCODER) += x86/ttaencdsp_init.o
+OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o
OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc_init.o
OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp_init.o
+ OBJS-$(CONFIG_VP3_DECODER) += x86/hpeldsp_vp3_init.o
OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp_init.o
-OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o
+OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o \
+ x86/vp9dsp_init_10bpp.o \
+ x86/vp9dsp_init_12bpp.o \
+ x86/vp9dsp_init_16bpp.o
+OBJS-$(CONFIG_WEBP_DECODER) += x86/vp8dsp_init.o
# GCC inline assembly optimizations
x86/vp8dsp_loopfilter.o
# decoders/encoders
-YASM-OBJS-$(CONFIG_AAC_DECODER) += x86/sbrdsp.o
-YASM-OBJS-$(CONFIG_APE_DECODER) += x86/apedsp.o
-YASM-OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp.o
+YASM-OBJS-$(CONFIG_AAC_DECODER) += x86/aacpsdsp.o \
+ x86/sbrdsp.o
+YASM-OBJS-$(CONFIG_AAC_ENCODER) += x86/aacencdsp.o
+YASM-OBJS-$(CONFIG_ADPCM_G722_DECODER) += x86/g722dsp.o
+YASM-OBJS-$(CONFIG_ADPCM_G722_ENCODER) += x86/g722dsp.o
+YASM-OBJS-$(CONFIG_ALAC_DECODER) += x86/alacdsp.o
+YASM-OBJS-$(CONFIG_APNG_DECODER) += x86/pngdsp.o
+YASM-OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp.o x86/synth_filter.o
+YASM-OBJS-$(CONFIG_DIRAC_DECODER) += x86/diracdsp.o \
+ x86/dirac_dwt.o
YASM-OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc.o
-YASM-OBJS-$(CONFIG_HEVC_DECODER) += x86/hevc_deblock.o \
- x86/hevc_mc.o \
- x86/hevc_idct.o
+YASM-OBJS-$(CONFIG_FLAC_DECODER) += x86/flacdsp.o
+ifdef CONFIG_GPL # x86/flac_dsp_gpl.o is only listed when CONFIG_GPL is defined
+YASM-OBJS-$(CONFIG_FLAC_ENCODER) += x86/flac_dsp_gpl.o
+endif # CONFIG_GPL
+YASM-OBJS-$(CONFIG_HEVC_DECODER) += x86/hevc_mc.o \
+ x86/hevc_deblock.o \
+ x86/hevc_idct.o \
+ x86/hevc_res_add.o \
+ x86/hevc_sao.o \
+ x86/hevc_sao_10bit.o
+YASM-OBJS-$(CONFIG_JPEG2000_DECODER) += x86/jpeg2000dsp.o
+YASM-OBJS-$(CONFIG_MLP_DECODER) += x86/mlpdsp.o
+YASM-OBJS-$(CONFIG_MPEG4_DECODER) += x86/xvididct.o
YASM-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp.o
YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o
+YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o
+YASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o
+YASM-OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp.o
+YASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
+YASM-OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp.o
+YASM-OBJS-$(CONFIG_TTA_ENCODER) += x86/ttaencdsp.o
YASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o
+YASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o
YASM-OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp.o
+ YASM-OBJS-$(CONFIG_VP3_DECODER) += x86/hpeldsp_vp3.o
YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp.o
-YASM-OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp.o
+YASM-OBJS-$(CONFIG_VP9_DECODER) += x86/vp9intrapred.o \
+ x86/vp9intrapred_16bpp.o \
+ x86/vp9itxfm.o \
+ x86/vp9itxfm_16bpp.o \
+ x86/vp9lpf.o \
+ x86/vp9lpf_16bpp.o \
+ x86/vp9mc.o \
+ x86/vp9mc_16bpp.o
+YASM-OBJS-$(CONFIG_WEBP_DECODER) += x86/vp8dsp.o
PUT_NO_RND_PIXELS8_X2
- ; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
- %macro PUT_NO_RND_PIXELS8_X2_EXACT 0
- cglobal put_no_rnd_pixels8_x2_exact, 4,5
- lea r4, [r2*3]
- pcmpeqb m6, m6
- .loop:
- mova m0, [r1]
- mova m2, [r1+r2]
- mova m1, [r1+1]
- mova m3, [r1+r2+1]
- pxor m0, m6
- pxor m2, m6
- pxor m1, m6
- pxor m3, m6
- PAVGB m0, m1
- PAVGB m2, m3
- pxor m0, m6
- pxor m2, m6
- mova [r0], m0
- mova [r0+r2], m2
- mova m0, [r1+r2*2]
- mova m1, [r1+r2*2+1]
- mova m2, [r1+r4]
- mova m3, [r1+r4+1]
- pxor m0, m6
- pxor m1, m6
- pxor m2, m6
- pxor m3, m6
- PAVGB m0, m1
- PAVGB m2, m3
- pxor m0, m6
- pxor m2, m6
- mova [r0+r2*2], m0
- mova [r0+r4], m2
- lea r1, [r1+r2*4]
- lea r0, [r0+r2*4]
- sub r3d, 4
- jg .loop
- REP_RET
- %endmacro
-
- INIT_MMX mmxext
- PUT_NO_RND_PIXELS8_X2_EXACT
- INIT_MMX 3dnow
- PUT_NO_RND_PIXELS8_X2_EXACT
-
-
; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
%macro PUT_PIXELS8_Y2 0
+%if cpuflag(sse2)
+cglobal put_pixels16_y2, 4,5,3
+%else
cglobal put_pixels8_y2, 4,5
+%endif
lea r4, [r2*2]
- mova m0, [r1]
+ movu m0, [r1]
sub r0, r2
.loop:
- mova m1, [r1+r2]
- mova m2, [r1+r4]
+ movu m1, [r1+r2]
+ movu m2, [r1+r4]
add r1, r4
PAVGB m0, m1
PAVGB m1, m2
void ff_put_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
+void ff_put_pixels8_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
void ff_put_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
+void ff_put_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
+void ff_put_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
+ void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags);
+
#endif /* AVCODEC_X86_HPELDSP_H */
c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_mmxext;
c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_mmxext;
- c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmxext;
- c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmxext;
+ c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_mmxext;
+ c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_mmxext;
}
-
- if (CONFIG_VP3_DECODER && flags & AV_CODEC_FLAG_BITEXACT) {
- c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
- c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
- }
#endif /* HAVE_MMXEXT_EXTERNAL */
}
c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_3dnow;
c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_3dnow;
- c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
- c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_3dnow;
+ c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_3dnow;
+ c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_3dnow;
}
-
- if (CONFIG_VP3_DECODER && flags & AV_CODEC_FLAG_BITEXACT) {
- c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow;
- c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow;
- }
#endif /* HAVE_AMD3DNOW_EXTERNAL */
}
if (EXTERNAL_SSE2(cpu_flags))
hpeldsp_init_sse2(c, flags, cpu_flags);
+ if (EXTERNAL_SSSE3(cpu_flags))
+ hpeldsp_init_ssse3(c, flags, cpu_flags);
++
+ if (CONFIG_VP3_DECODER)
+ ff_hpeldsp_vp3_init_x86(c, cpu_flags, flags);
}
--- /dev/null
-;* This file is part of Libav.
+ ;******************************************************************************
+ ;* SIMD-optimized halfpel functions for VP3
+ ;*
-;* Libav is free software; you can redistribute it and/or
++;* This file is part of FFmpeg.
+ ;*
-;* Libav is distributed in the hope that it will be useful,
++;* FFmpeg is free software; you can redistribute it and/or
+ ;* modify it under the terms of the GNU Lesser General Public
+ ;* License as published by the Free Software Foundation; either
+ ;* version 2.1 of the License, or (at your option) any later version.
+ ;*
-;* License along with Libav; if not, write to the Free Software
++;* FFmpeg is distributed in the hope that it will be useful,
+ ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ ;* Lesser General Public License for more details.
+ ;*
+ ;* You should have received a copy of the GNU Lesser General Public
++;* License along with FFmpeg; if not, write to the Free Software
+ ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ;******************************************************************************
+
+ %include "libavutil/x86/x86util.asm"
+
+ SECTION .text
+
+ ; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+ %macro PUT_NO_RND_PIXELS8_X2_EXACT 0 ; horizontal half-pel put: every output byte combines src[x] and src[x+1]
+ cglobal put_no_rnd_pixels8_x2_exact, 4,5 ; as used below: r0 = block (dst), r1 = pixels (src), r2 = line_size, r3 = h, r4 = scratch
+ lea r4, [r2*3] ; r4 = 3 * line_size, the offset of the fourth row
+ pcmpeqb m6, m6 ; m6 = all ones; XOR with it is a bitwise NOT
+ .loop: ; one iteration produces four output rows
+ mova m0, [r1] ; row 0
+ mova m2, [r1+r2] ; row 1
+ mova m1, [r1+1] ; row 0, shifted one byte to the right
+ mova m3, [r1+r2+1] ; row 1, shifted one byte to the right
+ pxor m0, m6 ; invert all four inputs before averaging
+ pxor m2, m6
+ pxor m1, m6
+ pxor m3, m6
+ PAVGB m0, m1 ; PAVGB is defined outside this view; presumably a per-byte average that rounds up - TODO confirm
+ PAVGB m2, m3
+ pxor m0, m6 ; invert back: if PAVGB rounds up, NOT(PAVGB(NOT a, NOT b)) is the average rounded down
+ pxor m2, m6
+ mova [r0], m0 ; store output rows 0 and 1
+ mova [r0+r2], m2
+ mova m0, [r1+r2*2] ; row 2
+ mova m1, [r1+r2*2+1] ; row 2, shifted one byte to the right
+ mova m2, [r1+r4] ; row 3
+ mova m3, [r1+r4+1] ; row 3, shifted one byte to the right
+ pxor m0, m6 ; same invert / average / invert sequence as for rows 0 and 1
+ pxor m1, m6
+ pxor m2, m6
+ pxor m3, m6
+ PAVGB m0, m1
+ PAVGB m2, m3
+ pxor m0, m6
+ pxor m2, m6
+ mova [r0+r2*2], m0 ; store output rows 2 and 3
+ mova [r0+r4], m2
+ lea r1, [r1+r2*4] ; advance src by four rows
+ lea r0, [r0+r2*4] ; advance dst by four rows
+ sub r3d, 4 ; four rows done
+ jg .loop ; keep going while the remaining row count is > 0
+ REP_RET ; return (REP_RET is defined outside this view)
+ %endmacro
+
+ INIT_MMX mmxext ; instantiate the macro for the mmxext variant
+ PUT_NO_RND_PIXELS8_X2_EXACT
+ INIT_MMX 3dnow ; instantiate the macro again for the 3dnow variant
+ PUT_NO_RND_PIXELS8_X2_EXACT
+
+
+ ; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+ %macro PUT_NO_RND_PIXELS8_Y2_EXACT 0 ; vertical half-pel put: every output row combines source rows y and y+1
+ cglobal put_no_rnd_pixels8_y2_exact, 4,5 ; as used below: r0 = block (dst), r1 = pixels (src), r2 = line_size, r3 = h, r4 = scratch
+ lea r4, [r2*3] ; r4 = 3 * line_size
+ mova m0, [r1] ; preload source row 0
+ pcmpeqb m6, m6 ; m6 = all ones; XOR with it is a bitwise NOT
+ add r1, r2 ; r1 now points at source row 1
+ pxor m0, m6 ; m0 = NOT(row 0); rows are kept inverted while they are averaged
+ .loop: ; one iteration produces four output rows; m0 enters holding the inverted previous row
+ mova m1, [r1] ; next source row (row 1 on the first pass)
+ mova m2, [r1+r2] ; the row after it (row 2 on the first pass)
+ pxor m1, m6
+ pxor m2, m6
+ PAVGB m0, m1 ; PAVGB is defined outside this view; presumably a per-byte average that rounds up - TODO confirm
+ PAVGB m1, m2 ; m1 is consumed by the line above first, then overwritten here
+ pxor m0, m6 ; invert the two results back; m2 stays inverted for reuse below
+ pxor m1, m6
+ mova [r0], m0 ; store output rows 0 and 1
+ mova [r0+r2], m1
+ mova m1, [r1+r2*2] ; two more source rows
+ mova m0, [r1+r4]
+ pxor m1, m6
+ pxor m0, m6 ; m0 is left inverted so the next iteration can reuse it as its first row
+ PAVGB m2, m1 ; combines the still-inverted m2 with the newly loaded row
+ PAVGB m1, m0
+ pxor m2, m6 ; invert the two results back
+ pxor m1, m6
+ mova [r0+r2*2], m2 ; store output rows 2 and 3
+ mova [r0+r4], m1
+ lea r1, [r1+r2*4] ; advance src by four rows
+ lea r0, [r0+r2*4] ; advance dst by four rows
+ sub r3d, 4 ; four rows done
+ jg .loop ; keep going while the remaining row count is > 0
+ REP_RET ; return (REP_RET is defined outside this view)
+ %endmacro
+
+ INIT_MMX mmxext ; instantiate the macro for the mmxext variant
+ PUT_NO_RND_PIXELS8_Y2_EXACT
+ INIT_MMX 3dnow ; instantiate the macro again for the 3dnow variant
+ PUT_NO_RND_PIXELS8_Y2_EXACT
--- /dev/null
- * This file is part of Libav.
+ /*
- * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+ *
- * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
- * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+ #include "libavutil/attributes.h"
+ #include "libavutil/cpu.h"
+ #include "libavutil/x86/cpu.h"
+
+ #include "libavcodec/avcodec.h"
+ #include "libavcodec/hpeldsp.h"
+
+ #include "hpeldsp.h"
+
+ void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
+ const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
+ void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block,
+ const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
+ void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
+ const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
+ void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block,
+ const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
+
+ av_cold void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags)
+ {
+     /* Non-zero when the caller asked for bit-exact output; the exact
+      * put_no_rnd variants are only installed in that case. */
+     const int bitexact = flags & AV_CODEC_FLAG_BITEXACT;
+
+     /* 3DNow! is checked first so that a matching MMXEXT check below
+      * overwrites the same two table slots. */
+     if (EXTERNAL_AMD3DNOW(cpu_flags) && bitexact) {
+         c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow;
+         c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow;
+     }
+
+     if (EXTERNAL_MMXEXT(cpu_flags) && bitexact) {
+         c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
+         c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
+     }
+ }