c->vsse[5] = vsse_intra8_c;
c->nsse[0] = nsse16_c;
c->nsse[1] = nsse8_c;
+#if CONFIG_SNOW_DECODER || CONFIG_SNOW_ENCODER
+ ff_dsputil_init_dwt(c);
+#endif
- c->try_8x8basis = try_8x8basis_c;
- c->add_8x8basis = add_8x8basis_c;
-
c->shrink[0] = av_image_copy_plane;
c->shrink[1] = ff_shrink22;
c->shrink[2] = ff_shrink44;
if (ff_MPV_common_init(s) < 0)
return -1;
- if (ARCH_X86)
- ff_MPV_encode_init_x86(s);
-
+ ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
ff_qpeldsp_init(&s->qdsp);
s->avctx->coded_frame = s->current_picture.f;
score *= lambda;
unquant_change= new_coeff - old_coeff;
- assert((score < 100*lambda && score > -100*lambda) || lambda==0);
+ av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
- score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
+ score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
+ unquant_change);
if(score<best_score){
best_score= score;
best_coeff= i;
--- /dev/null
- * This file is part of Libav.
+ /*
- * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+ *
- * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
- * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
- assert(-512 < b && b < 512);
++ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+ #include <assert.h>
+ #include <stdint.h>
+
+ #include "config.h"
++#include "libavutil/avassert.h"
+ #include "libavutil/attributes.h"
+ #include "avcodec.h"
+ #include "mpegvideoencdsp.h"
+
+ static int try_8x8basis_c(int16_t rem[64], int16_t weight[64],
+ int16_t basis[64], int scale)
+ { /* Returns a weighted sum of squares of rem[] + scaled basis[]; none of the input arrays is written. */
+ int i;
+ unsigned int sum = 0;
+ /* Walk all 64 entries of the 8x8 block. */
+ for (i = 0; i < 8 * 8; i++) {
+ int b = rem[i] + ((basis[i] * scale +
+ (1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >> /* adds half of the divisor so the shift rounds to nearest */
+ (BASIS_SHIFT - RECON_SHIFT));
+ int w = weight[i];
+ b >>= RECON_SHIFT; /* scale b down by RECON_SHIFT bits before weighting */
++ av_assert2(-512 < b && b < 512); /* range check on b, only active at av_assert2 level */
+ /* Square of the weighted value, shifted right by 4 before accumulation (the multiply binds tighter than >>). */
+ sum += (w * b) * (w * b) >> 4;
+ }
+ return sum >> 2; /* unsigned accumulator, shifted by 2 and returned as int */
+ }
+
+ static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale)
+ { /* Adds basis[] multiplied by scale to rem[] in place; basis[] is only read. */
+ int i;
+ /* Per entry: rounded product shifted right by (BASIS_SHIFT - RECON_SHIFT), the same term try_8x8basis_c adds to rem[i]. */
+ for (i = 0; i < 8 * 8; i++)
+ rem[i] += (basis[i] * scale +
+ (1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >> /* half of the divisor: round to nearest */
+ (BASIS_SHIFT - RECON_SHIFT);
+ }
+
+ av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
+ AVCodecContext *avctx)
+ { /* Fills c with the C implementations, then hands c to the x86 init when ARCH_X86 is non-zero. */
+ c->try_8x8basis = try_8x8basis_c;
+ c->add_8x8basis = add_8x8basis_c;
+ /* avctx is not used here directly; it is only forwarded to the x86 init. */
+ if (ARCH_X86)
+ ff_mpegvideoencdsp_init_x86(c, avctx);
+ }
--- /dev/null
- * This file is part of Libav.
+ /*
- * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+ *
- * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
- * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+ #ifndef AVCODEC_MPEGVIDEOENCDSP_H
+ #define AVCODEC_MPEGVIDEOENCDSP_H
+
+ #include <stdint.h>
+
+ #include "avcodec.h"
+
+ #define BASIS_SHIFT 16
+ #define RECON_SHIFT 6
+
+ typedef struct MpegvideoEncDSPContext { /* Function-pointer table for the two 8x8 basis helpers. */
+ int (*try_8x8basis)(int16_t rem[64], int16_t weight[64],
+ int16_t basis[64], int scale); /* returns a score; see try_8x8basis_c for the reference computation */
+ void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale); /* updates rem[] in place */
+ /* Both pointers are set by ff_mpegvideoencdsp_init(); the x86 init may replace them. */
+ } MpegvideoEncDSPContext;
+
+ void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
+ AVCodecContext *avctx); /* generic entry point: installs the C versions, then x86 overrides if ARCH_X86 */
+ void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
+ AVCodecContext *avctx); /* x86-specific overrides; called from ff_mpegvideoencdsp_init() */
+
+ #endif /* AVCODEC_MPEGVIDEOENCDSP_H */
}
#undef SUM
-#define MMABS_MMX(a,z) \
- "pxor " #z ", " #z " \n\t" \
- "pcmpgtw " #a ", " #z " \n\t" \
- "pxor " #z ", " #a " \n\t" \
- "psubw " #z ", " #a " \n\t"
-
-#define MMABS_MMXEXT(a, z) \
- "pxor " #z ", " #z " \n\t" \
- "psubw " #a ", " #z " \n\t" \
- "pmaxsw " #z ", " #a " \n\t"
-
-#define MMABS_SSSE3(a,z) \
- "pabsw " #a ", " #a " \n\t"
-
-#define MMABS_SUM(a,z, sum) \
- MMABS(a,z) \
- "paddusw " #a ", " #sum " \n\t"
-
-/* FIXME: HSUM_* saturates at 64k, while an 8x8 hadamard or dct block can get
- * up to about 100k on extreme inputs. But that's very unlikely to occur in
- * natural video, and it's even more unlikely to not have any alternative
- * mvs/modes with lower cost. */
-#define HSUM_MMX(a, t, dst) \
- "movq " #a ", " #t " \n\t" \
- "psrlq $32, " #a " \n\t" \
- "paddusw " #t ", " #a " \n\t" \
- "movq " #a ", " #t " \n\t" \
- "psrlq $16, " #a " \n\t" \
- "paddusw " #t ", " #a " \n\t" \
- "movd " #a ", " #dst " \n\t" \
-
-#define HSUM_MMXEXT(a, t, dst) \
- "pshufw $0x0E, " #a ", " #t " \n\t" \
- "paddusw " #t ", " #a " \n\t" \
- "pshufw $0x01, " #a ", " #t " \n\t" \
- "paddusw " #t ", " #a " \n\t" \
- "movd " #a ", " #dst " \n\t" \
-
-#define HSUM_SSE2(a, t, dst) \
- "movhlps " #a ", " #t " \n\t" \
- "paddusw " #t ", " #a " \n\t" \
- "pshuflw $0x0E, " #a ", " #t " \n\t" \
- "paddusw " #t ", " #a " \n\t" \
- "pshuflw $0x01, " #a ", " #t " \n\t" \
- "paddusw " #t ", " #a " \n\t" \
- "movd " #a ", " #dst " \n\t" \
-
-#define DCT_SAD4(m, mm, o) \
- "mov"#m" "#o" + 0(%1), " #mm "2 \n\t" \
- "mov"#m" "#o" + 16(%1), " #mm "3 \n\t" \
- "mov"#m" "#o" + 32(%1), " #mm "4 \n\t" \
- "mov"#m" "#o" + 48(%1), " #mm "5 \n\t" \
- MMABS_SUM(mm ## 2, mm ## 6, mm ## 0) \
- MMABS_SUM(mm ## 3, mm ## 7, mm ## 1) \
- MMABS_SUM(mm ## 4, mm ## 6, mm ## 0) \
- MMABS_SUM(mm ## 5, mm ## 7, mm ## 1) \
-
-#define DCT_SAD_MMX \
- "pxor %%mm0, %%mm0 \n\t" \
- "pxor %%mm1, %%mm1 \n\t" \
- DCT_SAD4(q, %%mm, 0) \
- DCT_SAD4(q, %%mm, 8) \
- DCT_SAD4(q, %%mm, 64) \
- DCT_SAD4(q, %%mm, 72) \
- "paddusw %%mm1, %%mm0 \n\t" \
- HSUM(%%mm0, %%mm1, %0)
-
-#define DCT_SAD_SSE2 \
- "pxor %%xmm0, %%xmm0 \n\t" \
- "pxor %%xmm1, %%xmm1 \n\t" \
- DCT_SAD4(dqa, %%xmm, 0) \
- DCT_SAD4(dqa, %%xmm, 64) \
- "paddusw %%xmm1, %%xmm0 \n\t" \
- HSUM(%%xmm0, %%xmm1, %0)
-
-#define DCT_SAD_FUNC(cpu) \
-static int sum_abs_dctelem_ ## cpu(int16_t *block) \
-{ \
- int sum; \
- __asm__ volatile ( \
- DCT_SAD \
- :"=r"(sum) \
- :"r"(block)); \
- return sum & 0xFFFF; \
-}
-
-#define DCT_SAD DCT_SAD_MMX
-#define HSUM(a, t, dst) HSUM_MMX(a, t, dst)
-#define MMABS(a, z) MMABS_MMX(a, z)
-DCT_SAD_FUNC(mmx)
-#undef MMABS
-#undef HSUM
-
-#define HSUM(a, t, dst) HSUM_MMXEXT(a, t, dst)
-#define MMABS(a, z) MMABS_MMXEXT(a, z)
-DCT_SAD_FUNC(mmxext)
-#undef HSUM
-#undef DCT_SAD
-
-#define DCT_SAD DCT_SAD_SSE2
-#define HSUM(a, t, dst) HSUM_SSE2(a, t, dst)
-DCT_SAD_FUNC(sse2)
-#undef MMABS
-
-#if HAVE_SSSE3_INLINE
-#define MMABS(a, z) MMABS_SSSE3(a, z)
-DCT_SAD_FUNC(ssse3)
-#undef MMABS
-#endif
-#undef HSUM
-#undef DCT_SAD
- #define PHADDD(a, t) \
- "movq " #a ", " #t " \n\t" \
- "psrlq $32, " #a " \n\t" \
- "paddd " #t ", " #a " \n\t"
-
- /*
- * pmulhw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15])[16 - 31]
- * pmulhrw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x8000)[16 - 31]
- * pmulhrsw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x4000)[15 - 30]
- */
- #define PMULHRW(x, y, s, o) \
- "pmulhw " #s ", " #x " \n\t" \
- "pmulhw " #s ", " #y " \n\t" \
- "paddw " #o ", " #x " \n\t" \
- "paddw " #o ", " #y " \n\t" \
- "psraw $1, " #x " \n\t" \
- "psraw $1, " #y " \n\t"
- #define DEF(x) x ## _mmx
- #define SET_RND MOVQ_WONE
- #define SCALE_OFFSET 1
-
- #include "dsputil_qns_template.c"
-
- #undef DEF
- #undef SET_RND
- #undef SCALE_OFFSET
- #undef PMULHRW
-
- #define DEF(x) x ## _3dnow
- #define SET_RND(x)
- #define SCALE_OFFSET 0
- #define PMULHRW(x, y, s, o) \
- "pmulhrw " #s ", " #x " \n\t" \
- "pmulhrw " #s ", " #y " \n\t"
-
- #include "dsputil_qns_template.c"
-
- #undef DEF
- #undef SET_RND
- #undef SCALE_OFFSET
- #undef PMULHRW
-
- #if HAVE_SSSE3_INLINE
- #undef PHADDD
- #define DEF(x) x ## _ssse3
- #define SET_RND(x)
- #define SCALE_OFFSET -1
-
- #define PHADDD(a, t) \
- "pshufw $0x0E, " #a ", " #t " \n\t" \
- /* faster than phaddd on core2 */ \
- "paddd " #t ", " #a " \n\t"
-
- #define PMULHRW(x, y, s, o) \
- "pmulhrsw " #s ", " #x " \n\t" \
- "pmulhrsw " #s ", " #y " \n\t"
-
- #include "dsputil_qns_template.c"
-
- #undef DEF
- #undef SET_RND
- #undef SCALE_OFFSET
- #undef PMULHRW
- #undef PHADDD
- #endif /* HAVE_SSSE3_INLINE */
-
#endif /* HAVE_INLINE_ASM */
-int ff_sse16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
- int line_size, int h);
-
-#define hadamard_func(cpu) \
- int ff_hadamard8_diff_ ## cpu(MpegEncContext *s, uint8_t *src1, \
- uint8_t *src2, int stride, int h); \
- int ff_hadamard8_diff16_ ## cpu(MpegEncContext *s, uint8_t *src1, \
- uint8_t *src2, int stride, int h);
-
-hadamard_func(mmx)
-hadamard_func(mmxext)
-hadamard_func(sse2)
-hadamard_func(ssse3)
-
av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth)
{
(dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX))
c->fdct = ff_fdct_mmx;
- c->sum_abs_dctelem = sum_abs_dctelem_mmx;
-
- c->sse[0] = sse16_mmx;
- c->sse[1] = sse8_mmx;
c->vsad[4] = vsad_intra16_mmx;
- c->nsse[0] = nsse16_mmx;
- c->nsse[1] = nsse8_mmx;
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
c->vsad[0] = vsad16_mmx;
- c->try_8x8basis = try_8x8basis_mmx;
- }
- c->add_8x8basis = add_8x8basis_mmx;
- }
-
- if (INLINE_AMD3DNOW(cpu_flags)) {
- if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
- c->try_8x8basis = try_8x8basis_3dnow;
}
- c->add_8x8basis = add_8x8basis_3dnow;
}
if (INLINE_MMXEXT(cpu_flags)) {
#if HAVE_SSSE3_INLINE
if (INLINE_SSSE3(cpu_flags)) {
- if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
- c->try_8x8basis = try_8x8basis_ssse3;
- }
- c->add_8x8basis = add_8x8basis_ssse3;
- c->sum_abs_dctelem = sum_abs_dctelem_ssse3;
}
#endif
#endif /* HAVE_INLINE_ASM */
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
--#include <assert.h>
#include <stdint.h>
++#include "libavutil/avassert.h"
#include "libavutil/common.h"
#include "libavutil/x86/asm.h"
--- /dev/null
- * This file is part of Libav.
+ /*
- * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+ *
- * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
- * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+ #include "libavutil/attributes.h"
+ #include "libavutil/cpu.h"
+ #include "libavutil/x86/cpu.h"
+ #include "libavcodec/avcodec.h"
+ #include "libavcodec/mpegvideoencdsp.h"
+
+ #if HAVE_INLINE_ASM
+ /* The macros below are defined before, and undefined after, each include of mpegvideoenc_qns_template.c - presumably the template consumes them; confirm against the template. */
+ #define PHADDD(a, t) \
+ "movq " #a ", " #t " \n\t" \
+ "psrlq $32, " #a " \n\t" \
+ "paddd " #t ", " #a " \n\t"
+
+ /*
+ * pmulhw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15])[16 - 31]
+ * pmulhrw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x8000)[16 - 31]
+ * pmulhrsw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x4000)[15 - 30]
+ */
+ #define PMULHRW(x, y, s, o) \
+ "pmulhw " #s ", " #x " \n\t" \
+ "pmulhw " #s ", " #y " \n\t" \
+ "paddw " #o ", " #x " \n\t" \
+ "paddw " #o ", " #y " \n\t" \
+ "psraw $1, " #x " \n\t" \
+ "psraw $1, " #y " \n\t"
+ #define DEF(x) x ## _mmx
+ #define SET_RND MOVQ_WONE
+ #define SCALE_OFFSET 1
+ /* MMX variant: DEF() appends _mmx; try_8x8basis_mmx and add_8x8basis_mmx are referenced by the init function further down. */
+ #include "mpegvideoenc_qns_template.c"
+
+ #undef DEF
+ #undef SET_RND
+ #undef SCALE_OFFSET
+ #undef PMULHRW
+ /* 3DNow! variant: PMULHRW maps to the pmulhrw instruction, SET_RND expands to nothing, and PHADDD keeps its first definition. */
+ #define DEF(x) x ## _3dnow
+ #define SET_RND(x)
+ #define SCALE_OFFSET 0
+ #define PMULHRW(x, y, s, o) \
+ "pmulhrw " #s ", " #x " \n\t" \
+ "pmulhrw " #s ", " #y " \n\t"
+
+ #include "mpegvideoenc_qns_template.c"
+
+ #undef DEF
+ #undef SET_RND
+ #undef SCALE_OFFSET
+ #undef PMULHRW
+ /* SSSE3 variant, only built when HAVE_SSSE3_INLINE is set: PHADDD is redefined with pshufw and PMULHRW maps to pmulhrsw. */
+ #if HAVE_SSSE3_INLINE
+ #undef PHADDD
+ #define DEF(x) x ## _ssse3
+ #define SET_RND(x)
+ #define SCALE_OFFSET -1
+
+ #define PHADDD(a, t) \
+ "pshufw $0x0E, " #a ", " #t " \n\t" \
+ /* faster than phaddd on core2 */ \
+ "paddd " #t ", " #a " \n\t"
+
+ #define PMULHRW(x, y, s, o) \
+ "pmulhrsw " #s ", " #x " \n\t" \
+ "pmulhrsw " #s ", " #y " \n\t"
+
+ #include "mpegvideoenc_qns_template.c"
+
+ #undef DEF
+ #undef SET_RND
+ #undef SCALE_OFFSET
+ #undef PMULHRW
+ #undef PHADDD
+ #endif /* HAVE_SSSE3_INLINE */
+
+ #endif /* HAVE_INLINE_ASM */
+
+ av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
+ AVCodecContext *avctx)
+ { /* Overrides the pointers in c with inline-asm versions chosen from the CPU flags; the body is empty unless HAVE_INLINE_ASM. */
+ #if HAVE_INLINE_ASM
+ int cpu_flags = av_get_cpu_flags();
+ /* In every branch try_8x8basis is replaced only when CODEC_FLAG_BITEXACT is not set, while add_8x8basis is replaced unconditionally. */
+ if (INLINE_MMX(cpu_flags)) {
+ if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
+ c->try_8x8basis = try_8x8basis_mmx;
+ }
+ c->add_8x8basis = add_8x8basis_mmx;
+ }
+ /* Tested after MMX, so the 3DNow! versions overwrite the MMX ones when both checks pass. */
+ if (INLINE_AMD3DNOW(cpu_flags)) {
+ if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
+ c->try_8x8basis = try_8x8basis_3dnow;
+ }
+ c->add_8x8basis = add_8x8basis_3dnow;
+ }
+ /* Tested last, so the SSSE3 versions overwrite whatever was installed above. */
+ #if HAVE_SSSE3_INLINE
+ if (INLINE_SSSE3(cpu_flags)) {
+ if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
+ c->try_8x8basis = try_8x8basis_ssse3;
+ }
+ c->add_8x8basis = add_8x8basis_ssse3;
+ }
+ #endif /* HAVE_SSSE3_INLINE */
+
+ #endif /* HAVE_INLINE_ASM */
+ }