From: Michael Niedermayer Date: Wed, 9 Jul 2014 22:56:05 +0000 (+0200) Subject: Merge commit 'f46bb608d9d76c543e4929dc8cffe36b84bd789e' X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=2d5e9451de3c7ab00cac6ec4aff290e12a2f190d;p=ffmpeg Merge commit 'f46bb608d9d76c543e4929dc8cffe36b84bd789e' * commit 'f46bb608d9d76c543e4929dc8cffe36b84bd789e': dsputil: Split off pixel block routines into their own context Conflicts: configure libavcodec/dsputil.c libavcodec/mpegvideo_enc.c libavcodec/pixblockdsp_template.c libavcodec/x86/dsputilenc.asm libavcodec/x86/dsputilenc_mmx.c Merged-by: Michael Niedermayer --- 2d5e9451de3c7ab00cac6ec4aff290e12a2f190d diff --cc configure index 4691c280a1d,7a29e82adf6..632ba44c7e2 --- a/configure +++ b/configure @@@ -1997,17 -1707,16 +1998,17 @@@ threads_if_any="$THREADS_LIST # subsystems dct_select="rdft" - dsputil_select="fdctdsp idctdsp" + dsputil_select="fdctdsp idctdsp pixblockdsp" error_resilience_select="dsputil" +frame_thread_encoder_deps="encoders threads" intrax8_select="error_resilience" mdct_select="fft" rdft_select="fft" mpeg_er_select="error_resilience" mpegaudio_select="mpegaudiodsp" mpegaudiodsp_select="dct" -mpegvideo_select="blockdsp dsputil hpeldsp idctdsp videodsp" +mpegvideo_select="blockdsp dsputil h264chroma hpeldsp idctdsp videodsp" - mpegvideoenc_select="dsputil mpegvideo qpeldsp" + mpegvideoenc_select="dsputil mpegvideo pixblockdsp qpeldsp" # decoders / encoders aac_decoder_select="mdct sinewin" @@@ -2022,13 -1730,12 +2023,13 @@@ alac_encoder_select="lpc als_decoder_select="bswapdsp" amrnb_decoder_select="lsp" amrwb_decoder_select="lsp" -amv_decoder_select="sp5x_decoder" -ape_decoder_select="bswapdsp" +amv_decoder_select="sp5x_decoder exif" +amv_encoder_select="aandcttables mpegvideoenc" +ape_decoder_select="bswapdsp llauddsp" asv1_decoder_select="blockdsp bswapdsp idctdsp" - asv1_encoder_select="bswapdsp dsputil fdctdsp" + asv1_encoder_select="bswapdsp fdctdsp pixblockdsp" asv2_decoder_select="blockdsp bswapdsp 
idctdsp" - asv2_encoder_select="bswapdsp dsputil fdctdsp" + asv2_encoder_select="bswapdsp fdctdsp pixblockdsp" atrac1_decoder_select="mdct sinewin" atrac3_decoder_select="mdct" atrac3p_decoder_select="mdct sinewin" @@@ -2043,12 -1749,11 +2044,12 @@@ cook_decoder_select="audiodsp mdct sine cscd_decoder_select="lzo" cscd_decoder_suggest="zlib" dca_decoder_select="mdct" +dirac_decoder_select="dsputil dwt golomb videodsp" dnxhd_decoder_select="blockdsp idctdsp" - dnxhd_encoder_select="aandcttables blockdsp dsputil fdctdsp idctdsp mpegvideoenc" + dnxhd_encoder_select="aandcttables blockdsp fdctdsp idctdsp mpegvideoenc pixblockdsp" dvvideo_decoder_select="dvprofile idctdsp" - dvvideo_encoder_select="dsputil dvprofile fdctdsp" + dvvideo_encoder_select="dsputil dvprofile fdctdsp pixblockdsp" -dxa_decoder_deps="zlib" +dxa_decoder_select="zlib" eac3_decoder_select="ac3_decoder" eac3_encoder_select="ac3_encoder" eamad_decoder_select="aandcttables blockdsp bswapdsp idctdsp mpegvideo" diff --cc libavcodec/arm/Makefile index fbbd0696b71,9ba6c2010ad..6b80de8a2bc --- a/libavcodec/arm/Makefile +++ b/libavcodec/arm/Makefile @@@ -63,10 -63,9 +64,11 @@@ ARMV6-OBJS-$(CONFIG_IDCTDSP) arm/simple_idct_armv6.o ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o ARMV6-OBJS-$(CONFIG_MPEGVIDEOENC) += arm/mpegvideoencdsp_armv6.o + ARMV6-OBJS-$(CONFIG_PIXBLOCKDSP) += arm/pixblockdsp_armv6.o ARMV6-OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_armv6.o +ARMV6-OBJS-$(CONFIG_VC1_DECODER) += arm/startcode_armv6.o +ARMV6-OBJS-$(CONFIG_VC1_PARSER) += arm/startcode_armv6.o ARMV6-OBJS-$(CONFIG_VP7_DECODER) += arm/vp8_armv6.o \ arm/vp8dsp_init_armv6.o \ arm/vp8dsp_armv6.o diff --cc libavcodec/arm/pixblockdsp_armv6.S index 00000000000,4c925a4daa7..b10ea78e882 mode 000000,100644..100644 --- a/libavcodec/arm/pixblockdsp_armv6.S +++ b/libavcodec/arm/pixblockdsp_armv6.S @@@ -1,0 -1,76 +1,76 @@@ + /* + * Copyright (c) 2009 Mans Rullgard + * - * This file is part of Libav. 
++ * This file is part of FFmpeg. + * - * Libav is free software; you can redistribute it and/or ++ * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * - * Libav is distributed in the hope that it will be useful, ++ * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software ++ * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + + #include "libavutil/arm/asm.S" + + function ff_get_pixels_armv6, export=1 + pld [r1, r2] + push {r4-r8, lr} + mov lr, #8 + 1: + ldrd_post r4, r5, r1, r2 + subs lr, lr, #1 + uxtb16 r6, r4 + uxtb16 r4, r4, ror #8 + uxtb16 r12, r5 + uxtb16 r8, r5, ror #8 + pld [r1, r2] + pkhbt r5, r6, r4, lsl #16 + pkhtb r6, r4, r6, asr #16 + pkhbt r7, r12, r8, lsl #16 + pkhtb r12, r8, r12, asr #16 + stm r0!, {r5,r6,r7,r12} + bgt 1b + + pop {r4-r8, pc} + endfunc + + function ff_diff_pixels_armv6, export=1 + pld [r1, r3] + pld [r2, r3] + push {r4-r9, lr} + mov lr, #8 + 1: + ldrd_post r4, r5, r1, r3 + ldrd_post r6, r7, r2, r3 + uxtb16 r8, r4 + uxtb16 r4, r4, ror #8 + uxtb16 r9, r6 + uxtb16 r6, r6, ror #8 + pld [r1, r3] + ssub16 r9, r8, r9 + ssub16 r6, r4, r6 + uxtb16 r8, r5 + uxtb16 r5, r5, ror #8 + pld [r2, r3] + pkhbt r4, r9, r6, lsl #16 + pkhtb r6, r6, r9, asr #16 + uxtb16 r9, r7 + uxtb16 r7, r7, ror #8 + ssub16 r9, r8, r9 + ssub16 r5, r5, r7 + subs lr, lr, #1 + pkhbt r8, r9, r5, lsl #16 + pkhtb r9, r5, r9, asr #16 + stm r0!, 
{r4,r6,r8,r9} + bgt 1b + + pop {r4-r9, pc} + endfunc diff --cc libavcodec/arm/pixblockdsp_init_arm.c index 00000000000,f20769b3bc5..b77c523a6e6 mode 000000,100644..100644 --- a/libavcodec/arm/pixblockdsp_init_arm.c +++ b/libavcodec/arm/pixblockdsp_init_arm.c @@@ -1,0 -1,42 +1,42 @@@ + /* - * This file is part of Libav. ++ * This file is part of FFmpeg. + * - * Libav is free software; you can redistribute it and/or ++ * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * - * Libav is distributed in the hope that it will be useful, ++ * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software ++ * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + + #include <stdint.h> + + #include "libavutil/attributes.h" + #include "libavutil/cpu.h" + #include "libavutil/arm/cpu.h" + #include "libavcodec/avcodec.h" + #include "libavcodec/pixblockdsp.h" + + void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, int stride); + void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1, + const uint8_t *s2, int stride); + + av_cold void ff_pixblockdsp_init_arm(PixblockDSPContext *c, + AVCodecContext *avctx, + unsigned high_bit_depth) + { + int cpu_flags = av_get_cpu_flags(); + + if (have_armv6(cpu_flags)) { + if (!high_bit_depth) + c->get_pixels = ff_get_pixels_armv6; + c->diff_pixels = ff_diff_pixels_armv6; + } + } diff --cc libavcodec/asvenc.c index 
ae81953f308,9944ffaa7c4..02cf2db9913 --- a/libavcodec/asvenc.c +++ b/libavcodec/asvenc.c @@@ -281,11 -241,17 +281,11 @@@ static av_cold int encode_init(AVCodecC int i; const int scale= avctx->codec_id == AV_CODEC_ID_ASV1 ? 1 : 2; - avctx->coded_frame = av_frame_alloc(); - if (!avctx->coded_frame) - return AVERROR(ENOMEM); - avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I; - avctx->coded_frame->key_frame = 1; - ff_asv_common_init(avctx); - ff_dsputil_init(&a->dsp, avctx); ff_fdctdsp_init(&a->fdsp, avctx); + ff_pixblockdsp_init(&a->pdsp, avctx); - if(avctx->global_quality == 0) avctx->global_quality= 4*FF_QUALITY_SCALE; + if(avctx->global_quality <= 0) avctx->global_quality= 4*FF_QUALITY_SCALE; a->inv_qscale= (32*scale*FF_QUALITY_SCALE + avctx->global_quality/2) / avctx->global_quality; diff --cc libavcodec/dnxhdenc.c index f6f9af833ac,e656b6edba2..3ad625352a6 --- a/libavcodec/dnxhdenc.c +++ b/libavcodec/dnxhdenc.c @@@ -33,10 -33,9 +33,10 @@@ #include "fdctdsp.h" #include "internal.h" #include "mpegvideo.h" + #include "pixblockdsp.h" #include "dnxhdenc.h" + // The largest value that will not lead to overflow for 10bit samples. 
#define DNX10BIT_QMAT_SHIFT 18 #define RC_VARIANCE 1 // use variance or ssd for fast rc @@@ -326,9 -311,8 +326,10 @@@ static av_cold int dnxhd_encode_init(AV ff_fdctdsp_init(&ctx->m.fdsp, avctx); ff_idctdsp_init(&ctx->m.idsp, avctx); ff_mpegvideoencdsp_init(&ctx->m.mpvencdsp, avctx); + ff_pixblockdsp_init(&ctx->m.pdsp, avctx); ff_dct_common_init(&ctx->m); + ff_dct_encode_init(&ctx->m); + if (!ctx->m.dct_quantize) ctx->m.dct_quantize = ff_dct_quantize_c; diff --cc libavcodec/dsputil.c index c68a70a79e0,8d0cef2e828..1cd9658ba69 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@@ -584,9 -547,9 +556,9 @@@ static int dct_sad8x8_c(MpegEncContext { LOCAL_ALIGNED_16(int16_t, temp, [64]); - assert(h == 8); + av_assert2(h == 8); - s->dsp.diff_pixels(temp, src1, src2, stride); + s->pdsp.diff_pixels(temp, src1, src2, stride); s->fdsp.fdct(temp); return s->dsp.sum_abs_dctelem(temp); } @@@ -651,9 -614,9 +623,9 @@@ static int dct_max8x8_c(MpegEncContext LOCAL_ALIGNED_16(int16_t, temp, [64]); int sum = 0, i; - assert(h == 8); + av_assert2(h == 8); - s->dsp.diff_pixels(temp, src1, src2, stride); + s->pdsp.diff_pixels(temp, src1, src2, stride); s->fdsp.fdct(temp); for (i = 0; i < 64; i++) @@@ -669,10 -632,10 +641,10 @@@ static int quant_psnr8x8_c(MpegEncConte int16_t *const bak = temp + 64; int sum = 0, i; - assert(h == 8); + av_assert2(h == 8); s->mb_intra = 0; - s->dsp.diff_pixels(temp, src1, src2, stride); + s->pdsp.diff_pixels(temp, src1, src2, stride); memcpy(bak, temp, 64 * sizeof(int16_t)); @@@ -773,9 -736,9 +745,9 @@@ static int bit8x8_c(MpegEncContext *s, const int esc_length = s->ac_esc_length; uint8_t *length, *last_length; - assert(h == 8); + av_assert2(h == 8); - s->dsp.diff_pixels(temp, src1, src2, stride); + s->pdsp.diff_pixels(temp, src1, src2, stride); s->block_last_index[0 /* FIXME */] = last = @@@ -969,10 -904,6 +941,8 @@@ av_cold void ff_dsputil_init(DSPContex { const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8; + ff_check_alignment(); + - 
c->diff_pixels = diff_pixels_c; - c->sum_abs_dctelem = sum_abs_dctelem_c; /* TODO [0] 16 [1] 8 */ @@@ -1015,27 -944,7 +985,12 @@@ c->vsse[5] = vsse_intra8_c; c->nsse[0] = nsse16_c; c->nsse[1] = nsse8_c; +#if CONFIG_SNOW_DECODER || CONFIG_SNOW_ENCODER + ff_dsputil_init_dwt(c); +#endif - switch (avctx->bits_per_raw_sample) { - case 9: - case 10: - case 12: - case 14: - c->get_pixels = get_pixels_16_c; - break; - default: - if (avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) { - c->get_pixels = get_pixels_8_c; - } - break; - } - - + if (ARCH_ALPHA) + ff_dsputil_init_alpha(c, avctx); if (ARCH_ARM) ff_dsputil_init_arm(c, avctx, high_bit_depth); if (ARCH_PPC) diff --cc libavcodec/dvenc.c index a60b834dfe2,9f458e3e47c..aeb4a332596 --- a/libavcodec/dvenc.c +++ b/libavcodec/dvenc.c @@@ -67,12 -65,12 +69,13 @@@ static av_cold int dvvideo_encode_init( dv_vlc_map_tableinit(); + memset(&dsp,0, sizeof(dsp)); ff_dsputil_init(&dsp, avctx); ff_fdctdsp_init(&fdsp, avctx); + ff_pixblockdsp_init(&pdsp, avctx); ff_set_cmp(&dsp, dsp.ildct_cmp, avctx->ildct_cmp); - s->get_pixels = dsp.get_pixels; + s->get_pixels = pdsp.get_pixels; s->ildct_cmp = dsp.ildct_cmp[5]; s->fdct[0] = fdsp.fdct; diff --cc libavcodec/libavcodec.v index 5909dce46b6,bf148075c71..5a8c005b97d --- a/libavcodec/libavcodec.v +++ b/libavcodec/libavcodec.v @@@ -1,33 -1,4 +1,34 @@@ LIBAVCODEC_$MAJOR { global: av*; + #deprecated, remove after next bump + audio_resample; + audio_resample_close; + dsputil_init; + ff_dsputil_init; + ff_find_pix_fmt; + ff_framenum_to_drop_timecode; + ff_framenum_to_smtpe_timecode; + ff_raw_pix_fmt_tags; + ff_init_smtpe_timecode; + ff_fft*; + ff_mdct*; + ff_dct*; + ff_rdft*; + ff_prores_idct_put_10_sse2; + ff_simple_idct*; + ff_aanscales; + ff_faan*; + ff_mmx_idct; + ff_fdct*; + fdct_ifast; + j_rev_dct; + ff_mmxext_idct; + ff_idct_xvid*; + ff_jpeg_fdct*; + ff_dnxhd_get_cid_table; + ff_dnxhd_cid_table; + ff_idctdsp_init; + ff_fdctdsp_init; ++ ff_pixblockdsp_init; local: 
*; }; diff --cc libavcodec/mpegvideo_enc.c index 826f061eeaf,e2504c7b721..56867ccb85e --- a/libavcodec/mpegvideo_enc.c +++ b/libavcodec/mpegvideo_enc.c @@@ -818,8 -698,12 +818,9 @@@ av_cold int ff_MPV_encode_init(AVCodecC if (ff_MPV_common_init(s) < 0) return -1; - if (ARCH_X86) - ff_MPV_encode_init_x86(s); - ff_fdctdsp_init(&s->fdsp, avctx); ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx); + ff_pixblockdsp_init(&s->pdsp, avctx); ff_qpeldsp_init(&s->qdsp); s->avctx->coded_frame = s->current_picture.f; @@@ -2102,18 -1953,13 +2103,18 @@@ static av_always_inline void encode_mb_ skip_dct[4] = 1; skip_dct[5] = 1; } else { - s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c); - s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c); + s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c); + s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c); - if (!s->chroma_y_shift) { /* 422 */ - s->pdsp.get_pixels(s->block[6], - ptr_cb + (dct_offset >> 1), wrap_c); - s->pdsp.get_pixels(s->block[7], - ptr_cr + (dct_offset >> 1), wrap_c); + if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */ - s->dsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c); - s->dsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c); ++ s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c); ++ s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c); + } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */ - s->dsp.get_pixels(s->block[6], ptr_cb + 8, wrap_c); - s->dsp.get_pixels(s->block[7], ptr_cr + 8, wrap_c); - s->dsp.get_pixels(s->block[8], ptr_cb + uv_dct_offset, wrap_c); - s->dsp.get_pixels(s->block[9], ptr_cr + uv_dct_offset, wrap_c); - s->dsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c); - s->dsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c); ++ s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c); ++ s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c); ++ s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c); ++ s->pdsp.get_pixels(s->block[ 
9], ptr_cr + uv_dct_offset, wrap_c); ++ s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c); ++ s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c); } } } else { @@@ -2191,13 -2036,13 +2192,13 @@@ skip_dct[4] = 1; skip_dct[5] = 1; } else { - s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c); - s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c); + s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c); + s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c); if (!s->chroma_y_shift) { /* 422 */ - s->dsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset, - dest_cb + uv_dct_offset, wrap_c); - s->dsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset, - dest_cr + uv_dct_offset, wrap_c); - s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1), - dest_cb + (dct_offset >> 1), wrap_c); - s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1), - dest_cr + (dct_offset >> 1), wrap_c); ++ s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset, ++ dest_cb + uv_dct_offset, wrap_c); ++ s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset, ++ dest_cr + uv_dct_offset, wrap_c); } } /* pre quantization */ diff --cc libavcodec/pixblockdsp.c index 00000000000,71423f9cfc9..a69948e43ef mode 000000,100644..100644 --- a/libavcodec/pixblockdsp.c +++ b/libavcodec/pixblockdsp.c @@@ -1,0 -1,76 +1,80 @@@ + /* - * This file is part of Libav. ++ * This file is part of FFmpeg. + * - * Libav is free software; you can redistribute it and/or ++ * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * - * Libav is distributed in the hope that it will be useful, ++ * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software ++ * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + + #include <stdint.h> + + #include "config.h" + #include "libavutil/attributes.h" + #include "avcodec.h" + #include "pixblockdsp.h" + + #define BIT_DEPTH 16 + #include "pixblockdsp_template.c" + #undef BIT_DEPTH + + #define BIT_DEPTH 8 + #include "pixblockdsp_template.c" + -static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1, ++static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1, + const uint8_t *s2, int stride) + { + int i; + + /* read the pixels */ + for (i = 0; i < 8; i++) { + block[0] = s1[0] - s2[0]; + block[1] = s1[1] - s2[1]; + block[2] = s1[2] - s2[2]; + block[3] = s1[3] - s2[3]; + block[4] = s1[4] - s2[4]; + block[5] = s1[5] - s2[5]; + block[6] = s1[6] - s2[6]; + block[7] = s1[7] - s2[7]; + s1 += stride; + s2 += stride; + block += 8; + } + } + + av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx) + { + const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8; + + c->diff_pixels = diff_pixels_c; + + switch (avctx->bits_per_raw_sample) { + case 9: + case 10: ++ case 12: ++ case 14: + c->get_pixels = get_pixels_16_c; + break; + default: - c->get_pixels = get_pixels_8_c; ++ if (avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) { ++ c->get_pixels = get_pixels_8_c; ++ } + break; + } + + if (ARCH_ARM) + ff_pixblockdsp_init_arm(c, avctx, high_bit_depth); + if (ARCH_PPC) + 
ff_pixblockdsp_init_ppc(c, avctx, high_bit_depth); + if (ARCH_X86) + ff_pixblockdsp_init_x86(c, avctx, high_bit_depth); + } diff --cc libavcodec/pixblockdsp.h index 00000000000,8094d14b68e..a724ffbef0a mode 000000,100644..100644 --- a/libavcodec/pixblockdsp.h +++ b/libavcodec/pixblockdsp.h @@@ -1,0 -1,44 +1,44 @@@ + /* - * This file is part of Libav. ++ * This file is part of FFmpeg. + * - * Libav is free software; you can redistribute it and/or ++ * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * - * Libav is distributed in the hope that it will be useful, ++ * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software ++ * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + + #ifndef AVCODEC_PIXBLOCKDSP_H + #define AVCODEC_PIXBLOCKDSP_H + + #include <stdint.h> + + #include "avcodec.h" + + typedef struct PixblockDSPContext { + void (*get_pixels)(int16_t *block /* align 16 */, + const uint8_t *pixels /* align 8 */, + int line_size); + void (*diff_pixels)(int16_t *block /* align 16 */, + const uint8_t *s1 /* align 8 */, + const uint8_t *s2 /* align 8 */, + int stride); + } PixblockDSPContext; + + void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx); + void ff_pixblockdsp_init_arm(PixblockDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); + void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); + void ff_pixblockdsp_init_x86(PixblockDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); + + #endif /* AVCODEC_PIXBLOCKDSP_H */ diff --cc libavcodec/pixblockdsp_template.c index 711c404a972,71d3cf150d6..3aeddf526c7 --- a/libavcodec/pixblockdsp_template.c +++ b/libavcodec/pixblockdsp_template.c @@@ -1,13 -1,7 +1,7 @@@ /* - * DSP utils - * Copyright (c) 2000, 2001 Fabrice Bellard - * Copyright (c) 2002-2004 Michael Niedermayer - * - * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer - * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. 
@@@ -22,14 -16,9 +16,9 @@@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ - /** - * @file - * DSP utils - */ - #include "bit_depth_template.c" -static void FUNCC(get_pixels)(int16_t *restrict block, const uint8_t *_pixels, +static void FUNCC(get_pixels)(int16_t *av_restrict block, const uint8_t *_pixels, int line_size) { const pixel *pixels = (const pixel *) _pixels; diff --cc libavcodec/ppc/pixblockdsp.c index 00000000000,698d655fc6a..42c5be842ea mode 000000,100644..100644 --- a/libavcodec/ppc/pixblockdsp.c +++ b/libavcodec/ppc/pixblockdsp.c @@@ -1,0 -1,153 +1,153 @@@ + /* + * Copyright (c) 2002 Brian Foley + * Copyright (c) 2002 Dieter Shirley + * Copyright (c) 2003-2004 Romain Dolbeau + * - * This file is part of Libav. ++ * This file is part of FFmpeg. + * - * Libav is free software; you can redistribute it and/or ++ * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * - * Libav is distributed in the hope that it will be useful, ++ * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software ++ * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + + #include "config.h" + #if HAVE_ALTIVEC_H + #include <altivec.h> + #endif + + #include "libavutil/attributes.h" + #include "libavutil/cpu.h" + #include "libavutil/ppc/cpu.h" + #include "libavutil/ppc/types_altivec.h" + #include "libavutil/ppc/util_altivec.h" + #include "libavcodec/avcodec.h" + #include "libavcodec/pixblockdsp.h" + + #if HAVE_ALTIVEC + + static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels, + int line_size) + { + int i; + vector unsigned char perm = vec_lvsl(0, pixels); + const vector unsigned char zero = + (const vector unsigned char) vec_splat_u8(0); + + for (i = 0; i < 8; i++) { + /* Read potentially unaligned pixels. + * We're reading 16 pixels, and actually only want 8, + * but we simply ignore the extras. */ + vector unsigned char pixl = vec_ld(0, pixels); + vector unsigned char pixr = vec_ld(7, pixels); + vector unsigned char bytes = vec_perm(pixl, pixr, perm); + + // Convert the bytes into shorts. + vector signed short shorts = (vector signed short) vec_mergeh(zero, + bytes); + + // Save the data to the block, we assume the block is 16-byte aligned. + vec_st(shorts, i * 16, (vector signed short *) block); + + pixels += line_size; + } + } + + static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1, + const uint8_t *s2, int stride) + { + int i; + vector unsigned char perm1 = vec_lvsl(0, s1); + vector unsigned char perm2 = vec_lvsl(0, s2); + const vector unsigned char zero = + (const vector unsigned char) vec_splat_u8(0); + vector signed short shorts1, shorts2; + + for (i = 0; i < 4; i++) { + /* Read potentially unaligned pixels. + * We're reading 16 pixels, and actually only want 8, + * but we simply ignore the extras. 
*/ + vector unsigned char pixl = vec_ld(0, s1); + vector unsigned char pixr = vec_ld(15, s1); + vector unsigned char bytes = vec_perm(pixl, pixr, perm1); + + // Convert the bytes into shorts. + shorts1 = (vector signed short) vec_mergeh(zero, bytes); + + // Do the same for the second block of pixels. + pixl = vec_ld(0, s2); + pixr = vec_ld(15, s2); + bytes = vec_perm(pixl, pixr, perm2); + + // Convert the bytes into shorts. + shorts2 = (vector signed short) vec_mergeh(zero, bytes); + + // Do the subtraction. + shorts1 = vec_sub(shorts1, shorts2); + + // Save the data to the block, we assume the block is 16-byte aligned. + vec_st(shorts1, 0, (vector signed short *) block); + + s1 += stride; + s2 += stride; + block += 8; + + /* The code below is a copy of the code above... + * This is a manual unroll. */ + + /* Read potentially unaligned pixels. + * We're reading 16 pixels, and actually only want 8, + * but we simply ignore the extras. */ + pixl = vec_ld(0, s1); + pixr = vec_ld(15, s1); + bytes = vec_perm(pixl, pixr, perm1); + + // Convert the bytes into shorts. + shorts1 = (vector signed short) vec_mergeh(zero, bytes); + + // Do the same for the second block of pixels. + pixl = vec_ld(0, s2); + pixr = vec_ld(15, s2); + bytes = vec_perm(pixl, pixr, perm2); + + // Convert the bytes into shorts. + shorts2 = (vector signed short) vec_mergeh(zero, bytes); + + // Do the subtraction. + shorts1 = vec_sub(shorts1, shorts2); + + // Save the data to the block, we assume the block is 16-byte aligned. 
+ vec_st(shorts1, 0, (vector signed short *) block); + + s1 += stride; + s2 += stride; + block += 8; + } + } + + #endif /* HAVE_ALTIVEC */ + + av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, + AVCodecContext *avctx, + unsigned high_bit_depth) + { + #if HAVE_ALTIVEC + if (!PPC_ALTIVEC(av_get_cpu_flags())) + return; + + c->diff_pixels = diff_pixels_altivec; + + if (!high_bit_depth) { + c->get_pixels = get_pixels_altivec; + } + #endif /* HAVE_ALTIVEC */ + } diff --cc libavcodec/x86/Makefile index 0843dcc7749,7c5ac3decf6..44ccb2040f8 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@@ -105,11 -92,9 +106,12 @@@ YASM-OBJS-$(CONFIG_HEVC_DECODER) YASM-OBJS-$(CONFIG_HPELDSP) += x86/fpel.o \ x86/hpeldsp.o YASM-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp.o +YASM-OBJS-$(CONFIG_IDCTDSP) += x86/idctdsp.o +YASM-OBJS-$(CONFIG_LLAUDDSP) += x86/lossless_audiodsp.o +YASM-OBJS-$(CONFIG_LLVIDDSP) += x86/lossless_videodsp.o YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36.o YASM-OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoencdsp.o + YASM-OBJS-$(CONFIG_PIXBLOCKDSP) += x86/pixblockdsp.o YASM-OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp.o \ x86/fpel.o \ x86/qpel.o diff --cc libavcodec/x86/dsputilenc.asm index 13682ba5d46,8d989c26f89..023f512edd8 --- a/libavcodec/x86/dsputilenc.asm +++ b/libavcodec/x86/dsputilenc.asm @@@ -328,249 -323,14 +328,140 @@@ cglobal sse%1, 5,5,8, v, pix1, pix2, ls paddd m7, m1 paddd m7, m3 - dec r4 +%if %1 == mmsize + lea pix1q, [pix1q + 2*lsizeq] + lea pix2q, [pix2q + 2*lsizeq] +%else + add pix1q, lsizeq + add pix2q, lsizeq +%endif + dec hd jnz .next2lines - mova m1, m7 - psrldq m7, 8 ; shift hi qword to lo - paddd m7, m1 - mova m1, m7 - psrldq m7, 4 ; shift hi dword to lo - paddd m7, m1 + HADDD m7, m1 movd eax, m7 ; return value RET +%endmacro + +INIT_MMX mmx +SUM_SQUARED_ERRORS 8 + +INIT_MMX mmx +SUM_SQUARED_ERRORS 16 + +INIT_XMM sse2 +SUM_SQUARED_ERRORS 16 + - INIT_MMX mmx - ; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int 
line_size) - cglobal get_pixels, 3,4 - movsxdifnidn r2, r2d - add r0, 128 - mov r3, -128 - pxor m7, m7 - .loop: - mova m0, [r1] - mova m2, [r1+r2] - mova m1, m0 - mova m3, m2 - punpcklbw m0, m7 - punpckhbw m1, m7 - punpcklbw m2, m7 - punpckhbw m3, m7 - mova [r0+r3+ 0], m0 - mova [r0+r3+ 8], m1 - mova [r0+r3+16], m2 - mova [r0+r3+24], m3 - lea r1, [r1+r2*2] - add r3, 32 - js .loop - REP_RET - - INIT_XMM sse2 - cglobal get_pixels, 3, 4, 5 - movsxdifnidn r2, r2d - lea r3, [r2*3] - pxor m4, m4 - movh m0, [r1] - movh m1, [r1+r2] - movh m2, [r1+r2*2] - movh m3, [r1+r3] - lea r1, [r1+r2*4] - punpcklbw m0, m4 - punpcklbw m1, m4 - punpcklbw m2, m4 - punpcklbw m3, m4 - mova [r0], m0 - mova [r0+0x10], m1 - mova [r0+0x20], m2 - mova [r0+0x30], m3 - movh m0, [r1] - movh m1, [r1+r2*1] - movh m2, [r1+r2*2] - movh m3, [r1+r3] - punpcklbw m0, m4 - punpcklbw m1, m4 - punpcklbw m2, m4 - punpcklbw m3, m4 - mova [r0+0x40], m0 - mova [r0+0x50], m1 - mova [r0+0x60], m2 - mova [r0+0x70], m3 - RET - - INIT_MMX mmx - ; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2, - ; int stride); - cglobal diff_pixels, 4,5 - movsxdifnidn r3, r3d - pxor m7, m7 - add r0, 128 - mov r4, -128 - .loop: - mova m0, [r1] - mova m2, [r2] - mova m1, m0 - mova m3, m2 - punpcklbw m0, m7 - punpckhbw m1, m7 - punpcklbw m2, m7 - punpckhbw m3, m7 - psubw m0, m2 - psubw m1, m3 - mova [r0+r4+0], m0 - mova [r0+r4+8], m1 - add r1, r3 - add r2, r3 - add r4, 16 - jne .loop - REP_RET - - INIT_XMM sse2 - cglobal diff_pixels, 4, 5, 5 - movsxdifnidn r3, r3d - pxor m4, m4 - add r0, 128 - mov r4, -128 - .loop: - movh m0, [r1] - movh m2, [r2] - movh m1, [r1+r3] - movh m3, [r2+r3] - punpcklbw m0, m4 - punpcklbw m1, m4 - punpcklbw m2, m4 - punpcklbw m3, m4 - psubw m0, m2 - psubw m1, m3 - mova [r0+r4+0 ], m0 - mova [r0+r4+16], m1 - lea r1, [r1+r3*2] - lea r2, [r2+r3*2] - add r4, 32 - jne .loop - RET - +;----------------------------------------------- +;int ff_sum_abs_dctelem(int16_t *block) 
+;----------------------------------------------- +; %1 = number of xmm registers used +; %2 = number of inline loops + +%macro SUM_ABS_DCTELEM 2 +cglobal sum_abs_dctelem, 1, 1, %1, block + pxor m0, m0 + pxor m1, m1 +%assign %%i 0 +%rep %2 + mova m2, [blockq+mmsize*(0+%%i)] + mova m3, [blockq+mmsize*(1+%%i)] + mova m4, [blockq+mmsize*(2+%%i)] + mova m5, [blockq+mmsize*(3+%%i)] + ABS1_SUM m2, m6, m0 + ABS1_SUM m3, m6, m1 + ABS1_SUM m4, m6, m0 + ABS1_SUM m5, m6, m1 +%assign %%i %%i+4 +%endrep + paddusw m0, m1 + HSUM m0, m1, eax + and eax, 0xFFFF + RET +%endmacro + +INIT_MMX mmx +SUM_ABS_DCTELEM 0, 4 +INIT_MMX mmxext +SUM_ABS_DCTELEM 0, 4 +INIT_XMM sse2 +SUM_ABS_DCTELEM 7, 2 +INIT_XMM ssse3 +SUM_ABS_DCTELEM 6, 2 + +;------------------------------------------------------------------------------ +; int ff_hf_noise*_mmx(uint8_t *pix1, int lsize, int h) +;------------------------------------------------------------------------------ +; %1 = 8/16. %2-5=m# +%macro HF_NOISE_PART1 5 + mova m%2, [pix1q] +%if %1 == 8 + mova m%3, m%2 + psllq m%2, 8 + psrlq m%3, 8 + psrlq m%2, 8 +%else + mova m%3, [pix1q+1] +%endif + mova m%4, m%2 + mova m%5, m%3 + punpcklbw m%2, m7 + punpcklbw m%3, m7 + punpckhbw m%4, m7 + punpckhbw m%5, m7 + psubw m%2, m%3 + psubw m%4, m%5 +%endmacro + +; %1-2 = m# +%macro HF_NOISE_PART2 4 + psubw m%1, m%3 + psubw m%2, m%4 + pxor m3, m3 + pxor m1, m1 + pcmpgtw m3, m%1 + pcmpgtw m1, m%2 + pxor m%1, m3 + pxor m%2, m1 + psubw m%1, m3 + psubw m%2, m1 + paddw m%2, m%1 + paddw m6, m%2 +%endmacro + +; %1 = 8/16 +%macro HF_NOISE 1 +cglobal hf_noise%1, 3,3,0, pix1, lsize, h + movsxdifnidn lsizeq, lsized + sub hd, 2 + pxor m7, m7 + pxor m6, m6 + HF_NOISE_PART1 %1, 0, 1, 2, 3 + add pix1q, lsizeq + HF_NOISE_PART1 %1, 4, 1, 5, 3 + HF_NOISE_PART2 0, 2, 4, 5 + add pix1q, lsizeq +.loop: + HF_NOISE_PART1 %1, 0, 1, 2, 3 + HF_NOISE_PART2 4, 5, 0, 2 + add pix1q, lsizeq + HF_NOISE_PART1 %1, 4, 1, 5, 3 + HF_NOISE_PART2 0, 2, 4, 5 + add pix1q, lsizeq + sub hd, 2 + jne .loop + + mova 
m0, m6 + punpcklwd m0, m7 + punpckhwd m6, m7 + paddd m6, m0 + mova m0, m6 + psrlq m6, 32 + paddd m0, m6 + movd eax, m0 ; eax = result of hf_noise8; + REP_RET ; return eax; +%endmacro + +INIT_MMX mmx +HF_NOISE 8 +HF_NOISE 16 diff --cc libavcodec/x86/dsputilenc_mmx.c index d0936595d0b,5a7d911ca86..5d48a78daa0 --- a/libavcodec/x86/dsputilenc_mmx.c +++ b/libavcodec/x86/dsputilenc_mmx.c @@@ -30,37 -30,381 +30,31 @@@ #include "libavcodec/mpegvideo.h" #include "dsputil_x86.h" - void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size); - void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size); - void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2, - int stride); - void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2, - int stride); -#if HAVE_INLINE_ASM - -static int sse8_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, - int line_size, int h) -{ - int tmp; - - __asm__ volatile ( - "movl %4, %%ecx \n" - "shr $1, %%ecx \n" - "pxor %%mm0, %%mm0 \n" /* mm0 = 0 */ - "pxor %%mm7, %%mm7 \n" /* mm7 holds the sum */ - "1: \n" - "movq (%0), %%mm1 \n" /* mm1 = pix1[0][0 - 7] */ - "movq (%1), %%mm2 \n" /* mm2 = pix2[0][0 - 7] */ - "movq (%0, %3), %%mm3 \n" /* mm3 = pix1[1][0 - 7] */ - "movq (%1, %3), %%mm4 \n" /* mm4 = pix2[1][0 - 7] */ - - /* todo: mm1-mm2, mm3-mm4 */ - /* algo: subtract mm1 from mm2 with saturation and vice versa */ - /* OR the results to get absolute difference */ - "movq %%mm1, %%mm5 \n" - "movq %%mm3, %%mm6 \n" - "psubusb %%mm2, %%mm1 \n" - "psubusb %%mm4, %%mm3 \n" - "psubusb %%mm5, %%mm2 \n" - "psubusb %%mm6, %%mm4 \n" - - "por %%mm1, %%mm2 \n" - "por %%mm3, %%mm4 \n" - - /* now convert to 16-bit vectors so we can square them */ - "movq %%mm2, %%mm1 \n" - "movq %%mm4, %%mm3 \n" - - "punpckhbw %%mm0, %%mm2 \n" - "punpckhbw %%mm0, %%mm4 \n" - "punpcklbw %%mm0, %%mm1 \n" /* mm1 now spread over (mm1, mm2) */ - "punpcklbw %%mm0, %%mm3 \n" /* mm4 now spread over (mm3, mm4) 
*/ - - "pmaddwd %%mm2, %%mm2 \n" - "pmaddwd %%mm4, %%mm4 \n" - "pmaddwd %%mm1, %%mm1 \n" - "pmaddwd %%mm3, %%mm3 \n" - - "lea (%0, %3, 2), %0 \n" /* pix1 += 2 * line_size */ - "lea (%1, %3, 2), %1 \n" /* pix2 += 2 * line_size */ - - "paddd %%mm2, %%mm1 \n" - "paddd %%mm4, %%mm3 \n" - "paddd %%mm1, %%mm7 \n" - "paddd %%mm3, %%mm7 \n" - - "decl %%ecx \n" - "jnz 1b \n" - - "movq %%mm7, %%mm1 \n" - "psrlq $32, %%mm7 \n" /* shift hi dword to lo */ - "paddd %%mm7, %%mm1 \n" - "movd %%mm1, %2 \n" - : "+r" (pix1), "+r" (pix2), "=r" (tmp) - : "r" ((x86_reg) line_size), "m" (h) - : "%ecx"); - - return tmp; -} - -static int sse16_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, - int line_size, int h) -{ - int tmp; - - __asm__ volatile ( - "movl %4, %%ecx\n" - "pxor %%mm0, %%mm0\n" /* mm0 = 0 */ - "pxor %%mm7, %%mm7\n" /* mm7 holds the sum */ - "1:\n" - "movq (%0), %%mm1\n" /* mm1 = pix1[0 - 7] */ - "movq (%1), %%mm2\n" /* mm2 = pix2[0 - 7] */ - "movq 8(%0), %%mm3\n" /* mm3 = pix1[8 - 15] */ - "movq 8(%1), %%mm4\n" /* mm4 = pix2[8 - 15] */ - - /* todo: mm1-mm2, mm3-mm4 */ - /* algo: subtract mm1 from mm2 with saturation and vice versa */ - /* OR the results to get absolute difference */ - "movq %%mm1, %%mm5\n" - "movq %%mm3, %%mm6\n" - "psubusb %%mm2, %%mm1\n" - "psubusb %%mm4, %%mm3\n" - "psubusb %%mm5, %%mm2\n" - "psubusb %%mm6, %%mm4\n" - - "por %%mm1, %%mm2\n" - "por %%mm3, %%mm4\n" - - /* now convert to 16-bit vectors so we can square them */ - "movq %%mm2, %%mm1\n" - "movq %%mm4, %%mm3\n" - - "punpckhbw %%mm0, %%mm2\n" - "punpckhbw %%mm0, %%mm4\n" - "punpcklbw %%mm0, %%mm1\n" /* mm1 now spread over (mm1, mm2) */ - "punpcklbw %%mm0, %%mm3\n" /* mm4 now spread over (mm3, mm4) */ - - "pmaddwd %%mm2, %%mm2\n" - "pmaddwd %%mm4, %%mm4\n" - "pmaddwd %%mm1, %%mm1\n" - "pmaddwd %%mm3, %%mm3\n" - - "add %3, %0\n" - "add %3, %1\n" - - "paddd %%mm2, %%mm1\n" - "paddd %%mm4, %%mm3\n" - "paddd %%mm1, %%mm7\n" - "paddd %%mm3, %%mm7\n" - - "decl %%ecx\n" - "jnz 1b\n" - - "movq 
%%mm7, %%mm1\n" - "psrlq $32, %%mm7\n" /* shift hi dword to lo */ - "paddd %%mm7, %%mm1\n" - "movd %%mm1, %2\n" - : "+r" (pix1), "+r" (pix2), "=r" (tmp) - : "r" ((x86_reg) line_size), "m" (h) - : "%ecx"); - - return tmp; -} - -static int hf_noise8_mmx(uint8_t *pix1, int line_size, int h) -{ - int tmp; - - __asm__ volatile ( - "movl %3, %%ecx\n" - "pxor %%mm7, %%mm7\n" - "pxor %%mm6, %%mm6\n" - - "movq (%0), %%mm0\n" - "movq %%mm0, %%mm1\n" - "psllq $8, %%mm0\n" - "psrlq $8, %%mm1\n" - "psrlq $8, %%mm0\n" - "movq %%mm0, %%mm2\n" - "movq %%mm1, %%mm3\n" - "punpcklbw %%mm7, %%mm0\n" - "punpcklbw %%mm7, %%mm1\n" - "punpckhbw %%mm7, %%mm2\n" - "punpckhbw %%mm7, %%mm3\n" - "psubw %%mm1, %%mm0\n" - "psubw %%mm3, %%mm2\n" - - "add %2, %0\n" - - "movq (%0), %%mm4\n" - "movq %%mm4, %%mm1\n" - "psllq $8, %%mm4\n" - "psrlq $8, %%mm1\n" - "psrlq $8, %%mm4\n" - "movq %%mm4, %%mm5\n" - "movq %%mm1, %%mm3\n" - "punpcklbw %%mm7, %%mm4\n" - "punpcklbw %%mm7, %%mm1\n" - "punpckhbw %%mm7, %%mm5\n" - "punpckhbw %%mm7, %%mm3\n" - "psubw %%mm1, %%mm4\n" - "psubw %%mm3, %%mm5\n" - "psubw %%mm4, %%mm0\n" - "psubw %%mm5, %%mm2\n" - "pxor %%mm3, %%mm3\n" - "pxor %%mm1, %%mm1\n" - "pcmpgtw %%mm0, %%mm3\n\t" - "pcmpgtw %%mm2, %%mm1\n\t" - "pxor %%mm3, %%mm0\n" - "pxor %%mm1, %%mm2\n" - "psubw %%mm3, %%mm0\n" - "psubw %%mm1, %%mm2\n" - "paddw %%mm0, %%mm2\n" - "paddw %%mm2, %%mm6\n" - - "add %2, %0\n" - "1:\n" - - "movq (%0), %%mm0\n" - "movq %%mm0, %%mm1\n" - "psllq $8, %%mm0\n" - "psrlq $8, %%mm1\n" - "psrlq $8, %%mm0\n" - "movq %%mm0, %%mm2\n" - "movq %%mm1, %%mm3\n" - "punpcklbw %%mm7, %%mm0\n" - "punpcklbw %%mm7, %%mm1\n" - "punpckhbw %%mm7, %%mm2\n" - "punpckhbw %%mm7, %%mm3\n" - "psubw %%mm1, %%mm0\n" - "psubw %%mm3, %%mm2\n" - "psubw %%mm0, %%mm4\n" - "psubw %%mm2, %%mm5\n" - "pxor %%mm3, %%mm3\n" - "pxor %%mm1, %%mm1\n" - "pcmpgtw %%mm4, %%mm3\n\t" - "pcmpgtw %%mm5, %%mm1\n\t" - "pxor %%mm3, %%mm4\n" - "pxor %%mm1, %%mm5\n" - "psubw %%mm3, %%mm4\n" - "psubw %%mm1, %%mm5\n" - "paddw 
%%mm4, %%mm5\n" - "paddw %%mm5, %%mm6\n" - - "add %2, %0\n" - - "movq (%0), %%mm4\n" - "movq %%mm4, %%mm1\n" - "psllq $8, %%mm4\n" - "psrlq $8, %%mm1\n" - "psrlq $8, %%mm4\n" - "movq %%mm4, %%mm5\n" - "movq %%mm1, %%mm3\n" - "punpcklbw %%mm7, %%mm4\n" - "punpcklbw %%mm7, %%mm1\n" - "punpckhbw %%mm7, %%mm5\n" - "punpckhbw %%mm7, %%mm3\n" - "psubw %%mm1, %%mm4\n" - "psubw %%mm3, %%mm5\n" - "psubw %%mm4, %%mm0\n" - "psubw %%mm5, %%mm2\n" - "pxor %%mm3, %%mm3\n" - "pxor %%mm1, %%mm1\n" - "pcmpgtw %%mm0, %%mm3\n\t" - "pcmpgtw %%mm2, %%mm1\n\t" - "pxor %%mm3, %%mm0\n" - "pxor %%mm1, %%mm2\n" - "psubw %%mm3, %%mm0\n" - "psubw %%mm1, %%mm2\n" - "paddw %%mm0, %%mm2\n" - "paddw %%mm2, %%mm6\n" - - "add %2, %0\n" - "subl $2, %%ecx\n" - " jnz 1b\n" - - "movq %%mm6, %%mm0\n" - "punpcklwd %%mm7, %%mm0\n" - "punpckhwd %%mm7, %%mm6\n" - "paddd %%mm0, %%mm6\n" - - "movq %%mm6, %%mm0\n" - "psrlq $32, %%mm6\n" - "paddd %%mm6, %%mm0\n" - "movd %%mm0, %1\n" - : "+r" (pix1), "=r" (tmp) - : "r" ((x86_reg) line_size), "g" (h - 2) - : "%ecx"); - - return tmp; -} - -static int hf_noise16_mmx(uint8_t *pix1, int line_size, int h) -{ - int tmp; - uint8_t *pix = pix1; - - __asm__ volatile ( - "movl %3, %%ecx\n" - "pxor %%mm7, %%mm7\n" - "pxor %%mm6, %%mm6\n" - - "movq (%0), %%mm0\n" - "movq 1(%0), %%mm1\n" - "movq %%mm0, %%mm2\n" - "movq %%mm1, %%mm3\n" - "punpcklbw %%mm7, %%mm0\n" - "punpcklbw %%mm7, %%mm1\n" - "punpckhbw %%mm7, %%mm2\n" - "punpckhbw %%mm7, %%mm3\n" - "psubw %%mm1, %%mm0\n" - "psubw %%mm3, %%mm2\n" - - "add %2, %0\n" - - "movq (%0), %%mm4\n" - "movq 1(%0), %%mm1\n" - "movq %%mm4, %%mm5\n" - "movq %%mm1, %%mm3\n" - "punpcklbw %%mm7, %%mm4\n" - "punpcklbw %%mm7, %%mm1\n" - "punpckhbw %%mm7, %%mm5\n" - "punpckhbw %%mm7, %%mm3\n" - "psubw %%mm1, %%mm4\n" - "psubw %%mm3, %%mm5\n" - "psubw %%mm4, %%mm0\n" - "psubw %%mm5, %%mm2\n" - "pxor %%mm3, %%mm3\n" - "pxor %%mm1, %%mm1\n" - "pcmpgtw %%mm0, %%mm3\n\t" - "pcmpgtw %%mm2, %%mm1\n\t" - "pxor %%mm3, %%mm0\n" - "pxor %%mm1, %%mm2\n" - 
"psubw %%mm3, %%mm0\n" - "psubw %%mm1, %%mm2\n" - "paddw %%mm0, %%mm2\n" - "paddw %%mm2, %%mm6\n" - - "add %2, %0\n" - "1:\n" - - "movq (%0), %%mm0\n" - "movq 1(%0), %%mm1\n" - "movq %%mm0, %%mm2\n" - "movq %%mm1, %%mm3\n" - "punpcklbw %%mm7, %%mm0\n" - "punpcklbw %%mm7, %%mm1\n" - "punpckhbw %%mm7, %%mm2\n" - "punpckhbw %%mm7, %%mm3\n" - "psubw %%mm1, %%mm0\n" - "psubw %%mm3, %%mm2\n" - "psubw %%mm0, %%mm4\n" - "psubw %%mm2, %%mm5\n" - "pxor %%mm3, %%mm3\n" - "pxor %%mm1, %%mm1\n" - "pcmpgtw %%mm4, %%mm3\n\t" - "pcmpgtw %%mm5, %%mm1\n\t" - "pxor %%mm3, %%mm4\n" - "pxor %%mm1, %%mm5\n" - "psubw %%mm3, %%mm4\n" - "psubw %%mm1, %%mm5\n" - "paddw %%mm4, %%mm5\n" - "paddw %%mm5, %%mm6\n" - - "add %2, %0\n" - - "movq (%0), %%mm4\n" - "movq 1(%0), %%mm1\n" - "movq %%mm4, %%mm5\n" - "movq %%mm1, %%mm3\n" - "punpcklbw %%mm7, %%mm4\n" - "punpcklbw %%mm7, %%mm1\n" - "punpckhbw %%mm7, %%mm5\n" - "punpckhbw %%mm7, %%mm3\n" - "psubw %%mm1, %%mm4\n" - "psubw %%mm3, %%mm5\n" - "psubw %%mm4, %%mm0\n" - "psubw %%mm5, %%mm2\n" - "pxor %%mm3, %%mm3\n" - "pxor %%mm1, %%mm1\n" - "pcmpgtw %%mm0, %%mm3\n\t" - "pcmpgtw %%mm2, %%mm1\n\t" - "pxor %%mm3, %%mm0\n" - "pxor %%mm1, %%mm2\n" - "psubw %%mm3, %%mm0\n" - "psubw %%mm1, %%mm2\n" - "paddw %%mm0, %%mm2\n" - "paddw %%mm2, %%mm6\n" - - "add %2, %0\n" - "subl $2, %%ecx\n" - " jnz 1b\n" +int ff_sum_abs_dctelem_mmx(int16_t *block); +int ff_sum_abs_dctelem_mmxext(int16_t *block); +int ff_sum_abs_dctelem_sse2(int16_t *block); +int ff_sum_abs_dctelem_ssse3(int16_t *block); +int ff_sse8_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, + int line_size, int h); +int ff_sse16_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, + int line_size, int h); +int ff_sse16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, + int line_size, int h); +int ff_hf_noise8_mmx(uint8_t *pix1, int lsize, int h); +int ff_hf_noise16_mmx(uint8_t *pix1, int lsize, int h); - "movq %%mm6, %%mm0\n" - "punpcklwd %%mm7, %%mm0\n" - "punpckhwd %%mm7, %%mm6\n" - "paddd 
%%mm0, %%mm6\n" +#define hadamard_func(cpu) \ + int ff_hadamard8_diff_ ## cpu(MpegEncContext *s, uint8_t *src1, \ + uint8_t *src2, int stride, int h); \ + int ff_hadamard8_diff16_ ## cpu(MpegEncContext *s, uint8_t *src1, \ + uint8_t *src2, int stride, int h); - "movq %%mm6, %%mm0\n" - "psrlq $32, %%mm6\n" - "paddd %%mm6, %%mm0\n" - "movd %%mm0, %1\n" - : "+r" (pix1), "=r" (tmp) - : "r" ((x86_reg) line_size), "g" (h - 2) - : "%ecx"); +hadamard_func(mmx) +hadamard_func(mmxext) +hadamard_func(sse2) +hadamard_func(ssse3) - return tmp + hf_noise8_mmx(pix + 8, line_size, h); -} +#if HAVE_YASM static int nsse16_mmx(MpegEncContext *c, uint8_t *pix1, uint8_t *pix2, int line_size, int h) @@@ -353,20 -818,16 +347,10 @@@ av_cold void ff_dsputilenc_init_mmx(DSP { int cpu_flags = av_get_cpu_flags(); - if (EXTERNAL_MMX(cpu_flags)) { - if (!high_bit_depth) - c->get_pixels = ff_get_pixels_mmx; - c->diff_pixels = ff_diff_pixels_mmx; - } - - if (EXTERNAL_SSE2(cpu_flags)) - if (!high_bit_depth) - c->get_pixels = ff_get_pixels_sse2; - #if HAVE_INLINE_ASM if (INLINE_MMX(cpu_flags)) { - c->sum_abs_dctelem = sum_abs_dctelem_mmx; - - c->sse[0] = sse16_mmx; - c->sse[1] = sse8_mmx; c->vsad[4] = vsad_intra16_mmx; - c->nsse[0] = nsse16_mmx; - c->nsse[1] = nsse8_mmx; if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { c->vsad[0] = vsad16_mmx; } @@@ -409,8 -865,6 +393,7 @@@ if (EXTERNAL_SSE2(cpu_flags)) { c->sse[0] = ff_sse16_sse2; + c->sum_abs_dctelem = ff_sum_abs_dctelem_sse2; - c->diff_pixels = ff_diff_pixels_sse2; #if HAVE_ALIGNED_STACK c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2; diff --cc libavcodec/x86/pixblockdsp.asm index 00000000000,c8fd1b24a13..00ee9b4ac23 mode 000000,100644..100644 --- a/libavcodec/x86/pixblockdsp.asm +++ b/libavcodec/x86/pixblockdsp.asm @@@ -1,0 -1,110 +1,135 @@@ + ;***************************************************************************** + ;* SIMD-optimized pixel operations + ;***************************************************************************** + ;* 
Copyright (c) 2000, 2001 Fabrice Bellard + ;* Copyright (c) 2002-2004 Michael Niedermayer + ;* -;* This file is part of Libav. ++;* This file is part of FFmpeg. + ;* -;* Libav is free software; you can redistribute it and/or ++;* FFmpeg is free software; you can redistribute it and/or + ;* modify it under the terms of the GNU Lesser General Public + ;* License as published by the Free Software Foundation; either + ;* version 2.1 of the License, or (at your option) any later version. + ;* -;* Libav is distributed in the hope that it will be useful, ++;* FFmpeg is distributed in the hope that it will be useful, + ;* but WITHOUT ANY WARRANTY; without even the implied warranty of + ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + ;* Lesser General Public License for more details. + ;* + ;* You should have received a copy of the GNU Lesser General Public -;* License along with Libav; if not, write to the Free Software ++;* License along with FFmpeg; if not, write to the Free Software + ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + ;***************************************************************************** + + %include "libavutil/x86/x86util.asm" + + SECTION .text + + INIT_MMX mmx + ; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size) + cglobal get_pixels, 3,4 + movsxdifnidn r2, r2d + add r0, 128 + mov r3, -128 + pxor m7, m7 + .loop: + mova m0, [r1] + mova m2, [r1+r2] + mova m1, m0 + mova m3, m2 + punpcklbw m0, m7 + punpckhbw m1, m7 + punpcklbw m2, m7 + punpckhbw m3, m7 + mova [r0+r3+ 0], m0 + mova [r0+r3+ 8], m1 + mova [r0+r3+16], m2 + mova [r0+r3+24], m3 + lea r1, [r1+r2*2] + add r3, 32 + js .loop + REP_RET + + INIT_XMM sse2 -cglobal get_pixels, 3, 4 ++cglobal get_pixels, 3, 4, 5 + movsxdifnidn r2, r2d + lea r3, [r2*3] + pxor m4, m4 + movh m0, [r1] + movh m1, [r1+r2] + movh m2, [r1+r2*2] + movh m3, [r1+r3] + lea r1, [r1+r2*4] + punpcklbw m0, m4 + punpcklbw m1, m4 + punpcklbw m2, m4 + 
punpcklbw m3, m4 + mova [r0], m0 + mova [r0+0x10], m1 + mova [r0+0x20], m2 + mova [r0+0x30], m3 + movh m0, [r1] + movh m1, [r1+r2*1] + movh m2, [r1+r2*2] + movh m3, [r1+r3] + punpcklbw m0, m4 + punpcklbw m1, m4 + punpcklbw m2, m4 + punpcklbw m3, m4 + mova [r0+0x40], m0 + mova [r0+0x50], m1 + mova [r0+0x60], m2 + mova [r0+0x70], m3 + RET + + INIT_MMX mmx + ; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2, + ; int stride); + cglobal diff_pixels, 4,5 + movsxdifnidn r3, r3d + pxor m7, m7 + add r0, 128 + mov r4, -128 + .loop: + mova m0, [r1] + mova m2, [r2] + mova m1, m0 + mova m3, m2 + punpcklbw m0, m7 + punpckhbw m1, m7 + punpcklbw m2, m7 + punpckhbw m3, m7 + psubw m0, m2 + psubw m1, m3 + mova [r0+r4+0], m0 + mova [r0+r4+8], m1 + add r1, r3 + add r2, r3 + add r4, 16 + jne .loop + REP_RET ++ ++INIT_XMM sse2 ++cglobal diff_pixels, 4, 5, 5 ++ movsxdifnidn r3, r3d ++ pxor m4, m4 ++ add r0, 128 ++ mov r4, -128 ++.loop: ++ movh m0, [r1] ++ movh m2, [r2] ++ movh m1, [r1+r3] ++ movh m3, [r2+r3] ++ punpcklbw m0, m4 ++ punpcklbw m1, m4 ++ punpcklbw m2, m4 ++ punpcklbw m3, m4 ++ psubw m0, m2 ++ psubw m1, m3 ++ mova [r0+r4+0 ], m0 ++ mova [r0+r4+16], m1 ++ lea r1, [r1+r3*2] ++ lea r2, [r2+r3*2] ++ add r4, 32 ++ jne .loop ++ RET diff --cc libavcodec/x86/pixblockdsp_init.c index 00000000000,9582e0b5c28..4c31b802ff1 mode 000000,100644..100644 --- a/libavcodec/x86/pixblockdsp_init.c +++ b/libavcodec/x86/pixblockdsp_init.c @@@ -1,0 -1,47 +1,50 @@@ + /* + * SIMD-optimized pixel operations + * - * This file is part of Libav. ++ * This file is part of FFmpeg. + * - * Libav is free software; you can redistribute it and/or ++ * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * - * Libav is distributed in the hope that it will be useful, ++ * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software ++ * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + + #include "libavutil/attributes.h" + #include "libavutil/cpu.h" + #include "libavutil/x86/cpu.h" + #include "libavcodec/pixblockdsp.h" + + void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size); + void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size); + void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2, + int stride); ++void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2, ++ int stride); + + av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c, + AVCodecContext *avctx, + unsigned high_bit_depth) + { + int cpu_flags = av_get_cpu_flags(); + + if (EXTERNAL_MMX(cpu_flags)) { + if (!high_bit_depth) + c->get_pixels = ff_get_pixels_mmx; + c->diff_pixels = ff_diff_pixels_mmx; + } + + if (EXTERNAL_SSE2(cpu_flags)) { + if (!high_bit_depth) + c->get_pixels = ff_get_pixels_sse2; ++ c->diff_pixels = ff_diff_pixels_sse2; + } + } diff --cc libavfilter/vf_mpdecimate.c index 099622a0295,00000000000..c667a9f4ccd mode 100644,000000..100644 --- a/libavfilter/vf_mpdecimate.c +++ b/libavfilter/vf_mpdecimate.c @@@ -1,257 -1,0 +1,261 @@@ +/* + * Copyright (c) 2003 Rich Felker + * Copyright (c) 2012 Stefano Sabatini + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with FFmpeg; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/** + * @file mpdecimate filter, ported from libmpcodecs/vf_decimate.c by + * Rich Felker. + */ + +#include "libavutil/opt.h" +#include "libavutil/pixdesc.h" +#include "libavutil/timestamp.h" +#include "libavcodec/dsputil.h" ++#include "libavcodec/pixblockdsp.h" +#include "avfilter.h" +#include "internal.h" +#include "formats.h" +#include "video.h" + +typedef struct { + const AVClass *class; + int lo, hi; ///< lower and higher threshold number of differences + ///< values for 8x8 blocks + + float frac; ///< threshold of changed pixels over the total fraction + + int max_drop_count; ///< if positive: maximum number of sequential frames to drop + ///< if negative: minimum number of frames between two drops + + int drop_count; ///< if positive: number of frames sequentially dropped + ///< if negative: number of sequential frames which were not dropped + + int hsub, vsub; ///< chroma subsampling values + AVFrame *ref; ///< reference picture + DSPContext dspctx; ///< context providing optimized diff routines ++ PixblockDSPContext pdsp; + AVCodecContext *avctx; ///< codec context required for the DSPContext +} DecimateContext; + +#define OFFSET(x) offsetof(DecimateContext, x) +#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM + +static const 
AVOption mpdecimate_options[] = { + { "max", "set the maximum number of consecutive dropped frames (positive), or the minimum interval between dropped frames (negative)", + OFFSET(max_drop_count), AV_OPT_TYPE_INT, {.i64=0}, INT_MIN, INT_MAX, FLAGS }, + { "hi", "set high dropping threshold", OFFSET(hi), AV_OPT_TYPE_INT, {.i64=64*12}, INT_MIN, INT_MAX, FLAGS }, + { "lo", "set low dropping threshold", OFFSET(lo), AV_OPT_TYPE_INT, {.i64=64*5}, INT_MIN, INT_MAX, FLAGS }, + { "frac", "set fraction dropping threshold", OFFSET(frac), AV_OPT_TYPE_FLOAT, {.dbl=0.33}, 0, 1, FLAGS }, + { NULL } +}; + +AVFILTER_DEFINE_CLASS(mpdecimate); + +/** + * Return 1 if the two planes are different, 0 otherwise. + */ +static int diff_planes(AVFilterContext *ctx, + uint8_t *cur, uint8_t *ref, int linesize, + int w, int h) +{ + DecimateContext *decimate = ctx->priv; + DSPContext *dspctx = &decimate->dspctx; ++ PixblockDSPContext *pdsp = &decimate->pdsp; + + int x, y; + int d, c = 0; + int t = (w/16)*(h/16)*decimate->frac; + int16_t block[8*8]; + + /* compute difference for blocks of 8x8 bytes, sampled every 4 pixels so neighbouring blocks overlap */ + for (y = 0; y < h-7; y += 4) { + for (x = 8; x < w-7; x += 4) { - dspctx->diff_pixels(block, ++ pdsp->diff_pixels(block, + cur+x+y*linesize, + ref+x+y*linesize, linesize); + d = dspctx->sum_abs_dctelem(block); + if (d > decimate->hi) + return 1; + if (d > decimate->lo) { + c++; + if (c > t) + return 1; + } + } + } + return 0; +} + +/** + * Tell if the frame should be decimated, for example if it is not much + * different with respect to the reference frame ref. 
+ */ +static int decimate_frame(AVFilterContext *ctx, + AVFrame *cur, AVFrame *ref) +{ + DecimateContext *decimate = ctx->priv; + int plane; + + if (decimate->max_drop_count > 0 && + decimate->drop_count >= decimate->max_drop_count) + return 0; + if (decimate->max_drop_count < 0 && + (decimate->drop_count-1) > decimate->max_drop_count) + return 0; + + for (plane = 0; ref->data[plane] && ref->linesize[plane]; plane++) { + int vsub = plane == 1 || plane == 2 ? decimate->vsub : 0; + int hsub = plane == 1 || plane == 2 ? decimate->hsub : 0; + if (diff_planes(ctx, + cur->data[plane], ref->data[plane], ref->linesize[plane], + FF_CEIL_RSHIFT(ref->width, hsub), + FF_CEIL_RSHIFT(ref->height, vsub))) + return 0; + } + + return 1; +} + +static av_cold int init(AVFilterContext *ctx) +{ + DecimateContext *decimate = ctx->priv; + + av_log(ctx, AV_LOG_VERBOSE, "max_drop_count:%d hi:%d lo:%d frac:%f\n", + decimate->max_drop_count, decimate->hi, decimate->lo, decimate->frac); + + decimate->avctx = avcodec_alloc_context3(NULL); + if (!decimate->avctx) + return AVERROR(ENOMEM); + avpriv_dsputil_init(&decimate->dspctx, decimate->avctx); ++ ff_pixblockdsp_init(&decimate->pdsp, decimate->avctx); + + return 0; +} + +static av_cold void uninit(AVFilterContext *ctx) +{ + DecimateContext *decimate = ctx->priv; + av_frame_free(&decimate->ref); + if (decimate->avctx) { + avcodec_close(decimate->avctx); + av_freep(&decimate->avctx); + } +} + +static int query_formats(AVFilterContext *ctx) +{ + static const enum AVPixelFormat pix_fmts[] = { + AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, + AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV411P, + AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV440P, + AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ422P, + AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ440P, + AV_PIX_FMT_YUVA420P, + AV_PIX_FMT_NONE + }; + + ff_set_common_formats(ctx, ff_make_format_list(pix_fmts)); + + return 0; +} + +static int config_input(AVFilterLink *inlink) +{ + AVFilterContext *ctx = inlink->dst; + DecimateContext *decimate = 
ctx->priv; + const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format); + decimate->hsub = pix_desc->log2_chroma_w; + decimate->vsub = pix_desc->log2_chroma_h; + + return 0; +} + +static int filter_frame(AVFilterLink *inlink, AVFrame *cur) +{ + DecimateContext *decimate = inlink->dst->priv; + AVFilterLink *outlink = inlink->dst->outputs[0]; + int ret; + + if (decimate->ref && decimate_frame(inlink->dst, cur, decimate->ref)) { + decimate->drop_count = FFMAX(1, decimate->drop_count+1); + } else { + av_frame_free(&decimate->ref); + decimate->ref = cur; + decimate->drop_count = FFMIN(-1, decimate->drop_count-1); + + if (ret = ff_filter_frame(outlink, av_frame_clone(cur)) < 0) + return ret; + } + + av_log(inlink->dst, AV_LOG_DEBUG, + "%s pts:%s pts_time:%s drop_count:%d\n", + decimate->drop_count > 0 ? "drop" : "keep", + av_ts2str(cur->pts), av_ts2timestr(cur->pts, &inlink->time_base), + decimate->drop_count); + + if (decimate->drop_count > 0) + av_frame_free(&cur); + + return 0; +} + +static int request_frame(AVFilterLink *outlink) +{ + DecimateContext *decimate = outlink->src->priv; + AVFilterLink *inlink = outlink->src->inputs[0]; + int ret; + + do { + ret = ff_request_frame(inlink); + } while (decimate->drop_count > 0 && ret >= 0); + + return ret; +} + +static const AVFilterPad mpdecimate_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = config_input, + .filter_frame = filter_frame, + }, + { NULL } +}; + +static const AVFilterPad mpdecimate_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .request_frame = request_frame, + }, + { NULL } +}; + +AVFilter ff_vf_mpdecimate = { + .name = "mpdecimate", + .description = NULL_IF_CONFIG_SMALL("Remove near-duplicate frames."), + .init = init, + .uninit = uninit, + .priv_size = sizeof(DecimateContext), + .priv_class = &mpdecimate_class, + .query_formats = query_formats, + .inputs = mpdecimate_inputs, + .outputs = mpdecimate_outputs, +}; diff --cc 
libavfilter/vf_spp.c index 9df87ff3f26,00000000000..4e4a5795f47 mode 100644,000000..100644 --- a/libavfilter/vf_spp.c +++ b/libavfilter/vf_spp.c @@@ -1,439 -1,0 +1,439 @@@ +/* + * Copyright (c) 2003 Michael Niedermayer + * Copyright (c) 2013 Clément Bœsch + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with FFmpeg; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/** + * @file + * Simple post processing filter + * + * This implementation is based on an algorithm described in + * "Aria Nosratinia Embedded Post-Processing for + * Enhancement of Compressed Images (1999)" + * + * Originally written by Michael Niedermayer for the MPlayer project, and + * ported by Clément Bœsch for FFmpeg. 
+ */ + +#include "libavcodec/dsputil.h" +#include "libavutil/avassert.h" +#include "libavutil/imgutils.h" +#include "libavutil/opt.h" +#include "libavutil/pixdesc.h" +#include "internal.h" +#include "vf_spp.h" + +enum mode { + MODE_HARD, + MODE_SOFT, + NB_MODES +}; + +#define OFFSET(x) offsetof(SPPContext, x) +#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM +static const AVOption spp_options[] = { + { "quality", "set quality", OFFSET(log2_count), AV_OPT_TYPE_INT, {.i64 = 3}, 0, MAX_LEVEL, FLAGS }, + { "qp", "force a constant quantizer parameter", OFFSET(qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 63, FLAGS }, + { "mode", "set thresholding mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = MODE_HARD}, 0, NB_MODES - 1, FLAGS, "mode" }, + { "hard", "hard thresholding", 0, AV_OPT_TYPE_CONST, {.i64 = MODE_HARD}, INT_MIN, INT_MAX, FLAGS, "mode" }, + { "soft", "soft thresholding", 0, AV_OPT_TYPE_CONST, {.i64 = MODE_SOFT}, INT_MIN, INT_MAX, FLAGS, "mode" }, + { "use_bframe_qp", "use B-frames' QP", OFFSET(use_bframe_qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, FLAGS }, + { NULL } +}; + +AVFILTER_DEFINE_CLASS(spp); + +// XXX: share between filters? 
+DECLARE_ALIGNED(8, static const uint8_t, ldither)[8][8] = { + { 0, 48, 12, 60, 3, 51, 15, 63 }, + { 32, 16, 44, 28, 35, 19, 47, 31 }, + { 8, 56, 4, 52, 11, 59, 7, 55 }, + { 40, 24, 36, 20, 43, 27, 39, 23 }, + { 2, 50, 14, 62, 1, 49, 13, 61 }, + { 34, 18, 46, 30, 33, 17, 45, 29 }, + { 10, 58, 6, 54, 9, 57, 5, 53 }, + { 42, 26, 38, 22, 41, 25, 37, 21 }, +}; + +static const uint8_t offset[127][2] = { + {0,0}, + {0,0}, {4,4}, // quality = 1 + {0,0}, {2,2}, {6,4}, {4,6}, // quality = 2 + {0,0}, {5,1}, {2,2}, {7,3}, {4,4}, {1,5}, {6,6}, {3,7}, // quality = 3 + + {0,0}, {4,0}, {1,1}, {5,1}, {3,2}, {7,2}, {2,3}, {6,3}, // quality = 4 + {0,4}, {4,4}, {1,5}, {5,5}, {3,6}, {7,6}, {2,7}, {6,7}, + + {0,0}, {0,2}, {0,4}, {0,6}, {1,1}, {1,3}, {1,5}, {1,7}, // quality = 5 + {2,0}, {2,2}, {2,4}, {2,6}, {3,1}, {3,3}, {3,5}, {3,7}, + {4,0}, {4,2}, {4,4}, {4,6}, {5,1}, {5,3}, {5,5}, {5,7}, + {6,0}, {6,2}, {6,4}, {6,6}, {7,1}, {7,3}, {7,5}, {7,7}, + + {0,0}, {4,4}, {0,4}, {4,0}, {2,2}, {6,6}, {2,6}, {6,2}, // quality = 6 + {0,2}, {4,6}, {0,6}, {4,2}, {2,0}, {6,4}, {2,4}, {6,0}, + {1,1}, {5,5}, {1,5}, {5,1}, {3,3}, {7,7}, {3,7}, {7,3}, + {1,3}, {5,7}, {1,7}, {5,3}, {3,1}, {7,5}, {3,5}, {7,1}, + {0,1}, {4,5}, {0,5}, {4,1}, {2,3}, {6,7}, {2,7}, {6,3}, + {0,3}, {4,7}, {0,7}, {4,3}, {2,1}, {6,5}, {2,5}, {6,1}, + {1,0}, {5,4}, {1,4}, {5,0}, {3,2}, {7,6}, {3,6}, {7,2}, + {1,2}, {5,6}, {1,6}, {5,2}, {3,0}, {7,4}, {3,4}, {7,0}, +}; + +static void hardthresh_c(int16_t dst[64], const int16_t src[64], + int qp, const uint8_t *permutation) +{ + int i; + int bias = 0; // FIXME + + unsigned threshold1 = qp * ((1<<4) - bias) - 1; + unsigned threshold2 = threshold1 << 1; + + memset(dst, 0, 64 * sizeof(dst[0])); + dst[0] = (src[0] + 4) >> 3; + + for (i = 1; i < 64; i++) { + int level = src[i]; + if (((unsigned)(level + threshold1)) > threshold2) { + const int j = permutation[i]; + dst[j] = (level + 4) >> 3; + } + } +} + +static void softthresh_c(int16_t dst[64], const int16_t src[64], + int qp, const 
uint8_t *permutation) +{ + int i; + int bias = 0; //FIXME + + unsigned threshold1 = qp * ((1<<4) - bias) - 1; + unsigned threshold2 = threshold1 << 1; + + memset(dst, 0, 64 * sizeof(dst[0])); + dst[0] = (src[0] + 4) >> 3; + + for (i = 1; i < 64; i++) { + int level = src[i]; + if (((unsigned)(level + threshold1)) > threshold2) { + const int j = permutation[i]; + if (level > 0) dst[j] = (level - threshold1 + 4) >> 3; + else dst[j] = (level + threshold1 + 4) >> 3; + } + } +} + +static void store_slice_c(uint8_t *dst, const int16_t *src, + int dst_linesize, int src_linesize, + int width, int height, int log2_scale, + const uint8_t dither[8][8]) +{ + int y, x; + +#define STORE(pos) do { \ + temp = ((src[x + y*src_linesize + pos] << log2_scale) + d[pos]) >> 6; \ + if (temp & 0x100) \ + temp = ~(temp >> 31); \ + dst[x + y*dst_linesize + pos] = temp; \ +} while (0) + + for (y = 0; y < height; y++) { + const uint8_t *d = dither[y]; + for (x = 0; x < width; x += 8) { + int temp; + STORE(0); + STORE(1); + STORE(2); + STORE(3); + STORE(4); + STORE(5); + STORE(6); + STORE(7); + } + } +} + +static inline void add_block(int16_t *dst, int linesize, const int16_t block[64]) +{ + int y; + + for (y = 0; y < 8; y++) { + *(uint32_t *)&dst[0 + y*linesize] += *(uint32_t *)&block[0 + y*8]; + *(uint32_t *)&dst[2 + y*linesize] += *(uint32_t *)&block[2 + y*8]; + *(uint32_t *)&dst[4 + y*linesize] += *(uint32_t *)&block[4 + y*8]; + *(uint32_t *)&dst[6 + y*linesize] += *(uint32_t *)&block[6 + y*8]; + } +} + +// XXX: export the function? 
+static inline int norm_qscale(int qscale, int type) +{ + switch (type) { + case FF_QSCALE_TYPE_MPEG1: return qscale; + case FF_QSCALE_TYPE_MPEG2: return qscale >> 1; + case FF_QSCALE_TYPE_H264: return qscale >> 2; + case FF_QSCALE_TYPE_VP56: return (63 - qscale + 2) >> 2; + } + return qscale; +} + +static void filter(SPPContext *p, uint8_t *dst, uint8_t *src, + int dst_linesize, int src_linesize, int width, int height, + const uint8_t *qp_table, int qp_stride, int is_luma) +{ + int x, y, i; + const int count = 1 << p->log2_count; + const int linesize = is_luma ? p->temp_linesize : FFALIGN(width+16, 16); + DECLARE_ALIGNED(16, uint64_t, block_align)[32]; + int16_t *block = (int16_t *)block_align; + int16_t *block2 = (int16_t *)(block_align + 16); + + for (y = 0; y < height; y++) { + int index = 8 + 8*linesize + y*linesize; + memcpy(p->src + index, src + y*src_linesize, width); + for (x = 0; x < 8; x++) { + p->src[index - x - 1] = p->src[index + x ]; + p->src[index + width + x ] = p->src[index + width - x - 1]; + } + } + for (y = 0; y < 8; y++) { + memcpy(p->src + ( 7-y)*linesize, p->src + ( y+8)*linesize, linesize); + memcpy(p->src + (height+8+y)*linesize, p->src + (height-y+7)*linesize, linesize); + } + + for (y = 0; y < height + 8; y += 8) { + memset(p->temp + (8 + y) * linesize, 0, 8 * linesize * sizeof(*p->temp)); + for (x = 0; x < width + 8; x += 8) { + int qp; + + if (p->qp) { + qp = p->qp; + } else{ + const int qps = 3 + is_luma; + qp = qp_table[(FFMIN(x, width - 1) >> qps) + (FFMIN(y, height - 1) >> qps) * qp_stride]; + qp = FFMAX(1, norm_qscale(qp, p->qscale_type)); + } + for (i = 0; i < count; i++) { + const int x1 = x + offset[i + count - 1][0]; + const int y1 = y + offset[i + count - 1][1]; + const int index = x1 + y1*linesize; - p->dsp.get_pixels(block, p->src + index, linesize); ++ p->pdsp.get_pixels(block, p->src + index, linesize); + p->fdsp.fdct(block); + p->requantize(block2, block, qp, p->idsp.idct_permutation); + p->idsp.idct(block2); + 
add_block(p->temp + index, linesize, block2); + } + } + if (y) + p->store_slice(dst + (y - 8) * dst_linesize, p->temp + 8 + y*linesize, + dst_linesize, linesize, width, + FFMIN(8, height + 8 - y), MAX_LEVEL - p->log2_count, + ldither); + } +} + +static int query_formats(AVFilterContext *ctx) +{ + static const enum PixelFormat pix_fmts[] = { + AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, + AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV411P, + AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV440P, + AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ422P, + AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ440P, + AV_PIX_FMT_NONE + }; + ff_set_common_formats(ctx, ff_make_format_list(pix_fmts)); + return 0; +} + +static int config_input(AVFilterLink *inlink) +{ + SPPContext *spp = inlink->dst->priv; + const int h = FFALIGN(inlink->h + 16, 16); + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); + + spp->hsub = desc->log2_chroma_w; + spp->vsub = desc->log2_chroma_h; + spp->temp_linesize = FFALIGN(inlink->w + 16, 16); + spp->temp = av_malloc_array(spp->temp_linesize, h * sizeof(*spp->temp)); + spp->src = av_malloc_array(spp->temp_linesize, h * sizeof(*spp->src)); + if (!spp->use_bframe_qp) { + /* we are assuming here the qp blocks will not be smaller that 16x16 */ + spp->non_b_qp_alloc_size = FF_CEIL_RSHIFT(inlink->w, 4) * FF_CEIL_RSHIFT(inlink->h, 4); + spp->non_b_qp_table = av_calloc(spp->non_b_qp_alloc_size, sizeof(*spp->non_b_qp_table)); + if (!spp->non_b_qp_table) + return AVERROR(ENOMEM); + } + if (!spp->temp || !spp->src) + return AVERROR(ENOMEM); + return 0; +} + +static int filter_frame(AVFilterLink *inlink, AVFrame *in) +{ + AVFilterContext *ctx = inlink->dst; + SPPContext *spp = ctx->priv; + AVFilterLink *outlink = ctx->outputs[0]; + AVFrame *out = in; + int qp_stride = 0; + const int8_t *qp_table = NULL; + + /* if we are not in a constant user quantizer mode and we don't want to use + * the quantizers from the B-frames (B-frames often have a higher QP), we + * need to save the qp table from the 
last non B-frame; this is what the + * following code block does */ + if (!spp->qp) { + qp_table = av_frame_get_qp_table(in, &qp_stride, &spp->qscale_type); + + if (qp_table && !spp->use_bframe_qp && in->pict_type != AV_PICTURE_TYPE_B) { + int w, h; + + /* if the qp stride is not set, it means the QP are only defined on + * a line basis */ + if (!qp_stride) { + w = FF_CEIL_RSHIFT(inlink->w, 4); + h = 1; + } else { + w = FF_CEIL_RSHIFT(qp_stride, 4); + h = FF_CEIL_RSHIFT(inlink->h, 4); + } + av_assert0(w * h <= spp->non_b_qp_alloc_size); + memcpy(spp->non_b_qp_table, qp_table, w * h); + } + } + + if (spp->log2_count && !ctx->is_disabled) { + if (!spp->use_bframe_qp && spp->non_b_qp_table) + qp_table = spp->non_b_qp_table; + + if (qp_table || spp->qp) { + const int cw = FF_CEIL_RSHIFT(inlink->w, spp->hsub); + const int ch = FF_CEIL_RSHIFT(inlink->h, spp->vsub); + + /* get a new frame if in-place is not possible or if the dimensions + * are not multiple of 8 */ + if (!av_frame_is_writable(in) || (inlink->w & 7) || (inlink->h & 7)) { + const int aligned_w = FFALIGN(inlink->w, 8); + const int aligned_h = FFALIGN(inlink->h, 8); + + out = ff_get_video_buffer(outlink, aligned_w, aligned_h); + if (!out) { + av_frame_free(&in); + return AVERROR(ENOMEM); + } + av_frame_copy_props(out, in); + out->width = in->width; + out->height = in->height; + } + + filter(spp, out->data[0], in->data[0], out->linesize[0], in->linesize[0], inlink->w, inlink->h, qp_table, qp_stride, 1); + filter(spp, out->data[1], in->data[1], out->linesize[1], in->linesize[1], cw, ch, qp_table, qp_stride, 0); + filter(spp, out->data[2], in->data[2], out->linesize[2], in->linesize[2], cw, ch, qp_table, qp_stride, 0); + emms_c(); + } + } + + if (in != out) { + if (in->data[3]) + av_image_copy_plane(out->data[3], out->linesize[3], + in ->data[3], in ->linesize[3], + inlink->w, inlink->h); + av_frame_free(&in); + } + return ff_filter_frame(outlink, out); +} + +static int process_command(AVFilterContext *ctx, 
const char *cmd, const char *args, + char *res, int res_len, int flags) +{ + SPPContext *spp = ctx->priv; + + if (!strcmp(cmd, "level")) { + if (!strcmp(args, "max")) + spp->log2_count = MAX_LEVEL; + else + spp->log2_count = av_clip(strtol(args, NULL, 10), 0, MAX_LEVEL); + return 0; + } + return AVERROR(ENOSYS); +} + +static av_cold int init(AVFilterContext *ctx) +{ + SPPContext *spp = ctx->priv; + + spp->avctx = avcodec_alloc_context3(NULL); + if (!spp->avctx) + return AVERROR(ENOMEM); - avpriv_dsputil_init(&spp->dsp, spp->avctx); + ff_idctdsp_init(&spp->idsp, spp->avctx); + ff_fdctdsp_init(&spp->fdsp, spp->avctx); ++ ff_pixblockdsp_init(&spp->pdsp, spp->avctx); + spp->store_slice = store_slice_c; + switch (spp->mode) { + case MODE_HARD: spp->requantize = hardthresh_c; break; + case MODE_SOFT: spp->requantize = softthresh_c; break; + } + if (ARCH_X86) + ff_spp_init_x86(spp); + return 0; +} + +static av_cold void uninit(AVFilterContext *ctx) +{ + SPPContext *spp = ctx->priv; + + av_freep(&spp->temp); + av_freep(&spp->src); + if (spp->avctx) { + avcodec_close(spp->avctx); + av_freep(&spp->avctx); + } + av_freep(&spp->non_b_qp_table); +} + +static const AVFilterPad spp_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = config_input, + .filter_frame = filter_frame, + }, + { NULL } +}; + +static const AVFilterPad spp_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + }, + { NULL } +}; + +AVFilter ff_vf_spp = { + .name = "spp", + .description = NULL_IF_CONFIG_SMALL("Apply a simple post processing filter."), + .priv_size = sizeof(SPPContext), + .init = init, + .uninit = uninit, + .query_formats = query_formats, + .inputs = spp_inputs, + .outputs = spp_outputs, + .process_command = process_command, + .priv_class = &spp_class, + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL, +}; diff --cc libavfilter/vf_spp.h index 909d4de8122,00000000000..c8eac3caf29 mode 100644,000000..100644 --- a/libavfilter/vf_spp.h +++ 
b/libavfilter/vf_spp.h @@@ -1,63 -1,0 +1,63 @@@ +/* + * Copyright (c) 2003 Michael Niedermayer + * Copyright (c) 2013 Clément Bœsch + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with FFmpeg; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef AVFILTER_SPP_H +#define AVFILTER_SPP_H + +#include "libavcodec/avcodec.h" - #include "libavcodec/dsputil.h" ++#include "libavcodec/pixblockdsp.h" +#include "libavcodec/idctdsp.h" +#include "libavcodec/fdctdsp.h" +#include "avfilter.h" + +#define MAX_LEVEL 6 /* quality levels */ + +typedef struct { + const AVClass *av_class; + + int log2_count; + int qp; + int mode; + int qscale_type; + int temp_linesize; + uint8_t *src; + int16_t *temp; + AVCodecContext *avctx; - DSPContext dsp; + IDCTDSPContext idsp; + FDCTDSPContext fdsp; ++ PixblockDSPContext pdsp; + int8_t *non_b_qp_table; + int non_b_qp_alloc_size; + int use_bframe_qp; + int hsub, vsub; + + void (*store_slice)(uint8_t *dst, const int16_t *src, + int dst_stride, int src_stride, + int width, int height, int log2_scale, + const uint8_t dither[8][8]); + + void (*requantize)(int16_t dst[64], const int16_t src[64], + int qp, const uint8_t *permutation); +} SPPContext; + +void ff_spp_init_x86(SPPContext *s); + +#endif /* AVFILTER_SPP_H */