From 651da7f20399c92550247b097c1cf0a0b296fe6e Mon Sep 17 00:00:00 2001
From: Sam Hocevar
Date: Sun, 2 Jun 2002 23:29:29 +0000
Subject: [PATCH] (ported from v0_4_1_branch)

  * ./plugins/motion/motionmmx.c: MMX motion optimizations courtesy of
    Vladimir Chernyshov .
---
 ChangeLog                  |   2 +
 plugins/motion/motionmmx.c | 191 ++++++++++++++++++++++++++++---------
 2 files changed, 148 insertions(+), 45 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index c064efb0b0..1234899929 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -37,6 +37,8 @@ HEAD
 0.4.1
 Not released yet

+  * ./plugins/motion/motionmmx.c: MMX motion optimizations courtesy of
+    Vladimir Chernyshov .
   * ./plugins/dvdread/dvdread.c: disabled the dvdread plugin because it
     currently sucks.
   * ./src/misc/configuration.c, ./src/interface/main.c: we now accept --nofoo
diff --git a/plugins/motion/motionmmx.c b/plugins/motion/motionmmx.c
index 490c3a08a4..54410832c0 100644
--- a/plugins/motion/motionmmx.c
+++ b/plugins/motion/motionmmx.c
@@ -2,10 +2,11 @@
  * motionmmx.c : MMX motion compensation module for vlc
  *****************************************************************************
  * Copyright (C) 2001 VideoLAN
- * $Id: motionmmx.c,v 1.18 2002/06/01 12:32:00 sam Exp $
+ * $Id: motionmmx.c,v 1.19 2002/06/02 23:29:29 sam Exp $
  *
  * Authors: Aaron Holtzman
  *          Michel Lespinasse
+ *          Vladimir Chernyshov
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -85,29 +86,23 @@ static inline void mmx_average_2_U8 (yuv_data_t * dest,
     //
     // *dest = (*src1 + *src2 + 1)/ 2;
     //
+    static mmx_t mask1 = {0x0101010101010101LL};
+    static mmx_t mask7f = {0x7f7f7f7f7f7f7f7fLL};

     movq_m2r (*src1, mm1);        // load 8 src1 bytes
-    movq_r2r (mm1, mm2);          // copy 8 src1 bytes
+    movq_r2r (mm1, mm2);
+    psrlq_i2r (1, mm1);
+    pand_m2r (mask7f, mm1);

     movq_m2r (*src2, mm3);        // load 8 src2 bytes
-    movq_r2r (mm3, mm4);          // copy 8 src2 bytes
-
-    punpcklbw_r2r (mm0, mm1);     // unpack low src1 bytes
-    punpckhbw_r2r (mm0, mm2);     // unpack high src1 bytes
-
-    punpcklbw_r2r (mm0, mm3);     // unpack low src2 bytes
-    punpckhbw_r2r (mm0, mm4);     // unpack high src2 bytes
-
-    paddw_r2r (mm3, mm1);         // add lows to mm1
-    paddw_m2r (round1, mm1);
-    psraw_i2r (1, mm1);           // /2
-
-    paddw_r2r (mm4, mm2);         // add highs to mm2
-    paddw_m2r (round1, mm2);
-    psraw_i2r (1, mm2);           // /2
-
-    packuswb_r2r (mm2, mm1);      // pack (w/ saturation)
-    movq_r2m (mm1, *dest);        // store result in dest
+    por_r2r (mm3, mm2);
+    psrlq_i2r (1, mm3);
+    pand_m2r (mask7f, mm3);
+
+    paddb_r2r (mm1, mm3);
+    pand_m2r (mask1, mm2);
+    paddb_r2r (mm3, mm2);
+    movq_r2m (mm2, *dest);        // store result in dest
 }

 static inline void mmx_interp_average_2_U8 (yuv_data_t * dest,
@@ -406,69 +401,175 @@ static void MC_put_x8_mmx (yuv_data_t * dest, yuv_data_t * ref,

 //-----------------------------------------------------------------------

-static inline void MC_avg_xy_mmx (int width, int height,
-                                  yuv_data_t * dest, yuv_data_t * ref, int stride)
+static inline void MC_avg_xy_8wide_mmx (int height, yuv_data_t * dest,
+                                        yuv_data_t * ref, int stride)
 {
-    yuv_data_t * ref_next = ref+stride;
+    pxor_r2r (mm0, mm0);
+    movq_m2r (round4, mm7);

-    mmx_zero_reg ();
+    movq_m2r (*ref, mm1);         // calculate first row ref[0] + ref[1]
+    movq_r2r (mm1, mm2);
+
+    punpcklbw_r2r (mm0, mm1);
+    punpckhbw_r2r (mm0, mm2);
+
+    movq_m2r (*(ref+1), mm3);
+    movq_r2r (mm3, mm4);
+
+    punpcklbw_r2r (mm0, mm3);
+    punpckhbw_r2r (mm0, mm4);
+
+    paddw_r2r (mm3, mm1);
+    paddw_r2r (mm4, mm2);
+
+    ref += stride;

     do {
-        mmx_interp_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1);
-        if (width == 16)
-            mmx_interp_average_4_U8 (dest+8, ref+8, ref+9,
-                                     ref_next+8, ref_next+9);
+        movq_m2r (*ref, mm5);         // calculate next row ref[0] + ref[1]
+        movq_r2r (mm5, mm6);
+
+        punpcklbw_r2r (mm0, mm5);
+        punpckhbw_r2r (mm0, mm6);
+
+        movq_m2r (*(ref+1), mm3);
+        movq_r2r (mm3, mm4);
+
+        punpcklbw_r2r (mm0, mm3);
+        punpckhbw_r2r (mm0, mm4);
+
+        paddw_r2r (mm3, mm5);
+        paddw_r2r (mm4, mm6);
+
+        movq_r2r (mm7, mm3);          // calculate round4 + previous row + current row
+        movq_r2r (mm7, mm4);
+
+        paddw_r2r (mm1, mm3);
+        paddw_r2r (mm2, mm4);
+
+        paddw_r2r (mm5, mm3);
+        paddw_r2r (mm6, mm4);
+
+        psraw_i2r (2, mm3);           // /4
+        psraw_i2r (2, mm4);           // /4
+
+        movq_m2r (*dest, mm1);        // calculate (subtotal + dest[0] + round1) / 2
+        movq_r2r (mm1, mm2);
+
+        punpcklbw_r2r (mm0, mm1);
+        punpckhbw_r2r (mm0, mm2);
+
+        paddw_r2r (mm1, mm3);
+        paddw_r2r (mm2, mm4);
+
+        paddw_m2r (round1, mm3);
+        paddw_m2r (round1, mm4);
+
+        psraw_i2r (1, mm3);           // /2
+        psraw_i2r (1, mm4);           // /2
+
+        packuswb_r2r (mm4, mm3);      // pack (w/ saturation)
+        movq_r2m (mm3, *dest);        // store result in dest
+
+        movq_r2r (mm5, mm1);          // remember current row for the next pass
+        movq_r2r (mm6, mm2);
+
+        ref += stride;
+        dest += stride;

-        dest += stride;
-        ref += stride;
-        ref_next += stride;
     } while (--height);
 }

 static void MC_avg_xy16_mmx (yuv_data_t * dest, yuv_data_t * ref,
                              int stride, int height)
 {
-    MC_avg_xy_mmx (16, height, dest, ref, stride);
+    MC_avg_xy_8wide_mmx(height, dest, ref, stride);
+    MC_avg_xy_8wide_mmx(height, dest+8, ref+8, stride);
 }

 static void MC_avg_xy8_mmx (yuv_data_t * dest, yuv_data_t * ref,
                             int stride, int height)
 {
-    MC_avg_xy_mmx (8, height, dest, ref, stride);
+    MC_avg_xy_8wide_mmx(height, dest, ref, stride);
 }

 //-----------------------------------------------------------------------

-static inline void MC_put_xy_mmx (int width, int height,
-                                  yuv_data_t * dest, yuv_data_t * ref, int stride)
+static inline void MC_put_xy_8wide_mmx (int height, yuv_data_t * dest,
+                                        yuv_data_t * ref, int stride)
 {
-    yuv_data_t * ref_next = ref+stride;
+    pxor_r2r (mm0, mm0);
+    movq_m2r (round4, mm7);

-    mmx_zero_reg ();
+    movq_m2r (*ref, mm1);         // calculate first row ref[0] + ref[1]
+    movq_r2r (mm1, mm2);
+
+    punpcklbw_r2r (mm0, mm1);
+    punpckhbw_r2r (mm0, mm2);
+
+    movq_m2r (*(ref+1), mm3);
+    movq_r2r (mm3, mm4);
+
+    punpcklbw_r2r (mm0, mm3);
+    punpckhbw_r2r (mm0, mm4);
+
+    paddw_r2r (mm3, mm1);
+    paddw_r2r (mm4, mm2);
+
+    ref += stride;

     do {
-        mmx_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1);
-        if (width == 16)
-            mmx_average_4_U8 (dest+8, ref+8, ref+9, ref_next+8, ref_next+9);
+        movq_m2r (*ref, mm5);         // calculate next row ref[0] + ref[1]
+        movq_r2r (mm5, mm6);
+
+        punpcklbw_r2r (mm0, mm5);
+        punpckhbw_r2r (mm0, mm6);
+
+        movq_m2r (*(ref+1), mm3);
+        movq_r2r (mm3, mm4);
+
+        punpcklbw_r2r (mm0, mm3);
+        punpckhbw_r2r (mm0, mm4);
+
+        paddw_r2r (mm3, mm5);
+        paddw_r2r (mm4, mm6);
+
+        movq_r2r (mm7, mm3);          // calculate round4 + previous row + current row
+        movq_r2r (mm7, mm4);
+
+        paddw_r2r (mm1, mm3);
+        paddw_r2r (mm2, mm4);
+
+        paddw_r2r (mm5, mm3);
+        paddw_r2r (mm6, mm4);
+
+        psraw_i2r (2, mm3);           // /4
+        psraw_i2r (2, mm4);           // /4
+
+        packuswb_r2r (mm4, mm3);      // pack (w/ saturation)
+        movq_r2m (mm3, *dest);        // store result in dest
+
+        movq_r2r (mm5, mm1);          // advance to the next row
+        movq_r2r (mm6, mm2);
+
+        ref += stride;
+        dest += stride;

-        dest += stride;
-        ref += stride;
-        ref_next += stride;
     } while (--height);
 }

 static void MC_put_xy16_mmx (yuv_data_t * dest, yuv_data_t * ref,
                              int stride, int height)
 {
-    MC_put_xy_mmx (16, height, dest, ref, stride);
+    MC_put_xy_8wide_mmx(height, dest, ref, stride);
+    MC_put_xy_8wide_mmx(height, dest + 8, ref + 8, stride);
 }

 static void MC_put_xy8_mmx (yuv_data_t * dest, yuv_data_t * ref,
                             int stride, int height)
 {
-    MC_put_xy_mmx (8, height, dest, ref, stride);
+    MC_put_xy_8wide_mmx(height, dest, ref, stride);
 }

 //-----------------------------------------------------------------------
-- 
2.39.2
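
A side note, not part of the patch: the rewritten mmx_average_2_U8 no longer unpacks the bytes to 16-bit words; it builds the rounded average directly on packed bytes. A minimal scalar sketch of the identity the new code relies on, assuming plain C99 and unsigned 8-bit samples:

#include <assert.h>
#include <stdint.h>

/* For unsigned bytes a and b:
 *     (a + b + 1) / 2 == ((a >> 1) & 0x7f) + ((b >> 1) & 0x7f) + ((a | b) & 1)
 * Every term fits in a byte, so the sum never exceeds 255 and the MMX code
 * can add with byte-wise paddb instead of unpacking to words. */
static uint8_t avg_round_up (uint8_t a, uint8_t b)
{
    return (uint8_t)( ((a >> 1) & 0x7f)     /* psrlq 1 + pand mask7f on src1 */
                    + ((b >> 1) & 0x7f)     /* psrlq 1 + pand mask7f on src2 */
                    + ((a | b) & 1) );      /* por + pand mask1: rounding carry */
}

int main (void)
{
    int a, b;
    /* exhaustive check of the identity over all byte pairs */
    for (a = 0; a < 256; a++)
        for (b = 0; b < 256; b++)
            assert (avg_round_up ((uint8_t)a, (uint8_t)b) == (a + b + 1) / 2);
    return 0;
}

In the MMX version psrlq shifts the whole 64-bit quadword, so the pand with mask7f is what confines the shift to each byte, and mask1 keeps only the per-byte carry of (src1 | src2). This is what lets the new code drop the four punpck*bw instructions, the word-wide adds and shifts, and the final packuswb of the old path.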
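
Another side note, not from the patch itself: the new MC_avg_xy_8wide_mmx / MC_put_xy_8wide_mmx helpers reorganize the half-pel (x and y) interpolation so that the horizontal sum ref[x] + ref[x+1] of a source row is computed once, kept unpacked to words in mm1/mm2, and reused on the next iteration; the old MC_*_xy_mmx loops read every source row twice, once through ref and once through ref_next. A scalar model of that scheduling, assuming yuv_data_t is an unsigned 8-bit sample (illustrative reference code, not the MMX implementation):

#include <stdint.h>

typedef uint8_t yuv_data_t;    /* assumption: 8-bit video samples */

void put_xy_8wide_ref (yuv_data_t * dest, yuv_data_t * ref,
                       int stride, int height)
{
    uint16_t prev[8];          /* previous row: ref[x] + ref[x+1] */
    int x;

    for (x = 0; x < 8; x++)    /* first row, computed only once */
        prev[x] = (uint16_t)(ref[x] + ref[x+1]);
    ref += stride;

    do
    {
        for (x = 0; x < 8; x++)
        {
            uint16_t cur = (uint16_t)(ref[x] + ref[x+1]);
            dest[x] = (yuv_data_t)((prev[x] + cur + 2) >> 2);   /* round4, then /4 */
            prev[x] = cur;     /* carry this row over to the next pass */
        }
        ref += stride;
        dest += stride;
    } while (--height);
}

MC_avg_xy_8wide_mmx does the same and then averages the interpolated value with the prediction already in dest, out = (interp + dest[x] + 1) >> 1, which is the extra paddw round1 / psraw 1 pair in the patch. The 16-pixel-wide entry points simply run the 8-wide helper twice, on dest/ref and on dest+8/ref+8.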