git.sesse.net Git - ffmpeg/blob - libavcodec/x86/fpel_mmx.c

   1 /*
   2  * MMX-optimized avg/put pixel routines
   3  *
   4  * Copyright (c) 2000, 2001 Fabrice Bellard
   5  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   6  *
   7  * This file is part of Libav.
   8  *
   9  * Libav is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2.1 of the License, or (at your option) any later version.
  13  *
  14  * Libav is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with Libav; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22  */
  23
  24 #include <stddef.h>
  25 #include <stdint.h>
  26
  27 #include "config.h"
  28 #include "dsputil_mmx.h"
  29
  30 #if HAVE_MMX_INLINE
  31
  32 // in case more speed is needed - unrolling would certainly help
  33 void ff_avg_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
  34                         ptrdiff_t line_size, int h)
  35 {
  36     MOVQ_BFE(mm6);
  37     JUMPALIGN();
  38     do {
  39         __asm__ volatile(
  40              "movq  %0, %%mm0           \n\t"
  41              "movq  %1, %%mm1           \n\t"
  42              PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6)
  43              "movq  %%mm2, %0           \n\t"
  44              :"+m"(*block)
  45              :"m"(*pixels)
  46              :"memory");
  47         pixels += line_size;
  48         block += line_size;
  49     }
  50     while (--h);
  51 }
  52
  53 void ff_avg_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
  54                          ptrdiff_t line_size, int h)
  55 {
  56     MOVQ_BFE(mm6);
  57     JUMPALIGN();
  58     do {
  59         __asm__ volatile(
  60              "movq  %0, %%mm0           \n\t"
  61              "movq  %1, %%mm1           \n\t"
  62              PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6)
  63              "movq  %%mm2, %0           \n\t"
  64              "movq  8%0, %%mm0          \n\t"
  65              "movq  8%1, %%mm1          \n\t"
  66              PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6)
  67              "movq  %%mm2, 8%0          \n\t"
  68              :"+m"(*block)
  69              :"m"(*pixels)
  70              :"memory");
  71         pixels += line_size;
  72         block += line_size;
  73     }
  74     while (--h);
  75 }
  76
  77 void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
  78                         ptrdiff_t line_size, int h)
  79 {
  80     __asm__ volatile (
  81         "lea   (%3, %3), %%"REG_a"      \n\t"
  82         ".p2align     3                 \n\t"
  83         "1:                             \n\t"
  84         "movq  (%1    ), %%mm0          \n\t"
  85         "movq  (%1, %3), %%mm1          \n\t"
  86         "movq     %%mm0, (%2)           \n\t"
  87         "movq     %%mm1, (%2, %3)       \n\t"
  88         "add  %%"REG_a", %1             \n\t"
  89         "add  %%"REG_a", %2             \n\t"
  90         "movq  (%1    ), %%mm0          \n\t"
  91         "movq  (%1, %3), %%mm1          \n\t"
  92         "movq     %%mm0, (%2)           \n\t"
  93         "movq     %%mm1, (%2, %3)       \n\t"
  94         "add  %%"REG_a", %1             \n\t"
  95         "add  %%"REG_a", %2             \n\t"
  96         "subl        $4, %0             \n\t"
  97         "jnz         1b                 \n\t"
  98         : "+g"(h), "+r"(pixels),  "+r"(block)
  99         : "r"((x86_reg)line_size)
 100         : "%"REG_a, "memory"
 101         );
 102 }
 103
 104 void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
 105                          ptrdiff_t line_size, int h)
 106 {
 107     __asm__ volatile (
 108         "lea   (%3, %3), %%"REG_a"      \n\t"
 109         ".p2align     3                 \n\t"
 110         "1:                             \n\t"
 111         "movq  (%1    ), %%mm0          \n\t"
 112         "movq 8(%1    ), %%mm4          \n\t"
 113         "movq  (%1, %3), %%mm1          \n\t"
 114         "movq 8(%1, %3), %%mm5          \n\t"
 115         "movq     %%mm0,  (%2)          \n\t"
 116         "movq     %%mm4, 8(%2)          \n\t"
 117         "movq     %%mm1,  (%2, %3)      \n\t"
 118         "movq     %%mm5, 8(%2, %3)      \n\t"
 119         "add  %%"REG_a", %1             \n\t"
 120         "add  %%"REG_a", %2             \n\t"
 121         "movq  (%1    ), %%mm0          \n\t"
 122         "movq 8(%1    ), %%mm4          \n\t"
 123         "movq  (%1, %3), %%mm1          \n\t"
 124         "movq 8(%1, %3), %%mm5          \n\t"
 125         "movq     %%mm0,  (%2)          \n\t"
 126         "movq     %%mm4, 8(%2)          \n\t"
 127         "movq     %%mm1,  (%2, %3)      \n\t"
 128         "movq     %%mm5, 8(%2, %3)      \n\t"
 129         "add  %%"REG_a", %1             \n\t"
 130         "add  %%"REG_a", %2             \n\t"
 131         "subl        $4, %0             \n\t"
 132         "jnz         1b                 \n\t"
 133         : "+g"(h), "+r"(pixels),  "+r"(block)
 134         : "r"((x86_reg)line_size)
 135         : "%"REG_a, "memory"
 136         );
 137 }
 138
 139 #endif /* HAVE_MMX_INLINE */