git.sesse.net Git - ffmpeg/blob - libavcodec/x86/dsputil_mmx.h

   1 /*
   2  * MMX optimized DSP utils
   3  * Copyright (c) 2007  Aurelien Jacobs <aurel@gnuage.org>
   4  *
   5  * This file is part of Libav.
   6  *
   7  * Libav is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * Libav is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with Libav; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 #ifndef AVCODEC_X86_DSPUTIL_MMX_H
  23 #define AVCODEC_X86_DSPUTIL_MMX_H
  24
  25 #include <stddef.h>
  26 #include <stdint.h>
  27
  28 #include "libavcodec/dsputil.h"
  29 #include "libavutil/x86/asm.h"
  30 #include "constants.h"
  31
   // MOVQ_WONE(regd): set every 16-bit word of register regd to 0x0001.
   // pcmpeqd of a register with itself produces all-ones; the per-word
   // logical right shift by 15 then leaves only bit 0 set in each word.
   // The asm statement lists no outputs, inputs or clobbers, so the compiler
   // is not told that regd is modified.
   32 #define MOVQ_WONE(regd) \
   33     __asm__ volatile ( \
   34     "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
   35     "psrlw $15, %%" #regd ::)
  36
   // JUMPALIGN(): emit the assembler directive ".p2align 3" at the point of
   // use, i.e. align the following output to a 2^3 = 8 byte boundary.
   // Takes no arguments and declares no asm operands.
   37 #define JUMPALIGN()     __asm__ volatile (".p2align 3"::)
   // MOVQ_ZERO(regd): clear register regd by XOR-ing it with itself.
   // No asm operands or clobbers are declared, so the compiler is not told
   // that regd is modified.
   38 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%"#regd", %%"#regd ::)
  39
   // MOVQ_BFE(regd): set every byte of register regd to 0xfe.
   // pcmpeqd of a register with itself produces all-ones (0xff per byte);
   // the byte-wise add of the register to itself wraps 0xff + 0xff to 0xfe.
   // For a 64-bit register this is 0xfefefefefefefefe, the mask value the
   // PAVGB*/PAVGBP* macros in this header expect in regfe / mm6.
   // No asm operands or clobbers are declared.
   40 #define MOVQ_BFE(regd)                                  \
   41     __asm__ volatile (                                  \
   42         "pcmpeqd %%"#regd", %%"#regd"   \n\t"           \
   43         "paddb   %%"#regd", %%"#regd"   \n\t" ::)
  44
   // MOVQ_WTWO(regd): load the "word two" constant into register regd.
   // Two implementations are selected on the PIC macro:
   //  - PIC not defined: a single movq from the memory operand ff_wtwo.
   //    ff_wtwo is not declared in this header; NOTE(review): presumably it
   //    comes from constants.h and holds 0x0002 in every 16-bit word -
   //    confirm against its definition.
   //  - PIC defined: the value is built in the register without any memory
   //    reference: all-ones, then each word shifted right by 15 (-> 0x0001),
   //    then each word shifted left by 1 (-> 0x0002).
   // The register-only variant declares no asm operands or clobbers.
   45 #ifndef PIC
   46 #define MOVQ_WTWO(regd) __asm__ volatile ("movq %0, %%"#regd" \n\t" :: "m"(ff_wtwo))
   47 #else
   48 // for shared library it's better to use this way for accessing constants
   49 // pcmpeqd -> -1
   50 #define MOVQ_WTWO(regd)                                 \
   51     __asm__ volatile (                                  \
   52         "pcmpeqd %%"#regd", %%"#regd"   \n\t"           \
   53         "psrlw         $15, %%"#regd"   \n\t"           \
   54         "psllw          $1, %%"#regd"   \n\t"::)
   55
   56 #endif
  57
   58 // using regr as temporary and for the output result
   59 // first argument is unmodified and second is trashed
   60 // regfe is supposed to contain 0xfefefefefefefefe
   //
   // PAVGB_MMX_NO_RND(rega, regb, regr, regfe): byte-wise average of rega and
   // regb, rounded down, left in regr.
   // Expands to adjacent string literals (an asm template fragment), not to a
   // statement; the arguments are stringified verbatim into the instructions.
   // Per byte it computes (a & b) + (((a ^ b) & 0xfe) >> 1), which equals
   // floor((a + b) / 2) without needing a ninth bit.
   // The shift is a whole-quadword psrlq; masking with regfe first clears
   // bit 0 of every byte so nothing leaks into the neighbouring byte.
   61 #define PAVGB_MMX_NO_RND(rega, regb, regr, regfe)                \
   62     "movq   "#rega", "#regr"            \n\t"                    \
   63     "pand   "#regb", "#regr"            \n\t"                    \
   64     "pxor   "#rega", "#regb"            \n\t"                    \
   65     "pand  "#regfe", "#regb"            \n\t"                    \
   66     "psrlq       $1, "#regb"            \n\t"                    \
   67     "paddb  "#regb", "#regr"            \n\t"
  68
   // PAVGB_MMX(rega, regb, regr, regfe): byte-wise average of rega and regb,
   // rounded up, left in regr. rega is only read; regb is overwritten.
   // Expands to adjacent string literals (an asm template fragment), not to a
   // statement; the arguments are stringified verbatim into the instructions.
   // Per byte it computes (a | b) - (((a ^ b) & 0xfe) >> 1), which equals
   // ceil((a + b) / 2).
   // regfe must hold 0xfe in every byte: the whole-quadword psrlq relies on
   // the preceding mask to keep bits from crossing byte boundaries.
   69 #define PAVGB_MMX(rega, regb, regr, regfe)                       \
   70     "movq   "#rega", "#regr"            \n\t"                    \
   71     "por    "#regb", "#regr"            \n\t"                    \
   72     "pxor   "#rega", "#regb"            \n\t"                    \
   73     "pand  "#regfe", "#regb"            \n\t"                    \
   74     "psrlq       $1, "#regb"            \n\t"                    \
   75     "psubb  "#regb", "#regr"            \n\t"
  76
   77 // mm6 is supposed to contain 0xfefefefefefefefe
   //
   // PAVGBP_MMX_NO_RND(rega, regb, regr, regc, regd, regp): two independent
   // byte-wise averages, both rounded down, with their instructions
   // interleaved:
   //     regr = floor((rega + regb) / 2)
   //     regp = floor((regc + regd) / 2)
   // rega and regc are only read; regb and regd are overwritten.
   // Each pair uses (a & b) + (((a ^ b) & 0xfe) >> 1); the 0xfe mask is taken
   // from the hard-coded register mm6 rather than from a macro argument.
   // Expands to adjacent string literals (an asm template fragment).
   78 #define PAVGBP_MMX_NO_RND(rega, regb, regr,  regc, regd, regp)   \
   79     "movq  "#rega", "#regr"             \n\t"                    \
   80     "movq  "#regc", "#regp"             \n\t"                    \
   81     "pand  "#regb", "#regr"             \n\t"                    \
   82     "pand  "#regd", "#regp"             \n\t"                    \
   83     "pxor  "#rega", "#regb"             \n\t"                    \
   84     "pxor  "#regc", "#regd"             \n\t"                    \
   85     "pand    %%mm6, "#regb"             \n\t"                    \
   86     "pand    %%mm6, "#regd"             \n\t"                    \
   87     "psrlq      $1, "#regb"             \n\t"                    \
   88     "psrlq      $1, "#regd"             \n\t"                    \
   89     "paddb "#regb", "#regr"             \n\t"                    \
   90     "paddb "#regd", "#regp"             \n\t"
  91
   // PAVGBP_MMX(rega, regb, regr, regc, regd, regp): two independent
   // byte-wise averages, both rounded up, with their instructions
   // interleaved:
   //     regr = ceil((rega + regb) / 2)
   //     regp = ceil((regc + regd) / 2)
   // rega and regc are only read; regb and regd are overwritten.
   // Each pair uses (a | b) - (((a ^ b) & 0xfe) >> 1); the 0xfe mask is read
   // from the hard-coded register mm6, which the caller must have loaded
   // with 0xfefefefefefefefe beforehand.
   // The two psrlq lines appear in regd, regb order; they touch different
   // registers, so the order does not affect the result.
   // Expands to adjacent string literals (an asm template fragment).
   92 #define PAVGBP_MMX(rega, regb, regr, regc, regd, regp)           \
   93     "movq  "#rega", "#regr"             \n\t"                    \
   94     "movq  "#regc", "#regp"             \n\t"                    \
   95     "por   "#regb", "#regr"             \n\t"                    \
   96     "por   "#regd", "#regp"             \n\t"                    \
   97     "pxor  "#rega", "#regb"             \n\t"                    \
   98     "pxor  "#regc", "#regd"             \n\t"                    \
   99     "pand    %%mm6, "#regb"             \n\t"                    \
  100     "pand    %%mm6, "#regd"             \n\t"                    \
  101     "psrlq      $1, "#regd"             \n\t"                    \
  102     "psrlq      $1, "#regb"             \n\t"                    \
  103     "psubb "#regb", "#regr"             \n\t"                    \
  104     "psubb "#regd", "#regp"             \n\t"
 105
  // Initialisation entry points, declared here and defined elsewhere.
  // Both take the DSPContext to operate on and the owning AVCodecContext.
  // NOTE(review): by their names these presumably install x86-optimised
  // function pointers into *c (encoder routines / pixel routines
  // respectively) - confirm against the definitions.
  106 void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx);
  107 void ff_dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);
 108
  // Clamped pixel store routines, declared here and defined elsewhere.
  // All three share one signature: a read-only int16_t block, a writable
  // uint8_t pixel buffer, and an int line_size.
  // NOTE(review): presumably block holds 16-bit coefficients that are
  // clamped to the 8-bit range and added to / stored into pixels, with
  // line_size the byte stride between rows - confirm in the implementation.
  109 void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);
  110 void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);
  111 void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);
 112
 113
  // Full-pel put/avg pixel routines in MMX, MMXEXT and SSE2 flavours,
  // declared here and defined elsewhere.
  // All eight share the signature
  //     (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
  // where block is the only non-const pointer.
  // NOTE(review): presumably block is the destination, pixels the source,
  // line_size the byte stride and h the row count; the 8/16 in the name
  // would be the block width, "put" a plain copy and "avg" an average with
  // the existing destination - confirm in the implementation.
  114 void ff_avg_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
  115                         ptrdiff_t line_size, int h);
  116 void ff_avg_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
  117                          ptrdiff_t line_size, int h);
  118 void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
  119                         ptrdiff_t line_size, int h);
  120 void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
  121                          ptrdiff_t line_size, int h);
  122 void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
  123                            ptrdiff_t line_size, int h);
  124 void ff_put_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
  125                            ptrdiff_t line_size, int h);
  126 void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
  127                           ptrdiff_t line_size, int h);
  128 void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
  129                           ptrdiff_t line_size, int h);
 130
  // Declared here, defined elsewhere. Same signature as the other pixel
  // routines in this header: writable block, read-only pixels, ptrdiff_t
  // line_size, int h.
  // NOTE(review): the "_x2" suffix presumably denotes horizontal half-pel
  // interpolation before averaging into block - confirm in the
  // implementation.
  131 void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels,
  132                            ptrdiff_t line_size, int h);
 133
  // "avg" xy2 routines for 8- and 16-named blocks, declared here and defined
  // elsewhere. Signature: writable block, read-only pixels, ptrdiff_t
  // line_size, int h.
  // NOTE(review): "_xy2" presumably denotes half-pel interpolation in both
  // directions, with the result averaged into block - confirm in the
  // implementation.
  134 void ff_avg_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,
  135                             ptrdiff_t line_size, int h);
  136 void ff_avg_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,
  137                              ptrdiff_t line_size, int h);
 138
  // "put" xy2 routines for 8- and 16-named blocks, declared here and defined
  // elsewhere. Signature: writable block, read-only pixels, ptrdiff_t
  // line_size, int h.
  // NOTE(review): "_xy2" presumably denotes half-pel interpolation in both
  // directions, with the result stored to block - confirm in the
  // implementation.
  139 void ff_put_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,
  140                             ptrdiff_t line_size, int h);
  141 void ff_put_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,
  142                              ptrdiff_t line_size, int h);
 143
  // Declared here, defined elsewhere. Takes one writable destination (dst),
  // five read-only line pointers (lum_m4, lum_m3, lum_m2, lum_m1, lum) and
  // an int size.
  // NOTE(review): presumably filters the five input lines into one
  // deinterlaced output line of size pixels - confirm the filter taps and
  // the unit of size in the implementation.
  144 void ff_deinterlace_line_mmx(uint8_t *dst,
  145                              const uint8_t *lum_m4, const uint8_t *lum_m3,
  146                              const uint8_t *lum_m2, const uint8_t *lum_m1,
  147                              const uint8_t *lum,
  148                              int size);
 149
  // Declared here, defined elsewhere. Takes five line pointers (lum_m4,
  // lum_m3, lum_m2, lum_m1, lum) and an int size; there is no separate
  // destination parameter.
  // NOTE(review): every pointer is const-qualified in this prototype even
  // though the name says "inplace", so one of these buffers is presumably
  // written by the implementation - confirm which one and whether the
  // const qualifier matches the definition.
  150 void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4,
  151                                      const uint8_t *lum_m3,
  152                                      const uint8_t *lum_m2,
  153                                      const uint8_t *lum_m1,
  154                                      const uint8_t *lum, int size);
 155
 156 #endif /* AVCODEC_X86_DSPUTIL_MMX_H */