git.sesse.net Git - ffmpeg/blob - libavcodec/x86/dsputil_x86.h

   1 /*
   2  * MMX optimized DSP utils
   3  * Copyright (c) 2007  Aurelien Jacobs <aurel@gnuage.org>
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 #ifndef AVCODEC_X86_DSPUTIL_MMX_H
  23 #define AVCODEC_X86_DSPUTIL_MMX_H
  24
  25 #include <stddef.h>
  26 #include <stdint.h>
  27
  28 #include "libavcodec/dsputil.h"
  29 #include "libavutil/x86/asm.h"
  30 #include "constants.h"
  31
     // MOVQ_WONE(regd): set every 16-bit word of register regd to 0x0001.
     // pcmpeqd of a register with itself yields all ones; the logical right
     // shift of each word by 15 then leaves only the lowest bit set.
     // regd is a bare register name (for example mm7); the "%%" prefix is
     // added by the macro.
     // NOTE: the asm statement has empty operand lists and no clobbers, so the
     // compiler is not told that regd is modified.
  32 #define MOVQ_WONE(regd) \
  33     __asm__ volatile ( \
  34     "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
  35     "psrlw $15, %%" #regd ::)
  36
     // JUMPALIGN(): emit a ".p2align 3" assembler directive, i.e. pad the
     // current output position to a 2^3 = 8 byte boundary.
  37 #define JUMPALIGN()     __asm__ volatile (".p2align 3"::)
     // MOVQ_ZERO(regd): clear register regd by XOR-ing it with itself.
     // regd is a bare register name; the "%%" prefix is added by the macro.
  38 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%"#regd", %%"#regd ::)
  39
     // MOVQ_BFE(regd): set every byte of register regd to 0xfe.
     // pcmpeqd of a register with itself yields all ones (0xff per byte);
     // paddb then adds the register to itself with per-byte wraparound,
     // and 0xff + 0xff truncated to 8 bits is 0xfe.
     // The result is the mask the PAVGB* macros in this header expect.
  40 #define MOVQ_BFE(regd)                                  \
  41     __asm__ volatile (                                  \
  42         "pcmpeqd %%"#regd", %%"#regd"   \n\t"           \
  43         "paddb   %%"#regd", %%"#regd"   \n\t" ::)
  44
     // MOVQ_WTWO(regd): load the "word two" constant into register regd.
     // Two implementations are selected on whether PIC is defined.
     // Non-PIC: a single movq from the memory operand ff_wtwo, which is declared
     // outside this header (presumably a 64-bit constant holding 0x0002 in each
     // 16-bit word -- confirm against its definition).
  45 #ifndef PIC
  46 #define MOVQ_WTWO(regd) __asm__ volatile ("movq %0, %%"#regd" \n\t" :: "m"(ff_wtwo))
  47 #else
  48 // PIC (shared library) build: synthesize the constant in the register instead
  49 // of referencing ff_wtwo in memory. pcmpeqd reg,reg -> all ones (-1);
     // psrlw $15 -> 0x0001 in each word; psllw $1 -> 0x0002 in each word.
  50 #define MOVQ_WTWO(regd)                                 \
  51     __asm__ volatile (                                  \
  52         "pcmpeqd %%"#regd", %%"#regd"   \n\t"           \
  53         "psrlw         $15, %%"#regd"   \n\t"           \
  54         "psllw          $1, %%"#regd"   \n\t"::)
  55
  56 #endif
  57
  58 // PAVGB_MMX_NO_RND: per-byte average of rega and regb, rounded down,
  59 // computed as (a & b) + (((a ^ b) & 0xfe) >> 1). The result is left in regr;
  60 // the first argument (rega) is unmodified and the second (regb) is trashed.
     // regfe is supposed to contain 0xfefefefefefefefe: clearing the low bit of
     // every byte before the 64-bit psrlq keeps bits from shifting into the
     // neighbouring byte.
     // The macro expands to string-literal fragments only, so it is meant to be
     // pasted into a larger asm template. Register arguments are stringized
     // verbatim; no "%%" prefix is added here.
  61 #define PAVGB_MMX_NO_RND(rega, regb, regr, regfe)                \
  62     "movq   "#rega", "#regr"            \n\t"                    \
  63     "pand   "#regb", "#regr"            \n\t"                    \
  64     "pxor   "#rega", "#regb"            \n\t"                    \
  65     "pand  "#regfe", "#regb"            \n\t"                    \
  66     "psrlq       $1, "#regb"            \n\t"                    \
  67     "paddb  "#regb", "#regr"            \n\t"
  68
     // PAVGB_MMX: per-byte average of rega and regb, rounded up, computed as
     // (a | b) - (((a ^ b) & 0xfe) >> 1). The result is left in regr; rega is
     // only read and regb is overwritten with an intermediate value.
     // regfe must hold the 0xfe-per-byte mask so that the 64-bit psrlq does not
     // move bits across byte boundaries.
     // Expands to string-literal fragments for use inside an asm template;
     // register arguments are stringized verbatim (no "%%" prefix is added).
  69 #define PAVGB_MMX(rega, regb, regr, regfe)                       \
  70     "movq   "#rega", "#regr"            \n\t"                    \
  71     "por    "#regb", "#regr"            \n\t"                    \
  72     "pxor   "#rega", "#regb"            \n\t"                    \
  73     "pand  "#regfe", "#regb"            \n\t"                    \
  74     "psrlq       $1, "#regb"            \n\t"                    \
  75     "psubb  "#regb", "#regr"            \n\t"
  76
  77 // PAVGBP_MMX_NO_RND: two interleaved rounded-down byte averages.
     //   regr = (rega & regb) + (((rega ^ regb) & 0xfe) >> 1)
     //   regp = (regc & regd) + (((regc ^ regd) & 0xfe) >> 1)
     // rega and regc are only read; regb and regd are trashed.
     // mm6 is hard-coded as the mask register and is supposed to contain
     // 0xfefefefefefefefe.
     // Expands to string-literal fragments for use inside an asm template;
     // register arguments are stringized verbatim (no "%%" prefix is added).
  78 #define PAVGBP_MMX_NO_RND(rega, regb, regr,  regc, regd, regp)   \
  79     "movq  "#rega", "#regr"             \n\t"                    \
  80     "movq  "#regc", "#regp"             \n\t"                    \
  81     "pand  "#regb", "#regr"             \n\t"                    \
  82     "pand  "#regd", "#regp"             \n\t"                    \
  83     "pxor  "#rega", "#regb"             \n\t"                    \
  84     "pxor  "#regc", "#regd"             \n\t"                    \
  85     "pand    %%mm6, "#regb"             \n\t"                    \
  86     "pand    %%mm6, "#regd"             \n\t"                    \
  87     "psrlq      $1, "#regb"             \n\t"                    \
  88     "psrlq      $1, "#regd"             \n\t"                    \
  89     "paddb "#regb", "#regr"             \n\t"                    \
  90     "paddb "#regd", "#regp"             \n\t"
  91
     // PAVGBP_MMX: two interleaved rounded-up byte averages.
     //   regr = (rega | regb) - (((rega ^ regb) & 0xfe) >> 1)
     //   regp = (regc | regd) - (((regc ^ regd) & 0xfe) >> 1)
     // rega and regc are only read; regb and regd are trashed.
     // mm6 is hard-coded as the mask register and must hold 0xfe in every byte.
     // Expands to string-literal fragments for use inside an asm template;
     // register arguments are stringized verbatim (no "%%" prefix is added).
  92 #define PAVGBP_MMX(rega, regb, regr, regc, regd, regp)           \
  93     "movq  "#rega", "#regr"             \n\t"                    \
  94     "movq  "#regc", "#regp"             \n\t"                    \
  95     "por   "#regb", "#regr"             \n\t"                    \
  96     "por   "#regd", "#regp"             \n\t"                    \
  97     "pxor  "#rega", "#regb"             \n\t"                    \
  98     "pxor  "#regc", "#regd"             \n\t"                    \
  99     "pand    %%mm6, "#regb"             \n\t"                    \
 100     "pand    %%mm6, "#regd"             \n\t"                    \
 101     "psrlq      $1, "#regd"             \n\t"                    \
 102     "psrlq      $1, "#regb"             \n\t"                    \
 103     "psubb "#regb", "#regr"             \n\t"                    \
 104     "psubb "#regd", "#regp"             \n\t"
 105
     // Initialisation entry points; both take the DSPContext and the codec
     // context. The definitions are not in this header -- presumably they
     // install x86-optimised routines into *c; confirm in the implementation.
 106 void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx);
 107 void ff_dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);
 108
     // Clamped pixel writers: each reads a block of int16_t values (const) and
     // writes to the pixels buffer, whose rows are line_size apart. The block
     // dimensions and exact clamping range are not visible here (presumably an
     // 8x8 block clamped to 0..255 -- confirm in the implementation).
 109 void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);
 110 void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);
 111 void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);
 112
     // Block clearing, in MMX and SSE variants. The number of coefficients per
     // block and the number of blocks handled by the *_blocks_* functions are
     // fixed by the implementation, not by these prototypes.
 113 void ff_clear_block_mmx(int16_t *block);
 114 void ff_clear_block_sse(int16_t *block);
 115 void ff_clear_blocks_mmx(int16_t *blocks);
 116 void ff_clear_blocks_sse(int16_t *blocks);
 117
     // Presumably adds w bytes of src into dst element-wise -- confirm in the
     // implementation. Note that src is not const-qualified here.
 118 void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, int w);
 119
     // Median-prediction helper (name suggests HuffYUV and the CMOV instruction).
     // top and diff are read-only inputs of width w; left and left_top are
     // passed by pointer, presumably as in/out state carried between calls --
     // confirm in the implementation.
 120 void ff_add_hfyu_median_prediction_cmov(uint8_t *dst, const uint8_t *top,
 121                                         const uint8_t *diff, int w,
 122                                         int *left, int *left_top);
 123
     // Edge drawing on a picture buffer. Presumably wrap is the row stride,
     // width/height the picture size, w/h the border size and sides a bitmask
     // selecting which edges to extend -- confirm against the callers.
 124 void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
 125                        int w, int h, int sides);
 126
     // ff_gmc_mmx and ff_gmc_sse share one signature and differ only in the
     // instruction-set suffix. Presumably global motion compensation: (ox, oy)
     // the start offset, dxx/dxy/dyx/dyy the per-pixel deltas, shift and r
     // the fixed-point precision and rounding term -- confirm in the
     // implementation.
 127 void ff_gmc_mmx(uint8_t *dst, uint8_t *src,
 128                 int stride, int h, int ox, int oy,
 129                 int dxx, int dxy, int dyx, int dyy,
 130                 int shift, int r, int width, int height);
 131
 132 void ff_gmc_sse(uint8_t *dst, uint8_t *src,
 133                 int stride, int h, int ox, int oy,
 134                 int dxx, int dxy, int dyx, int dyy,
 135                 int shift, int r, int width, int height);
 136
     // Presumably clips len floats from src into the range [min, max] and
     // stores them in dst. Alignment and length-multiple requirements, if any,
     // are not visible in this header.
 137 void ff_vector_clipf_sse(float *dst, const float *src,
 138                          float min, float max, int len);
 139
     // Pixel block primitives. All share one signature: block is the writable
     // destination, pixels the read-only source, line_size a ptrdiff_t row
     // stride and h presumably the number of rows.
     // The names encode the operation (put / avg), the block width (8 / 16),
     // an optional interpolation suffix (_x2, _xy2) and the instruction set
     // (mmx, mmxext, sse2). Presumably "put" stores the source and "avg"
     // averages it with the existing destination -- confirm in the
     // implementation.
 140 void ff_avg_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
 141                         ptrdiff_t line_size, int h);
 142 void ff_avg_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
 143                          ptrdiff_t line_size, int h);
 144 void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
 145                         ptrdiff_t line_size, int h);
 146 void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
 147                          ptrdiff_t line_size, int h);
 148 void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
 149                            ptrdiff_t line_size, int h);
 150 void ff_put_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
 151                            ptrdiff_t line_size, int h);
 152 void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
 153                           ptrdiff_t line_size, int h);
 154 void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
 155                           ptrdiff_t line_size, int h);
 156
     // Half-pel variants: _x2 presumably interpolates horizontally, _xy2 in
     // both directions -- confirm in the implementation.
 157 void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels,
 158                            ptrdiff_t line_size, int h);
 159
 160 void ff_avg_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,
 161                             ptrdiff_t line_size, int h);
 162 void ff_avg_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,
 163                              ptrdiff_t line_size, int h);
 164
 165 void ff_put_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,
 166                             ptrdiff_t line_size, int h);
 167 void ff_put_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,
 168                              ptrdiff_t line_size, int h);
 169
 170
     // IDCT entry points. Each takes a single writable int16_t block and
     // returns nothing, so the transform is presumably done in place; the block
     // size is not visible here.
 171 void ff_mmx_idct(int16_t *block);
 172 void ff_mmxext_idct(int16_t *block);
 173
     // Deinterlacing of one line: dst is the only writable pointer; the five
     // lum* pointers are read-only inputs (presumably consecutive lines, with
     // lum_m4 the oldest and lum the current one) and size is presumably the
     // line width -- confirm in the implementation.
 174 void ff_deinterlace_line_mmx(uint8_t *dst,
 175                              const uint8_t *lum_m4, const uint8_t *lum_m3,
 176                              const uint8_t *lum_m2, const uint8_t *lum_m1,
 177                              const uint8_t *lum,
 178                              int size);
 179
     // NOTE(review): despite the "inplace" name, every pointer parameter is
     // const-qualified in this prototype. Confirm against the definition which
     // line, if any, is actually written and whether const is appropriate.
 180 void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4,
 181                                      const uint8_t *lum_m3,
 182                                      const uint8_t *lum_m2,
 183                                      const uint8_t *lum_m1,
 184                                      const uint8_t *lum, int size);
 185
     // PIXELS16: define a 16-pixel-wide function in terms of the matching
     // 8-pixel-wide one.
     // The generated function is named PFX1 ## _pixels16 ## TYPE ## CPUEXT and
     // calls PFX2 ## PFX1 ## _pixels8 ## TYPE ## CPUEXT twice: once at
     // (block, pixels) and once 8 bytes further at (block + 8, pixels + 8),
     // with the same line_size and h.
     //   STATIC  - pasted verbatim before the return type (e.g. static or empty)
     //   PFX1    - operation prefix shared by the new function and the callee
     //   PFX2    - extra prefix applied to the callee name only
     //   TYPE    - optional variant suffix inserted after the pixel width
     //   CPUEXT  - instruction-set suffix
     // The 8-pixel callee must already be declared where the macro is expanded.
 186 #define PIXELS16(STATIC, PFX1, PFX2, TYPE, CPUEXT)                      \
 187 STATIC void PFX1 ## _pixels16 ## TYPE ## CPUEXT(uint8_t *block,         \
 188                                                 const uint8_t *pixels,  \
 189                                                 ptrdiff_t line_size,    \
 190                                                 int h)                  \
 191 {                                                                       \
 192     PFX2 ## PFX1 ## _pixels8 ## TYPE ## CPUEXT(block,      pixels,      \
 193                                                line_size, h);           \
 194     PFX2 ## PFX1 ## _pixels8 ## TYPE ## CPUEXT(block + 8,  pixels + 8,  \
 195                                                line_size, h);           \
 196 }
 197
 198 #endif /* AVCODEC_X86_DSPUTIL_MMX_H */