git.sesse.net Git - ffmpeg/blob - libavcodec/x86/fmtconvert.asm

   1 ;******************************************************************************
   2 ;* x86 optimized Format Conversion Utils
   3 ;* Copyright (c) 2008 Loren Merritt
   4 ;*
   5 ;* This file is part of FFmpeg.
   6 ;*
   7 ;* FFmpeg is free software; you can redistribute it and/or
   8 ;* modify it under the terms of the GNU Lesser General Public
   9 ;* License as published by the Free Software Foundation; either
  10 ;* version 2.1 of the License, or (at your option) any later version.
  11 ;*
  12 ;* FFmpeg is distributed in the hope that it will be useful,
  13 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 ;* Lesser General Public License for more details.
  16 ;*
  17 ;* You should have received a copy of the GNU Lesser General Public
  18 ;* License along with FFmpeg; if not, write to the Free Software
  19 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20 ;******************************************************************************
  21
  22 %include "libavutil/x86/x86util.asm"
  23
  24 SECTION .text
  25
  26 ;------------------------------------------------------------------------------
  27 ; void ff_int32_to_float_fmul_scalar(float *dst, const int32_t *src, float mul,
  28 ;                                    int len);
  29 ;------------------------------------------------------------------------------
  30 %macro INT32_TO_FLOAT_FMUL_SCALAR 1
  31 %if UNIX64
  32 cglobal int32_to_float_fmul_scalar, 3, 3, %1, dst, src, len
  33 %else
  34 cglobal int32_to_float_fmul_scalar, 4, 4, %1, dst, src, mul, len
  35 %endif
  36 %if WIN64
  37     SWAP 0, 2
  38 %elif ARCH_X86_32
  39     movss   m0, mulm
  40 %endif
  41     SPLATD  m0
  42     shl     lenq, 2
  43     add     srcq, lenq
  44     add     dstq, lenq
  45     neg     lenq
  46 .loop:
  47 %if cpuflag(sse2)
  48     cvtdq2ps  m1, [srcq+lenq   ]
  49     cvtdq2ps  m2, [srcq+lenq+16]
  50 %else
  51     cvtpi2ps  m1, [srcq+lenq   ]
  52     cvtpi2ps  m3, [srcq+lenq+ 8]
  53     cvtpi2ps  m2, [srcq+lenq+16]
  54     cvtpi2ps  m4, [srcq+lenq+24]
  55     movlhps   m1, m3
  56     movlhps   m2, m4
  57 %endif
  58     mulps     m1, m0
  59     mulps     m2, m0
  60     mova  [dstq+lenq   ], m1
  61     mova  [dstq+lenq+16], m2
  62     add     lenq, 32
  63     jl .loop
  64     REP_RET
  65 %endmacro
  66
  67 INIT_XMM sse
  68 INT32_TO_FLOAT_FMUL_SCALAR 5
  69 INIT_XMM sse2
  70 INT32_TO_FLOAT_FMUL_SCALAR 3
  71
  72 ;------------------------------------------------------------------------------
  73 ; void ff_int32_to_float_fmul_array8(FmtConvertContext *c, float *dst, const int32_t *src,
  74 ;                                    const float *mul, int len);
  75 ;------------------------------------------------------------------------------
  76 %macro INT32_TO_FLOAT_FMUL_ARRAY8 0
  77 cglobal int32_to_float_fmul_array8, 5, 5, 5, c, dst, src, mul, len
  78     shl     lend, 2
  79     add     srcq, lenq
  80     add     dstq, lenq
  81     neg     lenq
  82 .loop:
  83     movss     m0, [mulq]
  84     SPLATD    m0
  85 %if cpuflag(sse2)
  86     cvtdq2ps  m1, [srcq+lenq   ]
  87     cvtdq2ps  m2, [srcq+lenq+16]
  88 %else
  89     cvtpi2ps  m1, [srcq+lenq   ]
  90     cvtpi2ps  m3, [srcq+lenq+ 8]
  91     cvtpi2ps  m2, [srcq+lenq+16]
  92     cvtpi2ps  m4, [srcq+lenq+24]
  93     movlhps   m1, m3
  94     movlhps   m2, m4
  95 %endif
  96     mulps     m1, m0
  97     mulps     m2, m0
  98     mova  [dstq+lenq   ], m1
  99     mova  [dstq+lenq+16], m2
 100     add     mulq, 4
 101     add     lenq, 32
 102     jl .loop
 103     REP_RET
 104 %endmacro
 105
 106 INIT_XMM sse
 107 INT32_TO_FLOAT_FMUL_ARRAY8
 108 INIT_XMM sse2
 109 INT32_TO_FLOAT_FMUL_ARRAY8
 110