git.sesse.net Git - vlc/blob - modules/arm_neon/s32_s16.S

   1  @*****************************************************************************
   2  @ s32_s16.S : ARM NEONv1 fi32 to s16n audio sample conversion
   3  @*****************************************************************************
   4  @ Copyright (C) 2009 Rémi Denis-Courmont
   5  @
   6  @ This program is free software; you can redistribute it and/or modify
   7  @ it under the terms of the GNU General Public License as published by
   8  @ the Free Software Foundation; either version 2 of the License, or
   9  @ (at your option) any later version.
  10  @
  11  @ This program is distributed in the hope that it will be useful,
  12  @ but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  @ GNU General Public License for more details.
  15  @
  16  @ You should have received a copy of the GNU General Public License
  17  @ along with this program; if not, write to the Free Software Foundation,
  18  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  19  @*****************************************************************************
  20
  21         .fpu neon               @ let the assembler accept NEON instructions
  22         .text
  23
             @ Register aliases shared by both routines in this file.
             @ The output pointer, input pointer and sample count live in r0-r2,
             @ which is presumably where the C caller passes them - the prototype
             @ is not visible here, confirm against the C declaration.
             @ r3 and ip are scratch registers used only by the scalar loop.
             @ Keep comments off the define lines themselves: a trailing comment
             @ there would become part of the macro expansion.
  24 #define OUT     r0
  25 #define IN      r1
  26 #define N       r2
  27 #define BUF     r3
  28 #define HALF    ip
  29
  30         .align
  31         .global s32_s16_neon
  32         .type   s32_s16_neon, %function
  33         @ Converts 32-bit fixed-point samples to signed 16-bit samples.
  34         @ Input and output must be aligned on a 128-bit boundary.
             @
             @ In:   OUT = destination (16-bit samples), IN = source (32-bit
             @       samples), N = number of samples.
             @ Each sample is shifted right by 13 bits with rounding and then
             @ saturated to 16 bits (vqrshrn with shift 13).
             @ Samples are handled in groups of 8 by a three-stage unrolled loop,
             @ then at most one group of 4; the remaining 0-3 samples (or all of
             @ them when N < 8 on entry) go through s32_s16_neon_unaligned, which
             @ also performs the return.
             @ Modifies: OUT, IN, N, q8-q13, flags, plus whatever the scalar
             @ routine modifies.
             @ NOTE(review): every blt below is a signed comparison on N.
  35 s32_s16_neon:
  36         pld             [IN]                    @ prefetch hint for the first input bytes
  37 2:                                              @ no branch to this label is visible in this file
  38         cmp             N,      #8
  39         blt             s32_s16_neon_unaligned  @ fewer than 8 samples: scalar loop does it all
  40         vld1.s32        {q8-q9},        [IN,:128]!      @ preload the first group of 8 samples
  41
  42 3:      @ Main loop: on entry q8-q9 hold 8 loaded samples that N still counts
  43         pld             [IN, #64]               @ prefetch hint 64 bytes past the read pointer
  44         sub             N,      #8
  45         vqrshrn.s32     d16,    q8,     #13     @ d16 = 4 results, overwriting the low half of q8
  46         vqrshrn.s32     d17,    q9,     #13     @ q8 (d16-d17) now holds the results of group 1
  47         cmp             N,      #8
  48         blt             4f                      @ no full group left: store group 1 and exit
  49         vld1.s32        {q10-q11},      [IN,:128]!      @ load group 2
  50         sub             N,      #8
  51         vqrshrn.s32     d18,    q10,    #13     @ results go into q9, whose input was consumed above
  52         vqrshrn.s32     d19,    q11,    #13     @ q9 (d18-d19) now holds the results of group 2
  53         cmp             N,      #8
  54         blt             5f                      @ no full group left: store groups 1-2 and exit
  55         vld1.s32        {q12-q13},      [IN,:128]!      @ load group 3
  56         sub             N,      #8
  57         vqrshrn.s32     d20,    q12,    #13     @ results go into q10, whose input was consumed above
  58         vqrshrn.s32     d21,    q13,    #13     @ q10 (d20-d21) now holds the results of group 3
  59         vst1.s16        {d16-d19},      [OUT,:128]!     @ store groups 1-2 (16 samples)
  60         cmp             N,      #8
  61         blt             6f                      @ no full group left: store group 3 and exit
  62         vld1.s32        {q8-q9},        [IN,:128]!      @ preload group 1 of the next iteration
  63         vst1.s16        {d20-d21},      [OUT,:128]!     @ store group 3 (8 samples)
  64         b               3b
  65 4:      @ Exit after stage 1: only group 1 is pending
  66         vst1.s16        {d16-d17},      [OUT,:128]!
  67         b               7f
  68 5:      @ Exit after stage 2: groups 1-2 are pending
  69         vst1.s16        {d16-d19},      [OUT,:128]!
  70         b               7f
  71 6:      @ Exit after stage 3: groups 1-2 are already stored, group 3 is pending
  72         vst1.s16        {d20-d21},      [OUT,:128]!
  73 7:      @ Every path reaching this point failed a test for 8 remaining samples
  74         cmp             N,      #4
  75         blt             s32_s16_neon_unaligned  @ 0-3 samples left: scalar loop
  76         vld1.s32        {q8},           [IN,:128]!      @ one last group of 4 samples
  77         sub             N,      #4
  78         vqrshrn.s32     d16,    q8,     #13
  79         vst1.s16        {d16},          [OUT,:64]!      @ 4 results = 8 bytes, hence the 64-bit alignment hint
  80
  81         @ Fall through to the scalar loop for the last 0-3 samples
  82
  83         .global s32_s16_neon_unaligned
  84         .type   s32_s16_neon_unaligned, %function
  85         @ Converts 32-bit fixed-point samples to signed 16-bit samples,
  86         @ one sample at a time. Input must be 32-bit aligned, output 16-bit aligned.
             @
             @ In:   OUT = destination, IN = source, N = number of samples.
             @ Per sample: out = saturate16((in + 4096) >> 13), where the addition
             @ itself saturates to 32 bits and the shift is arithmetic.
             @ Returns with bx lr once N reaches zero (immediately when N is 0).
             @ Modifies: OUT, IN, N, BUF, HALF, flags; qadd and ssat may also set
             @ the Q flag when they saturate.
             @ NOTE(review): the only exit test is N == 0; N is never checked
             @ for being negative.
  87 s32_s16_neon_unaligned:
  88         mov             HALF,   #4096           @ rounding bias: 1 << 12, half of 1 << 13
  89 1:
  90         cmp             N,      #0
  91         bxeq            lr                      @ all samples done: return to the caller
  92
  93         ldr             BUF,    [IN]
  94         add             IN,     #4              @ next 32-bit input sample
  95         add             OUT,    #2              @ advance first; the store below uses OUT - 2
  96         qadd            BUF,    HALF,   BUF     @ saturating add, so the bias cannot wrap around
  97         sub             N,      #1
  98         ssat            BUF,    #16,    BUF, asr #13    @ arithmetic shift by 13, then clamp to signed 16-bit
  99         strh            BUF,    [OUT, #-2]      @ write the sample at the pre-advance position
 100         b               1b