git.sesse.net Git - vlc/blob - modules/audio_filter/converter/neon.c

   1 /*****************************************************************************
   2  * neon.c: NEON assembly optimized audio conversions
   3  *****************************************************************************
   4  * Copyright (C) 2009 Rémi Denis-Courmont
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License as published by
   8  * the Free Software Foundation; either version 2 of the License, or
   9  * (at your option) any later version.
  10  *
  11  * This program is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU General Public License
  17  * along with this program; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  19  *****************************************************************************/
  20
  21 #ifdef HAVE_CONFIG_H
  22 # include "config.h"
  23 #endif
  24
  25 #include <vlc_common.h>
  26 #include <vlc_plugin.h>
  27 #include <vlc_aout.h>
  28 #include <vlc_filter.h>
  29 #include <vlc_cpu.h>
  30
  31 static int Open (vlc_object_t *);
  32
  33 vlc_module_begin ()
  34     set_description (N_("ARM NEON audio format conversions") )
  35     set_capability ("audio filter2", 20)
  36     set_callbacks (Open, NULL)
  37     add_requirement (NEON)
  38 vlc_module_end ()
  39
  40 static block_t *Do_F32_S32 (filter_t *, block_t *);
  41 static block_t *Do_S32_S16 (filter_t *, block_t *);
  42
  43 static int Open (vlc_object_t *obj)
  44 {
  45     filter_t *filter = (filter_t *)obj;
  46
  47     if (!AOUT_FMTS_SIMILAR (&filter->fmt_in.audio, &filter->fmt_out.audio))
  48         return VLC_EGENERIC;
  49
  50     switch (filter->fmt_in.audio.i_format)
  51     {
  52         case VLC_CODEC_FL32:
  53             switch (filter->fmt_out.audio.i_format)
  54             {
  55                 case VLC_CODEC_FI32:
  56                     filter->pf_audio_filter = Do_F32_S32;
  57                     break;
  58                 default:
  59                     return VLC_EGENERIC;
  60             }
  61             break;
  62
  63         case VLC_CODEC_FI32:
  64             switch (filter->fmt_out.audio.i_format)
  65             {
  66                 case VLC_CODEC_S16N:
  67                     filter->pf_audio_filter = Do_S32_S16;
  68                     break;
  69                 default:
  70                     return VLC_EGENERIC;
  71             }
  72             break;
  73         default:
  74             return VLC_EGENERIC;
  75     }
  76     return VLC_SUCCESS;
  77 }
  78
  79 /**
  80  * Single-precision floating point to signed fixed point conversion.
  81  */
  82 static block_t *Do_F32_S32 (filter_t *filter, block_t *inbuf)
  83 {
  84     unsigned nb_samples = inbuf->i_nb_samples
  85                      * aout_FormatNbChannels (&filter->fmt_in.audio);
  86     int32_t *outp = (int32_t *)inbuf->p_buffer;
  87     int32_t *endp = outp + nb_samples;
  88
  89     if (nb_samples & 1)
  90     {
  91         asm volatile (
  92             "vldr.32 s0, [%[outp]]\n"
  93             "vcvt.s32.f32 d0, d0, #28\n"
  94             "vstr.32 s0, [%[outp]]\n"
  95             :
  96             : [outp] "r" (outp)
  97             : "d0", "memory");
  98         outp++;
  99     }
 100
 101     if (nb_samples & 2)
 102         asm volatile (
 103             "vld1.f32 {d0}, [%[outp]]\n"
 104             "vcvt.s32.f32 d0, d0, #28\n"
 105             "vst1.s32 {d0}, [%[outp]]!\n"
 106             : [outp] "+r" (outp)
 107             :
 108             : "d0", "memory");
 109
 110     if (nb_samples & 4)
 111         asm volatile (
 112             "vld1.f32 {q0}, [%[outp]]\n"
 113             "vcvt.s32.f32 q0, q0, #28\n"
 114             "vst1.s32 {q0}, [%[outp]]!\n"
 115             : [outp] "+r" (outp)
 116             :
 117             : "q0", "memory");
 118
 119     while (outp != endp)
 120         asm volatile (
 121             "vld1.f32 {q0-q1}, [%[outp]]\n"
 122             "vcvt.s32.f32 q0, q0, #28\n"
 123             "vcvt.s32.f32 q1, q1, #28\n"
 124             "vst1.s32 {q0-q1}, [%[outp]]!\n"
 125             : [outp] "+r" (outp)
 126             :
 127             : "q0", "q1", "memory");
 128
 129     return inbuf;
 130 }
 131
 132 /**
 133  * Signed 32-bits fixed point to signed 16-bits integer
 134  */
 135 static block_t *Do_S32_S16 (filter_t *filter, block_t *inbuf)
 136 {
 137     unsigned nb_samples = inbuf->i_nb_samples
 138                      * aout_FormatNbChannels (&filter->fmt_in.audio);
 139     int32_t *inp = (int32_t *)inbuf->p_buffer;
 140     const int32_t *endp = inp + nb_samples;
 141     int16_t *outp = (int16_t *)inp;
 142
 143     while (nb_samples & 3)
 144     {
 145         const int16_t roundup = 1 << 12;
 146         asm volatile (
 147             "qadd r0, %[inv], %[roundup]\n"
 148             "ssat %[outv], #16, r0, asr #13\n"
 149             : [outv] "=r" (*outp)
 150             : [inv] "r" (*inp), [roundup] "r" (roundup)
 151             : "r0");
 152         inp++;
 153         outp++;
 154         nb_samples--;
 155     }
 156
 157     if (nb_samples & 4)
 158         asm volatile (
 159             "vld1.s32 {q0}, [%[inp]]!\n"
 160             "vrshrn.i32 d0, q0, #13\n"
 161             "vst1.s16 {d0}, [%[outp]]!\n"
 162             : [outp] "+r" (outp), [inp] "+r" (inp)
 163             :
 164             : "q0", "memory");
 165
 166     if (nb_samples & 8)
 167         asm volatile (
 168             "vld1.s32 {q0-q1}, [%[inp]]!\n"
 169             "vrshrn.i32 d0, q0, #13\n"
 170             "vrshrn.i32 d1, q1, #13\n"
 171             "vst1.s16 {q0}, [%[outp]]!\n"
 172             : [outp] "+r" (outp), [inp] "+r" (inp)
 173             :
 174             : "q0", "q1", "memory");
 175
 176     while (inp != endp)
 177         asm volatile (
 178             "vld1.s32 {q0-q1}, [%[inp]]!\n"
 179             "vld1.s32 {q2-q3}, [%[inp]]!\n"
 180             "vrshrn.s32 d0, q0, #13\n"
 181             "vrshrn.s32 d1, q1, #13\n"
 182             "vrshrn.s32 d2, q2, #13\n"
 183             "vrshrn.s32 d3, q3, #13\n"
 184             "vst1.s16 {q0-q1}, [%[outp]]!\n"
 185             : [outp] "+r" (outp), [inp] "+r" (inp)
 186             :
 187             : "q0", "q1", "q2", "q3", "memory");
 188
 189     inbuf->i_buffer /= 2;
 190     return inbuf;
 191 }