git.sesse.net Git - ffmpeg/blob - libavcodec/arm/ac3dsp_neon.S

   1 /*
   2  * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
   3  *
   4  * This file is part of Libav.
   5  *
   6  * Libav is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * Libav is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with Libav; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 #include "libavutil/arm/asm.S"
  22
  23 function ff_ac3_max_msb_abs_int16_neon, export=1
             /*
              * OR together the absolute values of a run of signed 16-bit
              * elements and return a value whose highest set bit equals the
              * highest set bit found in any |element|.
              *
              * In:   r0 = element pointer; every load carries a :128 hint, so
              *            the address must be 16-byte aligned
              *       r1 = element count, consumed 16 per pass; the loop body
              *            executes once before the count is tested
              * Out:  r0 = reduced value, zero-extended from 16 bits
              * Uses: q0-q3, r1, flags
              *
              * NOTE(review): presumably called from C as (src, len) with len a
              * positive multiple of 16 - confirm against the prototype.
              */
  24         vmov.i16        q0,  #0                      /* q0 = OR accumulator for the first 8 lanes of a pass */
  25         vmov.i16        q2,  #0                      /* q2 = OR accumulator for the second 8 lanes */
  26 1:      vld1.16         {q1},     [r0,:128]!         /* q1 = next 8 elements, r0 += 16 bytes */
  27         vabs.s16        q1,  q1                      /* q1 = |q1| per lane */
  28         vld1.16         {q3},     [r0,:128]!         /* q3 = following 8 elements, r0 += 16 bytes */
  29         vabs.s16        q3,  q3                      /* q3 = |q3| per lane */
  30         vorr            q0,  q0,  q1                 /* fold into accumulator q0 */
  31         vorr            q2,  q2,  q3                 /* fold into accumulator q2 */
  32         subs            r1,  r1,  #16                /* 16 elements handled per pass */
  33         bgt             1b                           /* repeat while the remaining count is > 0 */
  34         vorr            q0,  q0,  q2                 /* merge the two accumulators: 8 lanes */
  35         vorr            d0,  d0,  d1                 /* OR high half into low half: 4 lanes */
  36         vpmax.u16       d0,  d0,  d0                 /* pairwise unsigned max: 4 lanes -> 2 distinct values */
  37         vpmax.u16       d0,  d0,  d0                 /* pairwise unsigned max again: lane 0 = largest lane */
             /* The last two steps take a max rather than an OR, so r0 is the
              * largest of the four OR-ed lanes, not the OR of all of them; the
              * position of its highest set bit is the same in both cases. */
  38         vmov.u16        r0,  d0[0]                   /* return lane 0, zero-extended */
  39         bx              lr
  40 endfunc
  41
  42 function ff_ac3_exponent_min_neon, export=1
             /*
              * In-place column-wise unsigned byte minimum over rows spaced
              * 256 bytes apart: each byte of the first row is replaced by the
              * minimum of itself and the bytes at the same offset in the next
              * r1 rows.
              *
              * In:   r0 = pointer to the first row (16-byte aligned, :128 hint);
              *            this row is both read and overwritten
              *       r1 = number of following rows to fold in; 0 returns
              *            immediately without touching memory
              *       r2 = number of bytes per row to process, 16 per outer pass
              * Uses: q0, q1, r0, r2, r3, r12, flags (lr is saved and restored
              *       through the stack)
              *
              * NOTE(review): presumably (exp, num_reuse_blocks, nb_coefs) on the
              * C side - confirm against the prototype.
              */
  43         cmp             r1,  #0
  44         it              eq                           /* IT block keeps the conditional return valid as Thumb-2 */
  45         bxeq            lr                           /* nothing to fold in: return early */
  46         push            {lr}                         /* lr is reused below as the inner loop counter */
  47         mov             r12, #256                    /* r12 = byte stride between rows */
  48 1:
  49         vld1.8          {q0},     [r0,:128]          /* q0 = 16 bytes of the first row (running minimum) */
  50         mov             lr,  r1                      /* lr = rows left to fold for this 16-byte column group */
  51         add             r3,  r0,  #256               /* r3 = same column group in the second row */
  52 2:      vld1.8          {q1},     [r3,:128], r12     /* q1 = 16 bytes of the current row, r3 += 256 */
  53         subs            lr,  lr,  #1
  54         vmin.u8         q0,  q0,  q1                 /* per-byte unsigned minimum; does not alter the flags */
  55         bgt             2b                           /* branch on the flags set by the subs above */
  56         subs            r2,  r2,  #16                /* 16 bytes finished */
  57         vst1.8          {q0},     [r0,:128]!         /* write minima back over the first row, r0 += 16 */
  58         bgt             1b                           /* flags from the subs above survive the NEON store */
  59         pop             {pc}                         /* restore the saved lr straight into pc: return */
  60 endfunc
  61
  62 function ff_ac3_lshift_int16_neon, export=1
             /*
              * Shift a run of 16-bit elements in place by a per-call amount
              * using a register-controlled vector shift.
              *
              * In:   r0 = element pointer (16-byte aligned, :128 hint); data is
              *            read and written back at the same address
              *       r1 = element count, consumed 8 per pass; the loop body
              *            executes once before the count is tested
              *       r2 = shift amount, broadcast to every 16-bit lane
              * Uses: q0, q1, r0, r1, flags
              *
              * NOTE(review): presumably (src, len, shift) with a non-negative
              * shift, giving a left shift - confirm against the prototype.
              */
  63         vdup.16         q0,  r2                      /* q0 = shift amount in all 8 lanes */
  64 1:      vld1.16         {q1},     [r0,:128]          /* q1 = 8 elements; r0 not advanced yet */
  65         vshl.s16        q1,  q1,  q0                 /* shift each lane by its count in q0 */
  66         vst1.16         {q1},     [r0,:128]!         /* store in place, then r0 += 16 bytes */
  67         subs            r1,  r1,  #8                 /* 8 elements handled per pass */
  68         bgt             1b
  69         bx              lr
  70 endfunc
  71
  72 function ff_ac3_rshift_int32_neon, export=1
             /*
              * Shift a run of signed 32-bit elements in place to the right.
              * NEON has no right shift by register, so the count is negated and
              * fed to vshl, which shifts right for a negative per-lane count.
              *
              * In:   r0 = element pointer (16-byte aligned, :128 hint); data is
              *            read and written back at the same address
              *       r1 = element count, consumed 4 per pass; the loop body
              *            executes once before the count is tested
              *       r2 = right-shift amount
              * Uses: q0, q1, r0, r1, r2, flags
              *
              * NOTE(review): presumably (src, len, shift) on the C side -
              * confirm against the prototype.
              */
  73         rsb             r2,  r2,  #0                 /* r2 = -r2 */
  74         vdup.32         q0,  r2                      /* q0 = negated shift amount in all 4 lanes */
  75 1:      vld1.32         {q1},     [r0,:128]          /* q1 = 4 elements; r0 not advanced yet */
  76         vshl.s32        q1,  q1,  q0                 /* negative count: signed (arithmetic) right shift */
  77         vst1.32         {q1},     [r0,:128]!         /* store in place, then r0 += 16 bytes */
  78         subs            r1,  r1,  #4                 /* 4 elements handled per pass */
  79         bgt             1b
  80         bx              lr
  81 endfunc
  82
  83 function ff_float_to_fixed24_neon, export=1
             /*
              * Convert single-precision floats to signed 32-bit fixed point
              * with 24 fractional bits (value scaled by 2^24).
              *
              * In:   r0 = destination pointer (16-byte aligned, :128 hint)
              *       r1 = source pointer (16-byte aligned, :128 hint)
              *       r2 = element count, consumed 16 per pass; the loop body
              *            executes once before the count is tested
              * Uses: q0-q3, r0, r1, r2, flags
              *
              * Loads, conversions and stores are interleaved rather than
              * grouped; keep the order when editing.
              *
              * NOTE(review): presumably (dst, src, len) on the C side -
              * confirm against the prototype.
              */
  84 1:      vld1.32         {q0-q1},  [r1,:128]!         /* q0,q1 = 8 floats, r1 += 32 bytes */
  85         vcvt.s32.f32    q0,  q0,  #24                /* float -> s32 with 24 fraction bits */
  86         vld1.32         {q2-q3},  [r1,:128]!         /* q2,q3 = next 8 floats, r1 += 32 bytes */
  87         vcvt.s32.f32    q1,  q1,  #24
  88         vcvt.s32.f32    q2,  q2,  #24
  89         vst1.32         {q0-q1},  [r0,:128]!         /* write first 8 results, r0 += 32 bytes */
  90         vcvt.s32.f32    q3,  q3,  #24
  91         vst1.32         {q2-q3},  [r0,:128]!         /* write second 8 results, r0 += 32 bytes */
  92         subs            r2,  r2,  #16                /* 16 elements handled per pass */
  93         bgt             1b
  94         bx              lr
  95 endfunc
  96
  97 function ff_ac3_extract_exponents_neon, export=1
             /*
              * For each signed 32-bit input value v, write the byte
              * clz(|v|) - 8 to the output.
              *
              * In:   r0 = output byte pointer (4-byte aligned, :32 hint)
              *       r1 = input pointer to 32-bit values (16-byte aligned,
              *            :128 hint)
              *       r2 = element count, consumed 4 per pass; the loop body
              *            executes once before the count is tested
              * Uses: q0, q1, q3, q15, r0, r1, r2, flags
              *
              * NOTE(review): presumably (exp, coef, nb_coefs) with coef holding
              * 24-bit fixed-point values, which would explain the bias of 8 -
              * confirm against the caller.
              */
  98         vmov.i32        q15, #8                      /* q15 = 8 in every 32-bit lane (bias subtracted below) */
  99 1:
 100         vld1.32         {q0},     [r1,:128]!         /* q0 = 4 input values, r1 += 16 bytes */
 101         vabs.s32        q1,  q0                      /* q1 = |q0| per lane */
 102         vclz.i32        q3,  q1                      /* q3 = leading-zero count of each magnitude */
 103         vsub.i32        q3,  q3,  q15                /* q3 = clz - 8 */
 104         vmovn.i32       d6,  q3                      /* narrow 4 x 32 -> 4 x 16 into d6 (low half of q3) */
 105         vmovn.i16       d6,  q3                      /* narrow q3 as 8 x 16 -> 8 x 8; bytes 0-3 of d6 come from the 4 results, bytes 4-7 from stale d7 */
 106         vst1.32         {d6[0]},  [r0,:32]!          /* store only the 4 valid bytes, r0 += 4 */
 107         subs            r2,  r2,  #4                 /* 4 elements handled per pass */
 108         bgt             1b
 109         bx              lr
 110 endfunc
 111
 112 function ff_apply_window_int16_neon, export=1
             /*
              * Multiply a block of 16-bit samples by a symmetric window using
              * saturating rounding doubling high-half multiplies (vqrdmulh).
              * The block is processed from both ends at once: each pass reads
              * 8 window coefficients and applies them in order to 8 samples at
              * the front and in reversed order to 8 samples at the back, so
              * only r3/2 coefficients are read in total.
              *
              * In:   r0 = output pointer (16-byte aligned, :128 hint)
              *       r1 = input pointer (16-byte aligned, :128 hint)
              *       r2 = window coefficient pointer (16-byte aligned)
              *       r3 = number of 16-bit samples, consumed 16 per pass; the
              *            loop body executes once before the count is tested
              * Uses: q0-q3, r0-r3, r12, flags (r4 and lr are saved and
              *       restored through the stack)
              *
              * NOTE(review): presumably (output, input, window, len) on the C
              * side with Q15 coefficients - confirm against the prototype.
              */
 113         push            {r4,lr}                      /* r4 is callee-saved; lr is reused as a pointer */
 114         add             r4,  r1,  r3,  lsl #1        /* r4 = input + 2*r3 bytes (one past the end) */
 115         add             lr,  r0,  r3,  lsl #1        /* lr = output + 2*r3 bytes (one past the end) */
 116         sub             r4,  r4,  #16                /* r4 = last 8 input samples */
 117         sub             lr,  lr,  #16                /* lr = last 8 output samples */
 118         mov             r12, #-16                    /* post-index step that walks the back pointers downwards */
 119 1:
 120         vld1.16         {q0},     [r1,:128]!         /* q0 = 8 samples from the front, r1 += 16 */
 121         vld1.16         {q2},     [r2,:128]!         /* q2 = 8 window coefficients, r2 += 16 */
 122         vld1.16         {q1},     [r4,:128], r12     /* q1 = 8 samples from the back, r4 -= 16 */
 123         vrev64.16       q3,  q2                      /* reverse the 16-bit lanes inside each 64-bit half */
 124         vqrdmulh.s16    q0,  q0,  q2                 /* front samples * window, in order */
 125         vqrdmulh.s16    d2,  d2,  d7                 /* low half of back samples * reversed high half ... */
 126         vqrdmulh.s16    d3,  d3,  d6                 /* ... and high * reversed low: full 8-lane reversal */
 127         vst1.16         {q0},     [r0,:128]!         /* store front results, r0 += 16 */
 128         vst1.16         {q1},     [lr,:128], r12     /* store back results, lr -= 16 */
 129         subs            r3,  r3,  #16                /* 8 front + 8 back samples handled per pass */
 130         bgt             1b
 131
 132         pop             {r4,pc}                      /* restore r4 and return via the saved lr */
 133 endfunc