git.sesse.net Git - ffmpeg/blob - libswscale/arm/rgb2yuv_neon_common.S

   1 /*
   2  * Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com>
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with FFmpeg; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 #include "libavutil/arm/asm.S"
  22
  23 .macro alias name, tgt, set=1
  24 .if \set != 0
  25     \name   .req    \tgt
  26 .else
  27     .unreq  \name
  28 .endif
  29 .endm
  30
  31 .altmacro
  32
  33 .macro alias_dw_all qw, dw_l, dw_h
  34     alias   q\qw\()_l, d\dw_l
  35     alias   q\qw\()_h, d\dw_h
  36     .if \qw < 15
  37         alias_dw_all  %(\qw + 1), %(\dw_l + 2), %(\dw_h + 2)
  38     .endif
  39 .endm
  40
  41 alias_dw_all    0, 0, 1
  42
  43 .noaltmacro
  44
  45 .macro alias_qw     name, qw, set=1
  46     alias   \name\(), \qw, \set
  47     alias   \name\()_l, \qw\()_l, \set
  48     alias   \name\()_h, \qw\()_h, \set
  49 .endm
  50
  51 .macro prologue
  52     push            {r4-r12, lr}
  53     vpush           {q4-q7}
  54 .endm
  55
  56 .macro epilogue
  57     vpop            {q4-q7}
  58     pop             {r4-r12, pc}
  59 .endm
  60
  61 .macro  load_arg    reg, ix
  62     ldr     \reg,   [sp, #((10 * 4 + 4 * 16) + (\ix - 4) * 4)]
  63 .endm
  64
  65
  66 /* ()_to_()_neon(const uint8_t *src, uint8_t *y, uint8_t *chroma
  67  *                  int width, int height,
  68  *                  int y_stride, int c_stride, int src_stride,
  69  *                  int32_t coeff_table[9]);
  70  */
  71 .macro  alias_loop_420sp set=1
  72     alias   src,        r0, \set
  73     alias   src0,       src, \set
  74     alias   y,          r1, \set
  75     alias   y0,         y, \set
  76     alias   chroma,     r2, \set
  77     alias   width,      r3, \set
  78     alias   header,     width, \set
  79
  80     alias   height,     r4, \set
  81     alias   y_stride,   r5, \set
  82     alias   c_stride,   r6, \set
  83     alias   c_padding,  c_stride, \set
  84     alias   src_stride, r7, \set
  85
  86     alias   y0_end,     r8, \set
  87
  88     alias   src_padding,r9, \set
  89     alias   y_padding,  r10, \set
  90
  91     alias   src1,       r11, \set
  92     alias   y1,         r12, \set
  93
  94     alias   coeff_table,r12, \set
  95 .endm
  96
  97
  98 .macro  loop_420sp s_fmt, d_fmt, init, kernel, precision
  99
 100 function \s_fmt\()_to_\d_fmt\()_neon_\precision, export=1
 101     prologue
 102
 103     alias_loop_420sp
 104
 105     load_arg    height,         4
 106     load_arg    y_stride,       5
 107     load_arg    c_stride,       6
 108     load_arg    src_stride,     7
 109     load_arg    coeff_table,    8
 110
 111     \init       coeff_table
 112
 113     sub         y_padding,      y_stride,       width
 114     sub         c_padding,      c_stride,       width
 115     sub         src_padding,    src_stride,     width, LSL #2
 116
 117     add         y0_end,         y0,             width
 118     and         header,         width,          #15
 119
 120     add         y1,             y0,             y_stride
 121     add         src1,           src0,           src_stride
 122
 123 0:
 124     cmp         header,     #0
 125     beq         1f
 126
 127     \kernel     \s_fmt, \d_fmt, src0, src1, y0, y1, chroma, header
 128
 129 1:
 130     \kernel     \s_fmt, \d_fmt, src0, src1, y0, y1, chroma
 131
 132     cmp         y0,         y0_end
 133     blt         1b
 134 2:
 135     add         y0,         y1,         y_padding
 136     add         y0_end,     y1,         y_stride
 137     add         chroma,     chroma,     c_padding
 138     add         src0,       src1,       src_padding
 139
 140     add         y1,         y0,         y_stride
 141     add         src1,       src0,       src_stride
 142
 143     subs        height,     height,     #2
 144
 145     bgt         0b
 146
 147     epilogue
 148
 149     alias_loop_420sp 0
 150
 151 endfunc
 152 .endm
 153
 154 .macro downsample
 155     vpaddl.u8   r16x8,  r8x16
 156     vpaddl.u8   g16x8,  g8x16
 157     vpaddl.u8   b16x8,  b8x16
 158 .endm
 159
 160
 161 /* acculumate and right shift by 2 */
 162 .macro downsample_ars2
 163     vpadal.u8   r16x8,  r8x16
 164     vpadal.u8   g16x8,  g8x16
 165     vpadal.u8   b16x8,  b8x16
 166
 167     vrshr.u16   r16x8,  r16x8,  #2
 168     vrshr.u16   g16x8,  g16x8,  #2
 169     vrshr.u16   b16x8,  b16x8,  #2
 170 .endm
 171
 172 .macro store_y8_16x1            dst, count
 173 .ifc "\count",""
 174     vstmia      \dst!,  {y8x16}
 175 .else
 176     vstmia      \dst,   {y8x16}
 177     add         \dst,   \dst,           \count
 178 .endif
 179 .endm
 180
 181 .macro store_chroma_nv12_8x1    dst, count
 182 .ifc "\count",""
 183     vst2.i8     {u8x8, v8x8},   [\dst]!
 184 .else
 185     vst2.i8     {u8x8, v8x8},   [\dst], \count
 186 .endif
 187 .endm
 188
 189 .macro store_chroma_nv21_8x1    dst, count
 190 .ifc "\count",""
 191     vst2.i8     {v8x8, u8x8},   [\dst]!
 192 .else
 193     vst2.i8     {v8x8, u8x8},   [\dst], \count
 194 .endif
 195 .endm
 196
 197 .macro load_8888_16x1   a, b, c, d, src, count
 198 .ifc "\count",""
 199     vld4.8      {\a\()8x16_l, \b\()8x16_l, \c\()8x16_l, \d\()8x16_l},  [\src]!
 200     vld4.8      {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h},  [\src]!
 201 .else
 202     vld4.8      {\a\()8x16_l, \b\()8x16_l, \c\()8x16_l, \d\()8x16_l},  [\src]!
 203     vld4.8      {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h},  [\src]
 204     sub         \src,   \src,   #32
 205     add         \src,   \src,   \count, LSL #2
 206 .endif
 207 .endm
 208
 209 .macro load_rgbx_16x1   src, count
 210     load_8888_16x1  r, g, b, x, \src, \count
 211 .endm
 212
 213 .macro load_bgrx_16x1   src, count
 214     load_8888_16x1  b, g, r, x, \src, \count
 215 .endm
 216
 217 .macro alias_src_rgbx   set=1
 218     alias_src_8888  r, g, b, x, \set
 219 .endm
 220
 221 .macro alias_src_bgrx   set=1
 222     alias_src_8888  b, g, r, x, \set
 223 .endm
 224
 225 .macro alias_dst_nv12   set=1
 226     alias   u8x8, c8x8x2_l, \set
 227     alias   v8x8, c8x8x2_h, \set
 228 .endm
 229
 230 .macro alias_dst_nv21   set=1
 231     alias   v8x8, c8x8x2_l, \set
 232     alias   u8x8, c8x8x2_h, \set
 233 .endm
 234
 235
 236 // common aliases
 237
 238 alias   CO_R    d0
 239 CO_RY   .dn     d0.s16[0]
 240 CO_RU   .dn     d0.s16[1]
 241 CO_RV   .dn     d0.s16[2]
 242
 243 alias   CO_G    d1
 244 CO_GY   .dn     d1.s16[0]
 245 CO_GU   .dn     d1.s16[1]
 246 CO_GV   .dn     d1.s16[2]
 247
 248 alias   CO_B    d2
 249 CO_BY   .dn     d2.s16[0]
 250 CO_BU   .dn     d2.s16[1]
 251 CO_BV   .dn     d2.s16[2]
 252
 253 alias   BIAS_U, d3
 254 alias   BIAS_V, BIAS_U
 255
 256 alias   BIAS_Y, q2
 257
 258
 259 /* q3-q6 R8G8B8X8 x16 */
 260
 261 .macro alias_src_8888   a, b, c, d, set
 262     alias_qw  \a\()8x16, q3, \set
 263     alias_qw  \b\()8x16, q4, \set
 264     alias_qw  \c\()8x16, q5, \set
 265     alias_qw  \d\()8x16, q6, \set
 266 .endm
 267
 268 .macro kernel_420_16x2  rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma, count
 269     alias_src_\rgb_fmt
 270     alias_dst_\yuv_fmt
 271
 272     load_\rgb_fmt\()_16x1   \rgb0, \count
 273
 274     downsample
 275     compute_y_16x1
 276     store_y8_16x1   \y0, \count
 277
 278
 279     load_\rgb_fmt\()_16x1   \rgb1, \count
 280     downsample_ars2
 281     compute_y_16x1
 282     store_y8_16x1   \y1, \count
 283
 284     compute_chroma_8x1  u, U
 285     compute_chroma_8x1  v, V
 286
 287     store_chroma_\yuv_fmt\()_8x1 \chroma, \count
 288
 289     alias_dst_\yuv_fmt 0
 290     alias_src_\rgb_fmt 0
 291 .endm