]> git.sesse.net Git - ffmpeg/blob - libavcodec/aarch64/fmtconvert_neon.S
Merge commit 'f8df5e2f31a5ba7b30a0e1caaaf5a03c753b3f9b'
[ffmpeg] / libavcodec / aarch64 / fmtconvert_neon.S
1 /*
2  * ARM NEON optimised Format Conversion Utils
3  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
4  * Copyright (c) 2015 Janne Grunau  <janne-libav@jannau.net>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22
23 #include "config.h"
24 #include "libavutil/aarch64/asm.S"
25
// ff_int32_to_float_fmul_scalar_neon
//
// Converts 32-bit signed integers to single-precision floats and multiplies
// every converted value by the same scalar, 8 elements per batch.
//
// Register use, as read from the code below:
//   x0       store pointer (output); post-incremented by 32 bytes per batch
//   x1       load pointer (input);   post-incremented by 32 bytes per batch
//   v0.s[0]  multiplier applied to every element
//   w2       element count; decremented by 8 per batch
//   v1-v4    scratch; condition flags are overwritten by subs
// Presumably the C prototype is
//   (float *dst, const int32_t *src, float mul, int len)
// -- TODO confirm against the C declaration, which is not visible here.
//
// The loop is software pipelined: the load and conversion of the next batch
// are interleaved with the store of the current products.
//
// NOTE(review): the first batch is loaded before w2 is ever tested and
// label 2 stores unconditionally, so one batch (8 elements) is processed
// even when w2 <= 0, and a count that is not a multiple of 8 is effectively
// rounded up to the next multiple. Callers presumably guarantee a positive
// multiple of 8 -- verify.
26 function ff_int32_to_float_fmul_scalar_neon, export=1
27         ld1             {v1.4s,v2.4s}, [x1], #32      // prologue: load the first 8 integers
28         scvtf           v1.4s,  v1.4s                 // signed int32 -> float, in place
29         scvtf           v2.4s,  v2.4s
30 1:
31         subs            w2,  w2,  #8                  // account for the batch held in v1/v2
32         fmul            v3.4s,  v1.4s,  v0.s[0]       // products go to v3/v4 so v1/v2 can be reloaded
33         fmul            v4.4s,  v2.4s,  v0.s[0]
34         b.le            2f                            // flags still from subs: that was the last batch
35         ld1             {v1.4s,v2.4s}, [x1], #32      // fetch the next batch before storing this one
36         st1             {v3.4s,v4.4s}, [x0], #32      // store current products, advance output
37         scvtf           v1.4s,  v1.4s                 // convert the batch just loaded
38         scvtf           v2.4s,  v2.4s
39         b               1b
40 2:
41         st1             {v3.4s,v4.4s}, [x0]           // final store; no pointer update needed
42         ret
43 endfunc
44
// ff_int32_to_float_fmul_array8_neon
//
// Converts 32-bit signed integers to single-precision floats in groups of 8
// elements, multiplying each group by its own scalar taken from an array of
// multipliers (one float per group).
//
// Register use, as read from the code below:
//   x0       not referenced by this function
//   x1       store pointer (output); post-incremented by 32 bytes per group
//   x2       load pointer (input);   post-incremented by 32 bytes per group
//   x3       pointer to the multipliers; one float consumed per group
//   w4       element count on entry; overwritten with the group count (>> 3)
//   w5       (groups still to process) - 1
//   v0-v7, v16  scratch; condition flags are overwritten by subs
// Presumably the C prototype is
//   (FmtConvertContext *c, float *dst, const int32_t *src,
//    const float *mul, int len)
// -- TODO confirm against the C declaration, which is not visible here.
//
// The main loop (label 2) handles two groups per iteration and loads two
// multipliers at once; label 1 handles a single leftover group and loads
// only one multiplier.
//
// NOTE(review): the low 3 bits of the count are discarded by the shift. A
// count below 8 gives w5 = -1, which is not caught by the b.eq below, so
// execution falls into the main loop and processes two groups (16 elements,
// two multipliers). Callers presumably guarantee a count of at least 8 --
// verify.
45 function ff_int32_to_float_fmul_array8_neon, export=1
46         lsr             w4,  w4,  #3                  // w4 = number of 8-element groups
47         subs            w5,  w4,  #1                  // w5 = groups - 1
48         b.eq            1f                            // exactly one group: go straight to the tail
49 2:
50         ld1             {v0.4s,v1.4s}, [x2], #32      // first group of this iteration
51         ld1             {v2.4s,v3.4s}, [x2], #32      // second group of this iteration
52         scvtf           v0.4s,  v0.4s                 // signed int32 -> float, in place
53         scvtf           v1.4s,  v1.4s
54         ld1             {v16.2s},  [x3], #8           // two multipliers: s[0] for first group, s[1] for second
55         scvtf           v2.4s,  v2.4s
56         scvtf           v3.4s,  v3.4s
57         fmul            v4.4s,  v0.4s,  v16.s[0]
58         fmul            v5.4s,  v1.4s,  v16.s[0]
59         fmul            v6.4s,  v2.4s,  v16.s[1]
60         fmul            v7.4s,  v3.4s,  v16.s[1]
61         st1             {v4.4s,v5.4s}, [x1], #32
62         st1             {v6.4s,v7.4s}, [x1], #32
63         subs            w5,  w5,  #2                  // two groups done; w5 = remaining groups - 1
64         b.gt            2b                            // at least two groups remain
65         b.eq            1f                            // exactly one group remains
66         ret                                           // nothing remains
67 1:
68         ld1             {v0.4s,v1.4s}, [x2]           // last group; pointers are no longer advanced
69         ld1             {v16.s}[0],  [x3]             // load one multiplier only, not a pair
70         scvtf           v0.4s,  v0.4s
71         scvtf           v1.4s,  v1.4s
72         fmul            v4.4s,  v0.4s,  v16.s[0]
73         fmul            v5.4s,  v1.4s,  v16.s[0]
74         st1             {v4.4s,v5.4s}, [x1]
75         ret
76 endfunc