]> git.sesse.net Git - vlc/blob - modules/video_filter/deinterlace/merge_arm.S
Fix building with Clang/LLVM
[vlc] / modules / video_filter / deinterlace / merge_arm.S
1  @*****************************************************************************
2  @ merge_arm.S : ARM NEON mean
3  @*****************************************************************************
4  @ Copyright (C) 2009-2012 Rémi Denis-Courmont
5  @
6  @ This program is free software; you can redistribute it and/or modify
7  @ it under the terms of the GNU Lesser General Public License as published by
8  @ the Free Software Foundation; either version 2.1 of the License, or
9  @ (at your option) any later version.
10  @
11  @ This program is distributed in the hope that it will be useful,
12  @ but WITHOUT ANY WARRANTY; without even the implied warranty of
13  @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  @ GNU Lesser General Public License for more details.
15  @
16  @ You should have received a copy of the GNU Lesser General Public License
17  @ along with this program; if not, write to the Free Software Foundation,
18  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
19  @****************************************************************************/
20
21         .syntax unified
22         .arm
23         .arch   armv6
24         .fpu    neon
25         .text
26
27 #define DEST    r0
28 #define SRC1    r1
29 #define SRC2    r2
30 #define SIZE    r3
31
32         .align 2
33         .global merge8_arm_neon
34         .type   merge8_arm_neon, %function
35         @ NOTE: Offset and pitch must be multiple of 16-bytes in VLC.
36 merge8_arm_neon:
37         cmp             SIZE,   #64
38         blo             2f
39 1:
40         pld             [SRC1, #64]
41         vld1.u8         {q0-q1},        [SRC1,:128]!
42         pld             [SRC2, #64]
43         vld1.u8         {q8-q9},        [SRC2,:128]!
44         vhadd.u8        q0,     q0,     q8
45         sub             SIZE,   SIZE,   #64
46         vld1.u8         {q2-q3},        [SRC1,:128]!
47         vhadd.u8        q1,     q1,     q9
48         vld1.u8         {q10-q11},      [SRC2,:128]!
49         vhadd.u8        q2,     q2,     q10
50         cmp             SIZE,   #64
51         vhadd.u8        q3,     q3,     q11
52         vst1.u8         {q0-q1},        [DEST,:128]!
53         vst1.u8         {q2-q3},        [DEST,:128]!
54         bhs             1b
55 2:
56         cmp             SIZE,   #32
57         blo             3f
58         vld1.u8         {q0-q1},        [SRC1,:128]!
59         sub             SIZE,   SIZE,   #32
60         vld1.u8         {q8-q9},        [SRC2,:128]!
61         vhadd.u8        q0,     q0,     q8
62         vhadd.u8        q1,     q1,     q9
63         vst1.u8         {q0-q1},        [DEST,:128]!
64 3:
65         cmp             SIZE,   #16
66         bxlo            lr
67         vld1.u8         {q0},           [SRC1,:128]!
68         sub             SIZE,   SIZE,   #16
69         vld1.u8         {q8},           [SRC2,:128]!
70         vhadd.u8        q0,     q0,     q8
71         vst1.u8         {q0},           [DEST,:128]!
72         bx              lr
73
74         .align 2
75         .global merge16_arm_neon
76         .type   merge16_arm_neon, %function
77 merge16_arm_neon:
78         cmp             SIZE,   #64
79         blo             2f
80 1:
81         pld             [SRC1, #64]
82         vld1.u16        {q0-q1},        [SRC1,:128]!
83         pld             [SRC2, #64]
84         vld1.u16        {q8-q9},        [SRC2,:128]!
85         vhadd.u16       q0,     q0,     q8
86         sub             SIZE,   SIZE,   #64
87         vld1.u16        {q2-q3},        [SRC1,:128]!
88         vhadd.u16       q1,     q1,     q9
89         vld1.u16        {q10-q11},      [SRC2,:128]!
90         vhadd.u16       q2,     q2,     q10
91         cmp             SIZE,   #64
92         vhadd.u16       q3,     q3,     q11
93         vst1.u16        {q0-q1},        [DEST,:128]!
94         vst1.u16        {q2-q3},        [DEST,:128]!
95         bhs             1b
96 2:
97         cmp             SIZE,   #32
98         blo             3f
99         vld1.u16        {q0-q1},        [SRC1,:128]!
100         sub             SIZE,   SIZE,   #32
101         vld1.u16        {q8-q9},        [SRC2,:128]!
102         vhadd.u16       q0,     q0,     q8
103         vhadd.u16       q1,     q1,     q9
104         vst1.u16        {q0-q1},        [DEST,:128]!
105 3:
106         cmp             SIZE,   #16
107         bxlo            lr
108         vld1.u16        {q0},           [SRC1,:128]!
109         sub             SIZE,   SIZE,   #16
110         vld1.u16        {q8},           [SRC2,:128]!
111         vhadd.u16       q0,     q0,     q8
112         vst1.u16        {q0},           [DEST,:128]!
113         bx              lr
114
115         .align 2
116         .global merge8_armv6
117         .type   merge8_armv6, %function
118 merge8_armv6:
119         push            {r4-r9,lr}
120 1:
121         pld             [SRC1, #64]
122         ldm             SRC1!,  {r4-r5}
123         pld             [SRC2, #64]
124         ldm             SRC2!,  {r8-r9}
125         subs            SIZE,   SIZE,   #16
126         uhadd8          r4,     r4,     r8
127         ldm             SRC1!,  {r6-r7}
128         uhadd8          r5,     r5,     r9
129         ldm             SRC2!,  {ip,lr}
130         uhadd8          r6,     r6,     ip
131         stm             DEST!,  {r4-r5}
132         uhadd8          r7,     r7,     lr
133         stm             DEST!,  {r6-r7}
134         popeq           {r4-r9,pc}
135         b               1b
136
137         .align 2
138         .global merge16_armv6
139         .type   merge16_armv6, %function
140 merge16_armv6:
141         push            {r4-r9,lr}
142 1:
143         pld             [SRC1, #64]
144         ldm             SRC1!,  {r4-r5}
145         pld             [SRC2, #64]
146         ldm             SRC2!,  {r8-r9}
147         subs            SIZE,   SIZE,   #16
148         uhadd16         r4,     r4,     r8
149         ldm             SRC1!,  {r6-r7}
150         uhadd16         r5,     r5,     r9
151         ldm             SRC2!,  {ip,lr}
152         uhadd16         r6,     r6,     ip
153         stm             DEST!,  {r4-r5}
154         uhadd16         r7,     r7,     lr
155         stm             DEST!,  {r6-r7}
156         popeq           {r4-r9,pc}
157         b               1b