]> git.sesse.net Git - vlc/blob - modules/video_chroma/i420_yuyv_neon.S
i420->YUYV NEON: rewrite using VZIP
[vlc] / modules / video_chroma / i420_yuyv_neon.S
1  @*****************************************************************************
2  @ i420_yuyv_neon.S : ARM NEONv1 I420 to YUYV chroma conversion
3  @*****************************************************************************
4  @ Copyright (C) 2009 Rémi Denis-Courmont
5  @
6  @ This program is free software; you can redistribute it and/or modify
7  @ it under the terms of the GNU General Public License as published by
8  @ the Free Software Foundation; either version 2 of the License, or
9  @ (at your option) any later version.
10  @
11  @ This program is distributed in the hope that it will be useful,
12  @ but WITHOUT ANY WARRANTY; without even the implied warranty of
13  @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  @ GNU General Public License for more details.
15  @
16  @ You should have received a copy of the GNU General Public License
17  @ along with this program; if not, write to the Free Software Foundation,
18  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
19  @****************************************************************************/
20
21         .fpu neon
22         .text
23
24 #define O1      r0
25 #define O2      r1
26 #define PITCH   r2
27 #define HEIGHT  r3
28 #define Y1      r4
29 #define Y2      r5
30 #define U       r6
31 #define V       r7
32 #define END_O1  r12
33
@-----------------------------------------------------------------------------
@ i420_yuyv_neon
@
@ Interleaves planar luma and chroma bytes into packed Y U Y V order. Two
@ picture rows are converted per outer pass; both rows of a pass are built
@ from the same row of U and V samples.
@
@ In:   r0 = output address. Must be 128-bit aligned (stores use :128).
@       r1 = address of three consecutive words, read once on entry:
@            Y pointer, U pointer, V pointer.
@            Y must be 128-bit aligned, U and V 64-bit aligned.
@       r2 = row pitch in bytes between two luma rows. One output row is
@            twice that many bytes, and one pass consumes half that many
@            bytes of U and of V. Must be a non-zero multiple of 16, or the
@            inner loop never reaches its end address.
@       r3 = number of rows, treated as a signed value. Nothing is converted
@            when it is zero or negative. An odd count is rounded up to the
@            next even one (one extra row is read and written), so callers
@            should pass an even count.
@ Out:  none
@ Clobbers: r0-r3, r12, q0-q3, flags. r4-r7 are saved and restored.
@ NOTE(review): presumably called from C under the AAPCS; confirm the
@ prototype against the caller.
@-----------------------------------------------------------------------------
        .align
        .global i420_yuyv_neon
        .type   i420_yuyv_neon, %function
i420_yuyv_neon:
        push            {r4-r7, lr}
        cmp             HEIGHT, #0
        ble             3f                      @ no rows: touch no memory
        ldmia           r1,     {Y1, U, V}      @ must precede the write to r1
        add             O2,     O1,     PITCH, lsl #1
        add             Y2,     Y1,     PITCH
1:
        @ The first output row ends where the second one starts.
        mov             END_O1, O2
2:
        @ One step: 16 pixels of each of the two rows.
        vld1.u8         {d2},           [U,:64]!
        vld1.u8         {d3},           [V,:64]!
        vzip.u8         d2,     d3              @ q1 = U0 V0 U1 V1 ...
        vld1.u8         {q0},           [Y1,:128]!
        vmov            q3,     q1              @ keep chroma for the 2nd row,
                                                @ vzip overwrites both operands
        vzip.u8         q0,     q1              @ q0:q1 = Y0 U0 Y1 V0 ...
        vld1.u8         {q2},           [Y2,:128]!
        vzip.u8         q2,     q3
        vst1.u8         {q0-q1},        [O1,:128]!
        vst1.u8         {q2-q3},        [O2,:128]!

        cmp             O1,     END_O1
        bne             2b

        @ Step every row pointer past the pair just written. The chroma
        @ pointers already sit at their next row.
        mov             O1,     O2
        add             O2,     PITCH,  lsl #1
        mov             Y1,     Y2
        add             Y2,     PITCH

        @ Signed test so that an odd count still terminates.
        subs            HEIGHT, HEIGHT, #2
        bgt             1b
3:
        pop             {r4-r7, pc}
        .size   i420_yuyv_neon, .-i420_yuyv_neon
69
@-----------------------------------------------------------------------------
@ i420_uyvy_neon
@
@ Interleaves planar luma and chroma bytes into packed U Y V Y order. Two
@ picture rows are converted per outer pass; both rows of a pass are built
@ from the same row of U and V samples.
@
@ In:   r0 = output address. Must be 128-bit aligned (stores use :128).
@       r1 = address of three consecutive words, read once on entry:
@            Y pointer, U pointer, V pointer.
@            Y must be 128-bit aligned, U and V 64-bit aligned.
@       r2 = row pitch in bytes between two luma rows. One output row is
@            twice that many bytes, and one pass consumes half that many
@            bytes of U and of V. Must be a non-zero multiple of 16, or the
@            inner loop never reaches its end address.
@       r3 = number of rows, treated as a signed value. Nothing is converted
@            when it is zero or negative. An odd count is rounded up to the
@            next even one (one extra row is read and written), so callers
@            should pass an even count.
@ Out:  none
@ Clobbers: r0-r3, r12, q0-q3, flags. r4-r7 are saved and restored.
@ NOTE(review): presumably called from C under the AAPCS; confirm the
@ prototype against the caller.
@-----------------------------------------------------------------------------
        .align
        .global i420_uyvy_neon
        .type   i420_uyvy_neon, %function
i420_uyvy_neon:
        push            {r4-r7, lr}
        cmp             HEIGHT, #0
        ble             3f                      @ no rows: touch no memory
        ldmia           r1,     {Y1, U, V}      @ must precede the write to r1
        add             O2,     O1,     PITCH, lsl #1
        add             Y2,     Y1,     PITCH
1:
        @ The first output row ends where the second one starts.
        mov             END_O1, O2
2:
        @ One step: 16 pixels of each of the two rows.
        vld1.u8         {d0},           [U,:64]!
        vld1.u8         {d1},           [V,:64]!
        vzip.u8         d0,     d1              @ q0 = U0 V0 U1 V1 ...
        vld1.u8         {q1},           [Y1,:128]!
        vmov            q2,     q0              @ keep chroma for the 2nd row,
                                                @ vzip overwrites both operands
        vzip.u8         q0,     q1              @ q0:q1 = U0 Y0 V0 Y1 ...
        vld1.u8         {q3},           [Y2,:128]!
        vzip.u8         q2,     q3
        vst1.u8         {q0-q1},        [O1,:128]!
        vst1.u8         {q2-q3},        [O2,:128]!

        cmp             O1,     END_O1
        bne             2b

        @ Step every row pointer past the pair just written. The chroma
        @ pointers already sit at their next row.
        mov             O1,     O2
        add             O2,     PITCH,  lsl #1
        mov             Y1,     Y2
        add             Y2,     PITCH

        @ Signed test so that an odd count still terminates.
        subs            HEIGHT, HEIGHT, #2
        bgt             1b
3:
        pop             {r4-r7, pc}
        .size   i420_uyvy_neon, .-i420_uyvy_neon