]> git.sesse.net Git - vlc/blob - modules/arm_neon/i420_yuyv.S
Fix YV12 support in opengl output (when shader is in use).
[vlc] / modules / arm_neon / i420_yuyv.S
1  @*****************************************************************************
2  @ i420_yuyv.S : ARM NEONv1 I420 to YUYV and UYVY chroma conversion
3  @*****************************************************************************
4  @ Copyright (C) 2009-2011 Rémi Denis-Courmont
5  @
6  @ This program is free software; you can redistribute it and/or modify
7  @ it under the terms of the GNU General Public License as published by
8  @ the Free Software Foundation; either version 2 of the License, or
9  @ (at your option) any later version.
10  @
11  @ This program is distributed in the hope that it will be useful,
12  @ but WITHOUT ANY WARRANTY; without even the implied warranty of
13  @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  @ GNU General Public License for more details.
15  @
16  @ You should have received a copy of the GNU General Public License
17  @ along with this program; if not, write to the Free Software Foundation,
18  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
19  @****************************************************************************/
20
21         .fpu neon                    @ permit NEON instructions and d/q registers
22         .text
23
24 #define O1      r0           /* output pointer, first row of the current pair (loaded from [r0]) */
25 #define O2      r1           /* output pointer, second row of the pair: O1 + OPITCH */
26 #define WIDTH   r2           /* width argument; one unit per luma byte, two output bytes each */
27 #define HEIGHT  r3           /* rows still to convert; reduced by 2 after each row pair */
28 #define Y1      r4           /* luma source pointer, first row of the pair */
29 #define Y2      r5           /* luma source pointer, second row of the pair: Y1 + YPITCH */
30 #define U       r6           /* U source pointer; 8 bytes consumed per 16 luma bytes */
31 #define V       r7           /* V source pointer; 8 bytes consumed per 16 luma bytes */
32 #define YPITCH  r8           /* luma row stride in bytes; chroma rows advance by half of it */
33 #define OPAD    r10          /* OPITCH - 2 * WIDTH: output bytes to skip at the end of a row */
34 #define YPAD    r11          /* YPITCH - WIDTH: luma bytes to skip at the end of a row */
35 #define COUNT   ip           /* pixels left in the current row; reduced by 16 per iteration */
36 #define OPITCH  lr           /* output row stride in bytes; lr is free since it is pushed on entry */
37
@-----------------------------------------------------------------------------
@ i420_yuyv_neon: convert planar I420 to packed YUYV, two rows per pass.
@
@ In:   r0 = pointer to two words: output base address, output pitch (bytes)
@       r1 = pointer to four words: Y base, U base, V base, Y pitch (bytes)
@       r2 = width in pixels
@       r3 = height in rows
@ Out:  nothing. Returns immediately when width <= 0 or height <= 0.
@ Clobbers: r0-r3, ip, q0-q3, flags. r4-r8, r10, r11 are saved and restored.
@
@ Each inner iteration converts 16 pixels of two vertically adjacent rows,
@ which share one set of 8 U and 8 V samples. The chroma row stride is taken
@ to be half of the Y pitch.
@
@ The width is rounded up to a multiple of 16 before use, so a trailing
@ partial group is converted as a full one and every row must have room for
@ the rounded width. The alignment qualifiers on the loads and stores require
@ Y and output rows to be 16-byte aligned and U and V rows 8-byte aligned.
@ Rows are always handled in pairs, so an odd height still converts the row
@ following the last one.
@-----------------------------------------------------------------------------
        .align
        .global i420_yuyv_neon
        .type   i420_yuyv_neon, %function
i420_yuyv_neon:
        push            {r4-r8,r10-r11,lr}
        ldmia           r0,     {O1, OPITCH}            @ output base and pitch
        ldmia           r1,     {Y1, U, V, YPITCH}      @ plane bases and Y pitch
        @ The pixel loop below always consumes whole groups of 16, so the
        @ end-of-row paddings must be derived from the rounded-up width.
        @ Otherwise every row pair would start late and lose the alignment
        @ promised to the NEON loads and stores.
        add             WIDTH,  WIDTH,  #15
        bic             WIDTH,  WIDTH,  #15
        cmp             HEIGHT, #0                      @ flags for the first movgts
        sub             OPAD,   OPITCH, WIDTH,  lsl #1  @ two output bytes per pixel
        sub             YPAD,   YPITCH, WIDTH
.Lyuyv_row_pair:
        @ Flags here come from HEIGHT (cmp above or subs below). When rows
        @ remain, movgts reloads the counter and replaces the flags with the
        @ sign of WIDTH, so the pople returns if either value is <= 0.
        movgts          COUNT,  WIDTH
        add             O2,     O1,     OPITCH          @ second output row of the pair
        add             Y2,     Y1,     YPITCH          @ second luma row of the pair
        pople           {r4-r8,r10-r11,pc}
.Lyuyv_group:
        pld             [U, #64]
        vld1.u8         {d2},           [U,:64]!        @ d2 = 8 U samples
        pld             [V, #64]
        vld1.u8         {d3},           [V,:64]!        @ d3 = 8 V samples
        pld             [Y1, #64]
        vzip.u8         d2,     d3                      @ q1 = u0 v0 u1 v1 ...
        subs            COUNT,  COUNT,  #16             @ flags are consumed by bgt below
        vld1.u8         {q0},           [Y1,:128]!      @ q0 = 16 luma bytes, first row
        pld             [Y2, #64]
        vmov            q3,     q1                      @ same chroma for the second row
        vzip.u8         q0,     q1                      @ q0:q1 = y0 u0 y1 v0 y2 u1 ...
        vld1.u8         {q2},           [Y2,:128]!      @ q2 = 16 luma bytes, second row
        vzip.u8         q2,     q3                      @ q2:q3 = second row, same layout
        vst1.u8         {q0-q1},        [O1,:128]!
        vst1.u8         {q2-q3},        [O2,:128]!
        bgt             .Lyuyv_group

        subs            HEIGHT, #2                      @ flags feed movgts at the loop head
        add             O1,     O2,     OPAD            @ O2 sits at the end of the second row
        add             Y1,     Y2,     YPAD
        add             U,      U,      YPAD,   lsr #1  @ chroma rows are half as long
        add             V,      V,      YPAD,   lsr #1
        b               .Lyuyv_row_pair
        .size   i420_yuyv_neon, . - i420_yuyv_neon
77
@-----------------------------------------------------------------------------
@ i420_uyvy_neon: convert planar I420 to packed UYVY, two rows per pass.
@
@ In:   r0 = pointer to two words: output base address, output pitch (bytes)
@       r1 = pointer to four words: Y base, U base, V base, Y pitch (bytes)
@       r2 = width in pixels
@       r3 = height in rows
@ Out:  nothing. Returns immediately when width <= 0 or height <= 0.
@ Clobbers: r0-r3, ip, q0-q3, flags. r4-r8, r10, r11 are saved and restored.
@
@ Each inner iteration converts 16 pixels of two vertically adjacent rows,
@ which share one set of 8 U and 8 V samples. The chroma row stride is taken
@ to be half of the Y pitch.
@
@ The width is rounded up to a multiple of 16 before use, so a trailing
@ partial group is converted as a full one and every row must have room for
@ the rounded width. The alignment qualifiers on the loads and stores require
@ Y and output rows to be 16-byte aligned and U and V rows 8-byte aligned.
@ Rows are always handled in pairs, so an odd height still converts the row
@ following the last one.
@-----------------------------------------------------------------------------
        .align
        .global i420_uyvy_neon
        .type   i420_uyvy_neon, %function
i420_uyvy_neon:
        push            {r4-r8,r10-r11,lr}
        ldmia           r0,     {O1, OPITCH}            @ output base and pitch
        ldmia           r1,     {Y1, U, V, YPITCH}      @ plane bases and Y pitch
        @ The pixel loop below always consumes whole groups of 16, so the
        @ end-of-row paddings must be derived from the rounded-up width.
        @ Otherwise every row pair would start late and lose the alignment
        @ promised to the NEON loads and stores.
        add             WIDTH,  WIDTH,  #15
        bic             WIDTH,  WIDTH,  #15
        cmp             HEIGHT, #0                      @ flags for the first movgts
        sub             OPAD,   OPITCH, WIDTH,  lsl #1  @ two output bytes per pixel
        sub             YPAD,   YPITCH, WIDTH
.Luyvy_row_pair:
        @ Flags here come from HEIGHT (cmp above or subs below). When rows
        @ remain, movgts reloads the counter and replaces the flags with the
        @ sign of WIDTH, so the pople returns if either value is <= 0.
        movgts          COUNT,  WIDTH
        add             O2,     O1,     OPITCH          @ second output row of the pair
        add             Y2,     Y1,     YPITCH          @ second luma row of the pair
        pople           {r4-r8,r10-r11,pc}
.Luyvy_group:
        pld             [U, #64]
        vld1.u8         {d0},           [U,:64]!        @ d0 = 8 U samples
        pld             [V, #64]
        vld1.u8         {d1},           [V,:64]!        @ d1 = 8 V samples
        pld             [Y1, #64]
        vzip.u8         d0,     d1                      @ q0 = u0 v0 u1 v1 ...
        subs            COUNT,  COUNT,  #16             @ flags are consumed by bgt below
        vld1.u8         {q1},           [Y1,:128]!      @ q1 = 16 luma bytes, first row
        pld             [Y2, #64]
        vmov            q2,     q0                      @ same chroma for the second row
        vzip.u8         q0,     q1                      @ q0:q1 = u0 y0 v0 y1 u1 y2 ...
        vld1.u8         {q3},           [Y2,:128]!      @ q3 = 16 luma bytes, second row
        vzip.u8         q2,     q3                      @ q2:q3 = second row, same layout
        vst1.u8         {q0-q1},        [O1,:128]!
        vst1.u8         {q2-q3},        [O2,:128]!
        bgt             .Luyvy_group

        subs            HEIGHT, #2                      @ flags feed movgts at the loop head
        add             O1,     O2,     OPAD            @ O2 sits at the end of the second row
        add             Y1,     Y2,     YPAD
        add             U,      U,      YPAD,   lsr #1  @ chroma rows are half as long
        add             V,      V,      YPAD,   lsr #1
        b               .Luyvy_row_pair
        .size   i420_uyvy_neon, . - i420_uyvy_neon