]> git.sesse.net Git - vlc/blob - modules/arm_neon/yuyv_i422.S
Contribs: fix xml2 installation on OSX
[vlc] / modules / arm_neon / yuyv_i422.S
1  @*****************************************************************************
2  @ yuyv_i422_neon.S : ARM NEONv1 packed to planar YUV422 conversion
3  @*****************************************************************************
4  @ Copyright (C) 2011 Rémi Denis-Courmont
5  @
6  @ This program is free software; you can redistribute it and/or modify
7  @ it under the terms of the GNU Lesser General Public License as published by
8  @ the Free Software Foundation; either version 2.1 of the License, or
9  @ (at your option) any later version.
10  @
11  @ This program is distributed in the hope that it will be useful,
12  @ but WITHOUT ANY WARRANTY; without even the implied warranty of
13  @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  @ GNU Lesser General Public License for more details.
15  @
16  @ You should have received a copy of the GNU Lesser General Public License
17  @ along with this program; if not, write to the Free Software Foundation,
18  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
19  @****************************************************************************/
20
21         .fpu neon               @ let the assembler accept the NEON (vld1/vuzp/vst1) mnemonics
22         .text
23
24 #define I       r0      /* packed source pointer, post-incremented by the vld1 loads */
25 #define IPAD    r1      /* source pitch, turned into the per-row source padding */
26 #define WIDTH   r2      /* row width in pixels (third argument) */
27 #define HEIGHT  r3      /* rows left to convert (fourth argument) */
28 #define Y       r4      /* luma plane write pointer */
29 #define U       r5      /* first chroma plane write pointer */
30 #define V       r6      /* second chroma plane write pointer */
31 #define COUNT   ip      /* pixels left in the current row, stepped by 16 */
32 #define YPAD    lr      /* Y pitch, turned into the per-row luma padding */
33
@-----------------------------------------------------------------------------
@ yuyv_i422_neon: split packed YUYV rows (Y0 U0 Y1 V0 ...) into separate
@ Y, U and V planes.
@
@ In:    r0 -> four words: Y pointer, U pointer, V pointer, Y pitch (bytes)
@        r1 -> two words: packed source pointer, source pitch (bytes)
@        r2 =  width in pixels
@        r3 =  height in rows
@ Out:   planes written through the Y, U and V pointers
@ Clobb: r0-r3, ip, q0, q1, flags (r4-r6 and lr are saved and restored)
@
@ Each loop pass converts 16 pixels: 32 source bytes become 16 Y bytes,
@ 8 U bytes and 8 V bytes. The width is rounded up to a multiple of 16, so
@ up to 15 pixels past the requested width are read and written per row.
@ The chroma pointers are stepped with half the luma padding, i.e. the code
@ assumes a chroma pitch of half the Y pitch.
@ The :128 and :64 qualifiers require 16-byte aligned source and Y addresses
@ and 8-byte aligned U and V addresses at every access.
@-----------------------------------------------------------------------------
        .align
        .global yuyv_i422_neon
        .type   yuyv_i422_neon, %function
yuyv_i422_neon:
        push            {r4-r6,lr}
        ldmia           r0,     {Y, U, V, YPAD}         @ YPAD holds the Y pitch for now
        ldmia           r1,     {I, IPAD}               @ IPAD holds the source pitch for now
        @ The inner loop only works in whole groups of 16 pixels, so the
        @ pointers really advance by WIDTH rounded up to a multiple of 16.
        @ Derive the paddings from that rounded value; with the raw width
        @ every row after the first would start at a wrong offset.
        add             WIDTH,  WIDTH,  #15
        bic             WIDTH,  WIDTH,  #15
        cmp             HEIGHT, #0
        sub             YPAD,   YPAD,   WIDTH           @ luma bytes to skip after each row
        sub             IPAD,   IPAD,   WIDTH,  lsl #1  @ source bytes to skip (2 bytes per pixel)
1:
        @ Flags come from the cmp above (first row) or from subs HEIGHT
        @ (later rows). While rows remain, reload COUNT and test WIDTH;
        @ return when no rows remain or the width is not positive.
        movgts          COUNT,  WIDTH
        pople           {r4-r6,pc}
2:
        pld             [I, #64]                        @ prefetch 64 bytes ahead of the source pointer
        subs            COUNT,  COUNT,  #16             @ flags are consumed by bgt below
        vld1.u8         {q0-q1},        [I,:128]!       @ 32 source bytes = 16 pixels
        vuzp.u8         q0,     q1                      @ q0 = even bytes (Y), q1 = odd bytes (U/V mixed)
        @ TODO: unroll (1 cycle stall)
        vuzp.u8         d2,     d3                      @ d2 = U samples, d3 = V samples
        vst1.u8         {q0},           [Y,:128]!
        vst1.u8         {d2},           [U,:64]!
        vst1.u8         {d3},           [V,:64]!
        bgt             2b

        subs            HEIGHT, #1                      @ sets the flags tested at label 1
        add             I,      I,      IPAD
        add             Y,      Y,      YPAD
        add             U,      U,      YPAD,   lsr #1  @ chroma padding is half the luma padding
        add             V,      V,      YPAD,   lsr #1
        b               1b
65
@-----------------------------------------------------------------------------
@ uyvy_i422_neon: split packed UYVY rows (U0 Y0 V0 Y1 ...) into separate
@ Y, U and V planes.
@
@ In:    r0 -> four words: Y pointer, U pointer, V pointer, Y pitch (bytes)
@        r1 -> two words: packed source pointer, source pitch (bytes)
@        r2 =  width in pixels
@        r3 =  height in rows
@ Out:   planes written through the Y, U and V pointers
@ Clobb: r0-r3, ip, q0, q1, flags (r4-r6 and lr are saved and restored)
@
@ Each loop pass converts 16 pixels: 32 source bytes become 16 Y bytes,
@ 8 U bytes and 8 V bytes. The width is rounded up to a multiple of 16, so
@ up to 15 pixels past the requested width are read and written per row.
@ The chroma pointers are stepped with half the luma padding, i.e. the code
@ assumes a chroma pitch of half the Y pitch.
@ The :128 and :64 qualifiers require 16-byte aligned source and Y addresses
@ and 8-byte aligned U and V addresses at every access.
@-----------------------------------------------------------------------------
        .align
        .global uyvy_i422_neon
        .type   uyvy_i422_neon, %function
uyvy_i422_neon:
        push            {r4-r6,lr}
        ldmia           r0,     {Y, U, V, YPAD}         @ YPAD holds the Y pitch for now
        ldmia           r1,     {I, IPAD}               @ IPAD holds the source pitch for now
        @ The inner loop only works in whole groups of 16 pixels, so the
        @ pointers really advance by WIDTH rounded up to a multiple of 16.
        @ Derive the paddings from that rounded value; with the raw width
        @ every row after the first would start at a wrong offset.
        add             WIDTH,  WIDTH,  #15
        bic             WIDTH,  WIDTH,  #15
        cmp             HEIGHT, #0
        sub             YPAD,   YPAD,   WIDTH           @ luma bytes to skip after each row
        sub             IPAD,   IPAD,   WIDTH,  lsl #1  @ source bytes to skip (2 bytes per pixel)
1:
        @ Flags come from the cmp above (first row) or from subs HEIGHT
        @ (later rows). While rows remain, reload COUNT and test WIDTH;
        @ return when no rows remain or the width is not positive.
        movgts          COUNT,  WIDTH
        pople           {r4-r6,pc}
2:
        pld             [I, #64]                        @ prefetch 64 bytes ahead of the source pointer
        subs            COUNT,  COUNT,  #16             @ flags are consumed by bgt below
        vld1.u8         {q0-q1},        [I,:128]!       @ 32 source bytes = 16 pixels
        vuzp.u8         q0,     q1                      @ q0 = even bytes (U/V mixed), q1 = odd bytes (Y)
        vuzp.u8         d0,     d1                      @ d0 = U samples, d1 = V samples
        vst1.u8         {q1},           [Y,:128]!
        vst1.u8         {d0},           [U,:64]!
        vst1.u8         {d1},           [V,:64]!
        bgt             2b

        subs            HEIGHT, #1                      @ sets the flags tested at label 1
        add             I,      I,      IPAD
        add             Y,      Y,      YPAD
        add             U,      U,      YPAD,   lsr #1  @ chroma padding is half the luma padding
        add             V,      V,      YPAD,   lsr #1
        b               1b