--- /dev/null
+ @*****************************************************************************
+ @ i420_yuyv_neon.S : ARM NEONv1 I420 to YUYV chroma conversion
+ @*****************************************************************************
+ @ Copyright (C) 2009 Rémi Denis-Courmont
+ @
+ @ This program is free software; you can redistribute it and/or modify
+ @ it under the terms of the GNU General Public License as published by
+ @ the Free Software Foundation; either version 2 of the License, or
+ @ (at your option) any later version.
+ @
+ @ This program is distributed in the hope that it will be useful,
+ @ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ @ GNU General Public License for more details.
+ @
+ @ You should have received a copy of the GNU General Public License
+ @ along with this program; if not, write to the Free Software Foundation,
+ @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ @****************************************************************************/
+
+ .fpu neon
+ .text
+
+ @ Register aliases shared by the routines in this file. They are expanded by
+ @ the C preprocessor, so the file has to be preprocessed before it is
+ @ assembled. Keep comments off the define lines themselves: any trailing
+ @ text there would become part of the replacement.
+ @
+ @ r0-r3 are read as inputs by the routines below, r4-r7 are saved and
+ @ restored by their push/pop pairs, and r12 is used as scratch.
+ @ r1 is first read as a pointer (ldmia) and only afterwards reused as O2.
+#define O1 r0
+#define O2 r1
+#define PITCH r2
+#define HEIGHT r3
+#define Y1 r4
+#define Y2 r5
+#define U r6
+#define V r7
+#define END_O1 r12
+
+ .align
+ .global i420_yuyv_neon
+ .type i420_yuyv_neon, %function
+ @ i420_yuyv_neon: interleave planar Y, U and V bytes into packed Y U Y V.
+ @
+ @ In:  r0 = destination; every output row is 2 * pitch bytes long
+ @      r1 = address of three consecutive words holding the Y, U and V
+ @           source addresses, in that order
+ @      r2 = pitch: bytes per luma row, a non-zero multiple of 16 (the inner
+ @           loop emits 32 output bytes per pass and stops on exact equality)
+ @      r3 = height: number of luma rows
+ @
+ @ Rows are converted two at a time and share one row of chroma. The source
+ @ pointers simply keep advancing, so luma rows must be contiguous at pitch
+ @ bytes and chroma rows contiguous at pitch / 2 bytes. The loads and stores
+ @ carry alignment hints: luma rows and the destination need 16-byte
+ @ alignment, the chroma planes 8-byte alignment.
+ @
+ @ A height below two converts nothing, and the last row of an odd height is
+ @ left unconverted, so the routine never touches rows outside the picture.
+ @
+ @ Clobbers: r0-r3, r12, q0-q3, flags. r4-r7 are preserved.
+i420_yuyv_neon:
+ push {r4-r7, lr}
+ subs HEIGHT, HEIGHT, #2        @ counter is biased by one row pair
+ blt 3f                         @ fewer than two rows: nothing to do
+ ldmia r1, {Y1, U, V}           @ fetch plane addresses before r1 is reused
+ add O2, O1, PITCH, lsl #1      @ second output row
+ add Y2, Y1, PITCH              @ second luma row
+1:
+ mov END_O1, O2                 @ first output row ends where the second starts
+ pld [Y2]
+2:
+ pld [U, #64]
+ vld1.u8 {d2}, [U,:64]!         @ 8 U samples
+ pld [V, #64]
+ vld1.u8 {d3}, [V,:64]!         @ 8 V samples
+ pld [Y1, #64]
+ vzip.u8 d2, d3                 @ q1 = U0 V0 U1 V1 ...
+ vld1.u8 {q0}, [Y1,:128]!       @ 16 luma samples, first row
+ pld [Y2, #64]
+ vmov q3, q1                    @ keep a chroma copy for the second row
+ vzip.u8 q0, q1                 @ q0:q1 = Y0 U0 Y1 V0 ...
+ vld1.u8 {q2}, [Y2,:128]!       @ 16 luma samples, second row
+ vzip.u8 q2, q3                 @ q2:q3 = same layout for the second row
+ vst1.u8 {q0-q1}, [O1,:128]!
+ vst1.u8 {q2-q3}, [O2,:128]!
+
+ cmp O1, END_O1
+ bne 2b
+
+ @ Both cursors of each kind have moved one row forward; step over the row
+ @ that the other cursor just handled.
+ mov O1, O2
+ add O2, O2, PITCH, lsl #1
+ mov Y1, Y2
+ add Y2, Y2, PITCH
+
+ subs HEIGHT, HEIGHT, #2
+ bge 1b                         @ at least two more rows remain
+3:
+ pop {r4-r7, pc}
+ .size i420_yuyv_neon, .-i420_yuyv_neon
+
+ .align
+ .global i420_uyvy_neon
+ .type i420_uyvy_neon, %function
+ @ i420_uyvy_neon: interleave planar Y, U and V bytes into packed U Y V Y.
+ @
+ @ In:  r0 = destination; every output row is 2 * pitch bytes long
+ @      r1 = address of three consecutive words holding the Y, U and V
+ @           source addresses, in that order
+ @      r2 = pitch: bytes per luma row, a non-zero multiple of 16 (the inner
+ @           loop emits 32 output bytes per pass and stops on exact equality)
+ @      r3 = height: number of luma rows
+ @
+ @ Rows are converted two at a time and share one row of chroma. The source
+ @ pointers simply keep advancing, so luma rows must be contiguous at pitch
+ @ bytes and chroma rows contiguous at pitch / 2 bytes. The loads and stores
+ @ carry alignment hints: luma rows and the destination need 16-byte
+ @ alignment, the chroma planes 8-byte alignment.
+ @
+ @ A height below two converts nothing, and the last row of an odd height is
+ @ left unconverted, so the routine never touches rows outside the picture.
+ @
+ @ Clobbers: r0-r3, r12, q0-q3, flags. r4-r7 are preserved.
+i420_uyvy_neon:
+ push {r4-r7, lr}
+ subs HEIGHT, HEIGHT, #2        @ counter is biased by one row pair
+ blt 3f                         @ fewer than two rows: nothing to do
+ ldmia r1, {Y1, U, V}           @ fetch plane addresses before r1 is reused
+ add O2, O1, PITCH, lsl #1      @ second output row
+ add Y2, Y1, PITCH              @ second luma row
+1:
+ mov END_O1, O2                 @ first output row ends where the second starts
+ pld [Y2]
+2:
+ pld [U, #64]
+ vld1.u8 {d0}, [U,:64]!         @ 8 U samples
+ pld [V, #64]
+ vld1.u8 {d1}, [V,:64]!         @ 8 V samples
+ pld [Y1, #64]
+ vzip.u8 d0, d1                 @ q0 = U0 V0 U1 V1 ...
+ vld1.u8 {q1}, [Y1,:128]!       @ 16 luma samples, first row
+ pld [Y2, #64]
+ vmov q2, q0                    @ keep a chroma copy for the second row
+ vzip.u8 q0, q1                 @ q0:q1 = U0 Y0 V0 Y1 ...
+ vld1.u8 {q3}, [Y2,:128]!       @ 16 luma samples, second row
+ vzip.u8 q2, q3                 @ q2:q3 = same layout for the second row
+ vst1.u8 {q0-q1}, [O1,:128]!
+ vst1.u8 {q2-q3}, [O2,:128]!
+
+ cmp O1, END_O1
+ bne 2b
+
+ @ Both cursors of each kind have moved one row forward; step over the row
+ @ that the other cursor just handled.
+ mov O1, O2
+ add O2, O2, PITCH, lsl #1
+ mov Y1, Y2
+ add Y2, Y2, PITCH
+
+ subs HEIGHT, HEIGHT, #2
+ bge 1b                         @ at least two more rows remain
+3:
+ pop {r4-r7, pc}
+ .size i420_uyvy_neon, .-i420_uyvy_neon