git.sesse.net Git - vlc/blobdiff - modules/arm_neon/i420_yuyv.S
Contribs: fix xml2 installation on OSX
[vlc] / modules / arm_neon / i420_yuyv.S
index 427fd56c24222c879622d019896ff1dfb73f68f1..67c3043cf8ab1d40962e697f94d6667df8fc9feb 100644 (file)
@@ -1,7 +1,7 @@
  @*****************************************************************************
  @ i420_yuyv_neon.S : ARM NEONv1 I420 to YUYV chroma conversion
  @*****************************************************************************
- @ Copyright (C) 2009 Rémi Denis-Courmont
+ @ Copyright (C) 2009-2011 Rémi Denis-Courmont
  @
  @ This program is free software; you can redistribute it and/or modify
  @ it under the terms of the GNU General Public License as published by
        .text
 
 #define O1     r0
-#define        O2      r1
-#define        PITCH   r2
-#define        S_OFF   r3
-#define        Y1      r4
-#define        Y2      r5
-#define        U       r6
-#define        V       r7
-#define        HEIGHT  r8
-#define        END_O1  r12
+#define O2     r1
+#define WIDTH  r2
+#define HEIGHT r3
+#define Y1     r4
+#define Y2     r5
+#define U      r6
+#define V      r7
+#define YPITCH r8
+#define OPAD   r10
+#define YPAD   r11
+#define COUNT  ip
+#define OPITCH lr
 
        .align
        .global i420_yuyv_neon
        .type   i420_yuyv_neon, %function
 i420_yuyv_neon:
-       push            {r4-r8, lr}
-       ldr             HEIGHT, [sp, #(4*6)]
-       ldmia           r1,     {Y1, U, V}
-       add             O2,     O1,     PITCH, lsl #1
-       add             Y2,     Y1,     PITCH
-       add             Y2,     S_OFF
+       push            {r4-r8,r10-r11,lr}
+       ldmia           r0,     {O1, OPITCH}
+       ldmia           r1,     {Y1, U, V, YPITCH}
+       cmp             HEIGHT, #0
+       sub             OPAD,   OPITCH, WIDTH,  lsl #1
+       sub             YPAD,   YPITCH, WIDTH
 1:
-       mov             END_O1, O2
-       pld             [Y2]
+       movgts          COUNT,  WIDTH
+       add             O2,     O1,     OPITCH
+       add             Y2,     Y1,     YPITCH
+       pople           {r4-r8,r10-r11,pc}
 2:
        pld             [U, #64]
        vld1.u8         {d2},           [U,:64]!
@@ -52,6 +57,7 @@ i420_yuyv_neon:
        vld1.u8         {d3},           [V,:64]!
        pld             [Y1, #64]
        vzip.u8         d2,     d3
+       subs            COUNT,  COUNT,  #16
        vld1.u8         {q0},           [Y1,:128]!
        pld             [Y2, #64]
        vmov            q3,     q1
@@ -60,36 +66,29 @@ i420_yuyv_neon:
        vzip.u8         q2,     q3
        vst1.u8         {q0-q1},        [O1,:128]!
        vst1.u8         {q2-q3},        [O2,:128]!
+       bgt             2b
 
-       cmp             O1,     END_O1
-       bne             2b
-
-       sub             HEIGHT, #2
-       mov             O1,     O2
-       add             O2,     PITCH,  lsl #1
-       add             Y2,     S_OFF
-       mov             Y1,     Y2
-       add             Y2,     PITCH
-       add             Y2,     S_OFF
-       add             U,      S_OFF,  lsr #1
-       add             V,      S_OFF,  lsr #1
-
-       cmp             HEIGHT, #0
-       bne             1b
-
-       pop             {r4-r8, pc}
+       subs            HEIGHT, #2
+       add             O1,     O2,     OPAD
+       add             Y1,     Y2,     YPAD
+       add             U,      U,      YPAD,   lsr #1
+       add             V,      V,      YPAD,   lsr #1
+       b               1b
 
        .global i420_uyvy_neon
        .type   i420_uyvy_neon, %function
 i420_uyvy_neon:
-       push            {r4-r8, lr}
-       ldr             HEIGHT, [sp, #(4*6)]
-       ldmia           r1,     {Y1, U, V}
-       add             O2,     O1,     PITCH, lsl #1
-       add             Y2,     Y1,     PITCH
-       add             Y2,     S_OFF
+       push            {r4-r8,r10-r11,lr}
+       ldmia           r0,     {O1, OPITCH}
+       ldmia           r1,     {Y1, U, V, YPITCH}
+       cmp             HEIGHT, #0
+       sub             OPAD,   OPITCH, WIDTH,  lsl #1
+       sub             YPAD,   YPITCH, WIDTH
 1:
-       mov             END_O1, O2
+       movgts          COUNT,  WIDTH
+       add             O2,     O1,     OPITCH
+       add             Y2,     Y1,     YPITCH
+       pople           {r4-r8,r10-r11,pc}
 2:
        pld             [U, #64]
        vld1.u8         {d0},           [U,:64]!
@@ -97,6 +96,7 @@ i420_uyvy_neon:
        vld1.u8         {d1},           [V,:64]!
        pld             [Y1, #64]
        vzip.u8         d0,     d1
+       subs            COUNT,  COUNT,  #16
        vld1.u8         {q1},           [Y1,:128]!
        pld             [Y2, #64]
        vmov            q2,     q0
@@ -105,21 +105,11 @@ i420_uyvy_neon:
        vzip.u8         q2,     q3
        vst1.u8         {q0-q1},        [O1,:128]!
        vst1.u8         {q2-q3},        [O2,:128]!
+       bgt             2b
 
-       cmp             O1,     END_O1
-       bne             2b
-
-       sub             HEIGHT, #2
-       mov             O1,     O2
-       add             O2,     PITCH,  lsl #1
-       add             Y2,     S_OFF
-       mov             Y1,     Y2
-       add             Y2,     PITCH
-       add             Y2,     S_OFF
-       add             U,      S_OFF,  lsr #1
-       add             V,      S_OFF,  lsr #1
-
-       cmp             HEIGHT, #0
-       bne             1b
-
-       pop             {r4-r8, pc}
+       subs            HEIGHT, #2
+       add             O1,     O2,     OPAD
+       add             Y1,     Y2,     YPAD
+       add             U,      U,      YPAD,   lsr #1
+       add             V,      V,      YPAD,   lsr #1
+       b               1b