git.sesse.net Git - vlc/blobdiff - modules/arm_neon/i420_yuyv.S
arm_neon: fix the color conversion to use the actual pitch of the pictures instead...
[vlc] / modules / arm_neon / i420_yuyv.S
index 8d8645c372c71394d2eb77357b91dd3e370055fb..427fd56c24222c879622d019896ff1dfb73f68f1 100644 (file)
 #define O1     r0
 #define        O2      r1
 #define        PITCH   r2
-#define        HEIGHT  r3
+#define        S_OFF   r3
 #define        Y1      r4
 #define        Y2      r5
 #define        U       r6
 #define        V       r7
+#define        HEIGHT  r8
 #define        END_O1  r12
 
        .align
        .global i420_yuyv_neon
        .type   i420_yuyv_neon, %function
 i420_yuyv_neon:
-       push            {r4-r7, lr}
+       push            {r4-r8, lr}
+       ldr             HEIGHT, [sp, #(4*6)]
        ldmia           r1,     {Y1, U, V}
        add             O2,     O1,     PITCH, lsl #1
        add             Y2,     Y1,     PITCH
+       add             Y2,     S_OFF
 1:
        mov             END_O1, O2
        pld             [Y2]
@@ -64,21 +67,27 @@ i420_yuyv_neon:
        sub             HEIGHT, #2
        mov             O1,     O2
        add             O2,     PITCH,  lsl #1
+       add             Y2,     S_OFF
        mov             Y1,     Y2
        add             Y2,     PITCH
+       add             Y2,     S_OFF
+       add             U,      S_OFF,  lsr #1
+       add             V,      S_OFF,  lsr #1
 
        cmp             HEIGHT, #0
        bne             1b
 
-       pop             {r4-r7, pc}
+       pop             {r4-r8, pc}
 
        .global i420_uyvy_neon
        .type   i420_uyvy_neon, %function
 i420_uyvy_neon:
-       push            {r4-r7, lr}
+       push            {r4-r8, lr}
+       ldr             HEIGHT, [sp, #(4*6)]
        ldmia           r1,     {Y1, U, V}
        add             O2,     O1,     PITCH, lsl #1
        add             Y2,     Y1,     PITCH
+       add             Y2,     S_OFF
 1:
        mov             END_O1, O2
 2:
@@ -103,10 +112,14 @@ i420_uyvy_neon:
        sub             HEIGHT, #2
        mov             O1,     O2
        add             O2,     PITCH,  lsl #1
+       add             Y2,     S_OFF
        mov             Y1,     Y2
        add             Y2,     PITCH
+       add             Y2,     S_OFF
+       add             U,      S_OFF,  lsr #1
+       add             V,      S_OFF,  lsr #1
 
        cmp             HEIGHT, #0
        bne             1b
 
-       pop             {r4-r7, pc}
+       pop             {r4-r8, pc}