From 5f8d701a9ab972ee7e3b25125b39fe24eb66c1cd Mon Sep 17 00:00:00 2001 From: Gildas Bazin Date: Sat, 20 Mar 2010 09:34:48 +0000 Subject: [PATCH] arm_neon: fix the color conversion to use the actual pitch of the pictures instead of trying to guess it. --- modules/arm_neon/i420_yuy2.c | 13 ++++++++----- modules/arm_neon/i420_yuyv.S | 23 ++++++++++++++++++----- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/modules/arm_neon/i420_yuy2.c b/modules/arm_neon/i420_yuy2.c index 608903beb1..f0ef0feb0a 100644 --- a/modules/arm_neon/i420_yuy2.c +++ b/modules/arm_neon/i420_yuy2.c @@ -36,16 +36,18 @@ vlc_module_begin () vlc_module_end () void i420_yuyv_neon (uint8_t *out, const uint8_t **in, - uintptr_t pitch, uintptr_t height); + unsigned int pitch, unsigned int s_off, + unsigned int height); static void I420_YUYV (filter_t *filter, picture_t *src, picture_t *dst) { uint8_t *out = dst->p->p_pixels; const uint8_t *yuv[3] = { src->Y_PIXELS, src->U_PIXELS, src->V_PIXELS, }; - size_t pitch = (filter->fmt_in.video.i_width + 15) & ~15; size_t height = filter->fmt_in.video.i_height; + int i_pitch = (dst->p->i_pitch >> 1) & ~0xF; + int s_offset = src->p->i_pitch - i_pitch; - i420_yuyv_neon (out, yuv, pitch, height); + i420_yuyv_neon (out, yuv, i_pitch, s_offset, height); } void i420_uyvy_neon (uint8_t *out, const uint8_t **in, @@ -55,10 +57,11 @@ static void I420_UYVY (filter_t *filter, picture_t *src, picture_t *dst) { uint8_t *out = dst->p->p_pixels; const uint8_t *yuv[3] = { src->Y_PIXELS, src->U_PIXELS, src->V_PIXELS, }; - size_t pitch = (filter->fmt_in.video.i_width + 15) & ~15; size_t height = filter->fmt_in.video.i_height; + int i_pitch = (dst->p->i_pitch >> 1) & ~0xF; + int s_offset = src->p->i_pitch - i_pitch; - i420_yuyv_neon (out, yuv, pitch, height); + i420_yuyv_neon (out, yuv, i_pitch, s_offset, height); } VIDEO_FILTER_WRAPPER (I420_YUYV) diff --git a/modules/arm_neon/i420_yuyv.S b/modules/arm_neon/i420_yuyv.S index 8d8645c372..427fd56c24 100644 --- a/modules/arm_neon/i420_yuyv.S +++ b/modules/arm_neon/i420_yuyv.S @@ -24,21 +24,24 @@ #define O1 r0 #define O2 r1 #define PITCH r2 -#define HEIGHT r3 +#define S_OFF r3 #define Y1 r4 #define Y2 r5 #define U r6 #define V r7 +#define HEIGHT r8 #define END_O1 r12 .align .global i420_yuyv_neon .type i420_yuyv_neon, %function i420_yuyv_neon: - push {r4-r7, lr} + push {r4-r8, lr} + ldr HEIGHT, [sp, #(4*6)] ldmia r1, {Y1, U, V} add O2, O1, PITCH, lsl #1 add Y2, Y1, PITCH + add Y2, S_OFF 1: mov END_O1, O2 pld [Y2] @@ -64,21 +67,27 @@ i420_yuyv_neon: sub HEIGHT, #2 mov O1, O2 add O2, PITCH, lsl #1 + add Y2, S_OFF mov Y1, Y2 add Y2, PITCH + add Y2, S_OFF + add U, S_OFF, lsr #1 + add V, S_OFF, lsr #1 cmp HEIGHT, #0 bne 1b - pop {r4-r7, pc} + pop {r4-r8, pc} .global i420_uyvy_neon .type i420_uyvy_neon, %function i420_uyvy_neon: - push {r4-r7, lr} + push {r4-r8, lr} + ldr HEIGHT, [sp, #(4*6)] ldmia r1, {Y1, U, V} add O2, O1, PITCH, lsl #1 add Y2, Y1, PITCH + add Y2, S_OFF 1: mov END_O1, O2 2: @@ -103,10 +112,14 @@ i420_uyvy_neon: sub HEIGHT, #2 mov O1, O2 add O2, PITCH, lsl #1 + add Y2, S_OFF mov Y1, Y2 add Y2, PITCH + add Y2, S_OFF + add U, S_OFF, lsr #1 + add V, S_OFF, lsr #1 cmp HEIGHT, #0 bne 1b - pop {r4-r7, pc} + pop {r4-r8, pc} -- 2.39.2