X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libswscale%2Fx86%2Frgb_2_rgb.asm;h=29b856e281424076bc1d4442712f6e3ae4dc7c93;hb=a5e24be52ab0ebea908a566a1b0dbf2584f5631a;hp=156b4d2c746579fbbdaf9a55ef66fad17a9c39b2;hpb=9a88a47be4da9cd25a582feec7cc36790500b481;p=ffmpeg diff --git a/libswscale/x86/rgb_2_rgb.asm b/libswscale/x86/rgb_2_rgb.asm index 156b4d2c746..29b856e2814 100644 --- a/libswscale/x86/rgb_2_rgb.asm +++ b/libswscale/x86/rgb_2_rgb.asm @@ -2,6 +2,7 @@ ;* Copyright Nick Kurshev ;* Copyright Michael (michaelni@gmx.at) ;* Copyright 2018 Jokyo Images +;* Copyright Ivo van Poorten ;* ;* This file is part of FFmpeg. ;* @@ -24,6 +25,7 @@ SECTION_RODATA +pb_mask_shuffle2103_mmx times 8 dw 255 pb_shuffle2103: db 2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15 pb_shuffle0321: db 0, 3, 2, 1, 4, 7, 6, 5, 8, 11, 10, 9, 12, 15, 14, 13 pb_shuffle1230: db 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12 @@ -42,6 +44,68 @@ SECTION .text %endif %endmacro +;------------------------------------------------------------------------------ +; shuffle_bytes_2103_mmext (const uint8_t *src, uint8_t *dst, int src_size) +;------------------------------------------------------------------------------ +INIT_MMX mmxext +cglobal shuffle_bytes_2103, 3, 5, 8, src, dst, w, tmp, x + mova m6, [pb_mask_shuffle2103_mmx] + mova m7, m6 + psllq m7, 8 + + movsxdifnidn wq, wd + mov xq, wq + + add srcq, wq + add dstq, wq + neg wq + +;calc scalar loop + and xq, mmsize*2 -4 + je .loop_simd + +.loop_scalar: + mov tmpb, [srcq + wq + 2] + mov [dstq+wq + 0], tmpb + mov tmpb, [srcq + wq + 1] + mov [dstq+wq + 1], tmpb + mov tmpb, [srcq + wq + 0] + mov [dstq+wq + 2], tmpb + mov tmpb, [srcq + wq + 3] + mov [dstq+wq + 3], tmpb + add wq, 4 + sub xq, 4 + jg .loop_scalar + +;check if src_size < mmsize * 2 +cmp wq, 0 +jge .end + +.loop_simd: + movu m0, [srcq+wq] + movu m1, [srcq+wq+8] + + pshufw m3, m0, 177 + pshufw m5, m1, 177 + + pand m0, m7 + pand m3, m6 + + pand m1, m7 + pand m5, m6 + + por m0, m3 + por m1, m5 + + movu [dstq+wq], m0 + movu [dstq+wq + 8], m1 + + add wq, mmsize*2 + jl .loop_simd + +.end: + RET + ;------------------------------------------------------------------------------ ; shuffle_bytes_## (const uint8_t *src, uint8_t *dst, int src_size) ;------------------------------------------------------------------------------