;* Copyright Nick Kurshev
;* Copyright Michael (michaelni@gmx.at)
;* Copyright 2018 Jokyo Images
+;* Copyright Ivo van Poorten
;*
;* This file is part of FFmpeg.
;*
SECTION_RODATA
+pb_mask_shuffle2103_mmx times 8 dw 255
pb_shuffle2103: db 2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15
pb_shuffle0321: db 0, 3, 2, 1, 4, 7, 6, 5, 8, 11, 10, 9, 12, 15, 14, 13
pb_shuffle1230: db 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
%endif
%endmacro
+;------------------------------------------------------------------------------
+; shuffle_bytes_2103_mmext (const uint8_t *src, uint8_t *dst, int src_size)
+;------------------------------------------------------------------------------
+INIT_MMX mmxext
+cglobal shuffle_bytes_2103, 3, 5, 8, src, dst, w, tmp, x
+ mova m6, [pb_mask_shuffle2103_mmx]
+ mova m7, m6
+ psllq m7, 8
+
+ movsxdifnidn wq, wd
+ mov xq, wq
+
+ add srcq, wq
+ add dstq, wq
+ neg wq
+
+;calc scalar loop
+ and xq, mmsize*2 -4
+ je .loop_simd
+
+.loop_scalar:
+ mov tmpb, [srcq + wq + 2]
+ mov [dstq+wq + 0], tmpb
+ mov tmpb, [srcq + wq + 1]
+ mov [dstq+wq + 1], tmpb
+ mov tmpb, [srcq + wq + 0]
+ mov [dstq+wq + 2], tmpb
+ mov tmpb, [srcq + wq + 3]
+ mov [dstq+wq + 3], tmpb
+ add wq, 4
+ sub xq, 4
+ jg .loop_scalar
+
+;check if src_size < mmsize * 2
+cmp wq, 0
+jge .end
+
+.loop_simd:
+ movu m0, [srcq+wq]
+ movu m1, [srcq+wq+8]
+
+ pshufw m3, m0, 177
+ pshufw m5, m1, 177
+
+ pand m0, m7
+ pand m3, m6
+
+ pand m1, m7
+ pand m5, m6
+
+ por m0, m3
+ por m1, m5
+
+ movu [dstq+wq], m0
+ movu [dstq+wq + 8], m1
+
+ add wq, mmsize*2
+ jl .loop_simd
+
+.end:
+ RET
+
;------------------------------------------------------------------------------
; shuffle_bytes_## (const uint8_t *src, uint8_t *dst, int src_size)
;------------------------------------------------------------------------------