git.sesse.net Git - vlc/blobdiff - modules/video_chroma/i420_yuy2.h
video chromas: finalize SSE2 improvements
[vlc] / modules / video_chroma / i420_yuy2.h
index 1f35a3061f19fc3407dafc8e8922fadcffa323a3..a630157dc2e8c34012d8a2784ecff0b69cf09ae0 100644 (file)
@@ -366,8 +366,8 @@ movdqu    %%xmm1, 16(%1)  # Store high UYVY                               \n\
 #define SSE2_YUV420_YUYV_UNALIGNED                  \
     xmm1 = _mm_loadl_epi64((__m128i *)p_u);         \
     xmm2 = _mm_loadl_epi64((__m128i *)p_v);         \
-    xmm0 = _mm_load_si128((__m128i *)p_y1);         \
-    xmm3 = _mm_load_si128((__m128i *)p_y2);         \
+    xmm0 = _mm_loadu_si128((__m128i *)p_y1);        \
+    xmm3 = _mm_loadu_si128((__m128i *)p_y2);        \
     _mm_prefetch(p_line1, _MM_HINT_NTA);            \
     _mm_prefetch(p_line2, _MM_HINT_NTA);            \
     xmm1 = _mm_unpacklo_epi8(xmm1, xmm2);           \
@@ -402,8 +402,8 @@ movdqu    %%xmm1, 16(%1)  # Store high UYVY                               \n\
 #define SSE2_YUV420_YVYU_UNALIGNED                  \
     xmm1 = _mm_loadl_epi64((__m128i *)p_v);         \
     xmm2 = _mm_loadl_epi64((__m128i *)p_u);         \
-    xmm0 = _mm_load_si128((__m128i *)p_y1);         \
-    xmm3 = _mm_load_si128((__m128i *)p_y2);         \
+    xmm0 = _mm_loadu_si128((__m128i *)p_y1);        \
+    xmm3 = _mm_loadu_si128((__m128i *)p_y2);        \
     _mm_prefetch(p_line1, _MM_HINT_NTA);            \
     _mm_prefetch(p_line2, _MM_HINT_NTA);            \
     xmm1 = _mm_unpacklo_epi8(xmm1, xmm2);           \
@@ -439,8 +439,8 @@ movdqu    %%xmm1, 16(%1)  # Store high UYVY                               \n\
 #define SSE2_YUV420_UYVY_UNALIGNED                  \
     xmm1 = _mm_loadl_epi64((__m128i *)p_u);         \
     xmm2 = _mm_loadl_epi64((__m128i *)p_v);         \
-    xmm0 = _mm_load_si128((__m128i *)p_y1);         \
-    xmm3 = _mm_load_si128((__m128i *)p_y2);         \
+    xmm0 = _mm_loadu_si128((__m128i *)p_y1);        \
+    xmm3 = _mm_loadu_si128((__m128i *)p_y2);        \
     _mm_prefetch(p_line1, _MM_HINT_NTA);            \
     _mm_prefetch(p_line2, _MM_HINT_NTA);            \
     xmm1 = _mm_unpacklo_epi8(xmm1, xmm2);           \