git.sesse.net Git - vlc/blobdiff - modules/codec/avcodec/dxva2.c
Support WMA Lossless with recent enough libavcodec
[vlc] / modules / codec / avcodec / dxva2.c
index ddf52772b1508e2975245603f30966e74695c170..15849c5220a11d2c5ab40c77f359811f610c770e 100644 (file)
@@ -1,5 +1,5 @@
 /*****************************************************************************
- * va.c: Video Acceleration helpers
+ * dxva2.c: Video Acceleration helpers
  *****************************************************************************
  * Copyright (C) 2009 Geoffroy Couprie
  * Copyright (C) 2009 Laurent Aimar
 # include "config.h"
 #endif
 
+
+#if defined(HAVE_LIBAVCODEC_AVCODEC_H) && defined(HAVE_AVCODEC_DXVA2)
+# if _WIN32_WINNT < 0x600
+/* dxva2 needs Vista support */
+#  undef _WIN32_WINNT
+#  define _WIN32_WINNT 0x600
+# endif
+#endif
+
 #include <vlc_common.h>
 #include <vlc_picture.h>
 #include <vlc_fourcc.h>
 #ifdef HAVE_LIBAVCODEC_AVCODEC_H
 #   include <libavcodec/avcodec.h>
 #   ifdef HAVE_AVCODEC_DXVA2
+#       define DXVA2API_USE_BITFIELDS
+#       define COBJMACROS
 #       include <libavcodec/dxva2.h>
 #   endif
-#elif defined(HAVE_FFMPEG_AVCODEC_H)
-#   include <ffmpeg/avcodec.h>
 #else
 #   include <avcodec.h>
 #endif
 
 #include "avcodec.h"
 #include "va.h"
+#include "copy.h"
 
 #ifdef HAVE_AVCODEC_DXVA2
 
 #include <shlwapi.h>
 #include <d3d9.h>
 
-/* FIXME */
-#define CoTaskMemFree(x)
-
-/* */
-#define DXVA2_E_NOT_INITIALIZED     MAKE_HRESULT(1, 4, 4096)
-#define DXVA2_E_NEW_VIDEO_DEVICE    MAKE_HRESULT(1, 4, 4097)
-#define DXVA2_E_VIDEO_DEVICE_LOCKED MAKE_HRESULT(1, 4, 4098)
-#define DXVA2_E_NOT_AVAILABLE       MAKE_HRESULT(1, 4, 4099)
+#include <initguid.h> /* must be last included to not redefine existing GUIDs */
+
+/* dxva2api.h GUIDs: http://msdn.microsoft.com/en-us/library/windows/desktop/ms697067(v=vs100).aspx
+ * MS_GUID() expands to nothing by default: these GUIDs are assumed to be declared in dxva2api.h */
+#define MS_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8)
+
+#ifdef __MINGW32__
+# include <_mingw.h>
+
+# if defined(__MINGW64_VERSION_MAJOR) && __MINGW64_VERSION_MAJOR < 3 /* mingw-w64 older than v3: replace the header's Release macro with a direct vtable call */
+#  undef  IDirect3DDeviceManager9_Release
+#  define IDirect3DDeviceManager9_Release(This) (This)->lpVtbl->Release(This)
+# endif
+
+# if !defined(__MINGW64_VERSION_MAJOR) || __MINGW64_VERSION_MAJOR < 3 /* plain MinGW, or mingw-w64 older than v3 */
+#  undef MS_GUID
+#  define MS_GUID DEFINE_GUID /* dxva2api.h fails to declare those, redefine as static */
+#  define DXVA2_E_NEW_VIDEO_DEVICE MAKE_HRESULT(1, 4, 4097) /* NOTE(review): presumably missing from these toolchains' headers too - confirm */
+# endif
+
+#endif /* __MINGW32__ */
+
+MS_GUID(IID_IDirectXVideoDecoderService, 0xfc51a551, 0xd5e7, 0x11d9, 0xaf,0x55,0x00,0x05,0x4e,0x43,0xff,0x02);
+MS_GUID(IID_IDirectXVideoAccelerationService, 0xfc51a550, 0xd5e7, 0x11d9, 0xaf,0x55,0x00,0x05,0x4e,0x43,0xff,0x02);
+
+MS_GUID    (DXVA_NoEncrypt,                         0x1b81bed0, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+
+/* Codec capabilities GUID, sorted by codec */
+MS_GUID    (DXVA2_ModeMPEG2_MoComp,                 0xe6a9f44b, 0x61b0, 0x4563, 0x9e, 0xa4, 0x63, 0xd2, 0xa3, 0xc6, 0xfe, 0x66);
+MS_GUID    (DXVA2_ModeMPEG2_IDCT,                   0xbf22ad00, 0x03ea, 0x4690, 0x80, 0x77, 0x47, 0x33, 0x46, 0x20, 0x9b, 0x7e);
+MS_GUID    (DXVA2_ModeMPEG2_VLD,                    0xee27417f, 0x5e28, 0x4e65, 0xbe, 0xea, 0x1d, 0x26, 0xb5, 0x08, 0xad, 0xc9);
+DEFINE_GUID(DXVA2_ModeMPEG2and1_VLD,                0x86695f12, 0x340e, 0x4f04, 0x9f, 0xd3, 0x92, 0x53, 0xdd, 0x32, 0x74, 0x60);
+DEFINE_GUID(DXVA2_ModeMPEG1_VLD,                    0x6f3ec719, 0x3735, 0x42cc, 0x80, 0x63, 0x65, 0xcc, 0x3c, 0xb3, 0x66, 0x16);
+
+MS_GUID    (DXVA2_ModeH264_A,                       0x1b81be64, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+MS_GUID    (DXVA2_ModeH264_B,                       0x1b81be65, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+MS_GUID    (DXVA2_ModeH264_C,                       0x1b81be66, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+MS_GUID    (DXVA2_ModeH264_D,                       0x1b81be67, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+MS_GUID    (DXVA2_ModeH264_E,                       0x1b81be68, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+MS_GUID    (DXVA2_ModeH264_F,                       0x1b81be69, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+DEFINE_GUID(DXVA_ModeH264_VLD_Multiview,            0x9901CCD3, 0xca12, 0x4b7e, 0x86, 0x7a, 0xe2, 0x22, 0x3d, 0x92, 0x55, 0xc3); // MVC
+DEFINE_GUID(DXVA_ModeH264_VLD_WithFMOASO_NoFGT,     0xd5f04ff9, 0x3418, 0x45d8, 0x95, 0x61, 0x32, 0xa7, 0x6a, 0xae, 0x2d, 0xdd);
+DEFINE_GUID(DXVADDI_Intel_ModeH264_A,               0x604F8E64, 0x4951, 0x4c54, 0x88, 0xFE, 0xAB, 0xD2, 0x5C, 0x15, 0xB3, 0xD6);
+DEFINE_GUID(DXVADDI_Intel_ModeH264_C,               0x604F8E66, 0x4951, 0x4c54, 0x88, 0xFE, 0xAB, 0xD2, 0x5C, 0x15, 0xB3, 0xD6);
+DEFINE_GUID(DXVADDI_Intel_ModeH264_E,               0x604F8E68, 0x4951, 0x4c54, 0x88, 0xFE, 0xAB, 0xD2, 0x5C, 0x15, 0xB3, 0xD6); // DXVA_Intel_H264_NoFGT_ClearVideo
+DEFINE_GUID(DXVA_ModeH264_VLD_NoFGT_Flash,          0x4245F676, 0x2BBC, 0x4166, 0xa0, 0xBB, 0x54, 0xE7, 0xB8, 0x49, 0xC3, 0x80);
+
+MS_GUID    (DXVA2_ModeWMV8_A,                       0x1b81be80, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+MS_GUID    (DXVA2_ModeWMV8_B,                       0x1b81be81, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+
+MS_GUID    (DXVA2_ModeWMV9_A,                       0x1b81be90, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+MS_GUID    (DXVA2_ModeWMV9_B,                       0x1b81be91, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+MS_GUID    (DXVA2_ModeWMV9_C,                       0x1b81be94, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+
+MS_GUID    (DXVA2_ModeVC1_A,                        0x1b81beA0, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+MS_GUID    (DXVA2_ModeVC1_B,                        0x1b81beA1, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+MS_GUID    (DXVA2_ModeVC1_C,                        0x1b81beA2, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+MS_GUID    (DXVA2_ModeVC1_D,                        0x1b81beA3, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+DEFINE_GUID(DXVA2_ModeVC1_D2010,                    0x1b81beA4, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5); // August 2010 update
+DEFINE_GUID(DXVA_Intel_VC1_ClearVideo,              0xBCC5DB6D, 0xA2B6, 0x4AF0, 0xAC, 0xE4, 0xAD, 0xB1, 0xF7, 0x87, 0xBC, 0x89);
+DEFINE_GUID(DXVA_Intel_VC1_ClearVideo_2,            0xE07EC519, 0xE651, 0x4CD6, 0xAC, 0x84, 0x13, 0x70, 0xCC, 0xEE, 0xC8, 0x51);
+
+DEFINE_GUID(DXVA_nVidia_MPEG4_ASP,                  0x9947EC6F, 0x689B, 0x11DC, 0xA3, 0x20, 0x00, 0x19, 0xDB, 0xBC, 0x41, 0x84);
+DEFINE_GUID(DXVA_ModeMPEG4pt2_VLD_Simple,           0xefd64d74, 0xc9e8, 0x41d7, 0xa5, 0xe9, 0xe9, 0xb0, 0xe3, 0x9f, 0xa3, 0x19);
+DEFINE_GUID(DXVA_ModeMPEG4pt2_VLD_AdvSimple_NoGMC,  0xed418a9f, 0x010d, 0x4eda, 0x9a, 0xe3, 0x9a, 0x65, 0x35, 0x8d, 0x8d, 0x2e);
+DEFINE_GUID(DXVA_ModeMPEG4pt2_VLD_AdvSimple_GMC,    0xab998b5b, 0x4258, 0x44a9, 0x9f, 0xeb, 0x94, 0xe5, 0x97, 0xa6, 0xba, 0xae);
+DEFINE_GUID(DXVA_ModeMPEG4pt2_VLD_AdvSimple_Avivo,  0x7C74ADC6, 0xe2ba, 0x4ade, 0x86, 0xde, 0x30, 0xbe, 0xab, 0xb4, 0x0c, 0xc1);
 
-static const GUID DXVA2_ModeMPEG2_MoComp = {
-    0xe6a9f44b, 0x61b0, 0x4563, {0x9e,0xa4,0x63,0xd2,0xa3,0xc6,0xfe,0x66}
-};
-static const GUID DXVA2_ModeMPEG2_IDCT = {
-  0xbf22ad00, 0x03ea, 0x4690, {0x80,0x77,0x47,0x33,0x46,0x20,0x9b,0x7e}
-};
-static const GUID DXVA2_ModeMPEG2_VLD = {
-   0xee27417f, 0x5e28, 0x4e65, {0xbe,0xea,0x1d,0x26,0xb5,0x08,0xad,0xc9}
-};
-
-static const GUID DXVA2_ModeH264_A = {
-    0x1b81be64, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
-};
-static const GUID DXVA2_ModeH264_B = {
-    0x1b81be65, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
-};
-static const GUID DXVA2_ModeH264_C = {
-    0x1b81be66, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
-};
-static const GUID DXVA2_ModeH264_D = {
-    0x1b81be67, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
-};
-static const GUID DXVA2_ModeH264_E = {
-    0x1b81be68, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
-};
-static const GUID DXVA2_ModeH264_F = {
-    0x1b81be69, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
-};
-static const GUID DXVA2_ModeWMV8_A = {
-    0x1b81be80, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
-};
-static const GUID DXVA2_ModeWMV8_B = {
-    0x1b81be81, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
-};
-static const GUID DXVA2_ModeWMV9_A = {
-    0x1b81be90, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
-};
-static const GUID DXVA2_ModeWMV9_B = {
-    0x1b81be91, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
-};
-static const GUID DXVA2_ModeWMV9_C = {
-    0x1b81be94, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
-};
-
-static const GUID DXVA2_ModeVC1_A = {
-    0x1b81beA0, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
-};
-static const GUID DXVA2_ModeVC1_B = {
-    0x1b81beA1, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
-};
-static const GUID DXVA2_ModeVC1_C = {
-    0x1b81beA2, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
-};
-static const GUID DXVA2_ModeVC1_D = {
-    0x1b81beA3, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
-};
 
 /* */
 typedef struct {
@@ -130,28 +143,55 @@ typedef struct {
 } dxva2_mode_t;
 /* XXX Preferred modes must come first */
 static const dxva2_mode_t dxva2_modes[] = {
-    { "DXVA2_ModeMPEG2_VLD",    &DXVA2_ModeMPEG2_VLD,     0 },
-    { "DXVA2_ModeMPEG2_MoComp", &DXVA2_ModeMPEG2_MoComp,  0 },
-    { "DXVA2_ModeMPEG2_IDCT",   &DXVA2_ModeMPEG2_IDCT,    0 },
-
-    { "H.264 variable-length decoder (VLD), FGT",               &DXVA2_ModeH264_F, CODEC_ID_H264 },
-    { "H.264 VLD, no FGT",                                      &DXVA2_ModeH264_E, CODEC_ID_H264 },
-    { "H.264 IDCT, FGT",                                        &DXVA2_ModeH264_D, 0,            },
-    { "H.264 inverse discrete cosine transform (IDCT), no FGT", &DXVA2_ModeH264_C, 0,            },
-    { "H.264 MoComp, FGT",                                      &DXVA2_ModeH264_B, 0,            },
-    { "H.264 motion compensation (MoComp), no FGT",             &DXVA2_ModeH264_A, 0,            },
-
-    { "Windows Media Video 8 MoComp",           &DXVA2_ModeWMV8_B, 0 },
-    { "Windows Media Video 8 post processing",  &DXVA2_ModeWMV8_A, 0 },
-
-    {  "Windows Media Video 9 IDCT",            &DXVA2_ModeWMV9_C, 0 },
-    {  "Windows Media Video 9 MoComp",          &DXVA2_ModeWMV9_B, 0 },
-    {  "Windows Media Video 9 post processing", &DXVA2_ModeWMV9_A, 0 },
-
-    { "VC-1 VLD",             &DXVA2_ModeVC1_D, 0 },
-    { "VC-1 IDCT",            &DXVA2_ModeVC1_C, 0 },
-    { "VC-1 MoComp",          &DXVA2_ModeVC1_B, 0 },
-    { "VC-1 post processing", &DXVA2_ModeVC1_A, 0 },
+    /* MPEG-1/2 */
+    { "MPEG-2 variable-length decoder",                                               &DXVA2_ModeMPEG2_VLD,                   CODEC_ID_MPEG2VIDEO },
+    { "MPEG-2 & MPEG-1 variable-length decoder",                                      &DXVA2_ModeMPEG2and1_VLD,               CODEC_ID_MPEG2VIDEO },
+    { "MPEG-2 motion compensation",                                                   &DXVA2_ModeMPEG2_MoComp,                0 },
+    { "MPEG-2 inverse discrete cosine transform",                                     &DXVA2_ModeMPEG2_IDCT,                  0 },
+
+    { "MPEG-1 variable-length decoder",                                               &DXVA2_ModeMPEG1_VLD,                   0 },
+
+    /* H.264 */
+    { "H.264 variable-length decoder, film grain technology",                         &DXVA2_ModeH264_F,                      CODEC_ID_H264 },
+    { "H.264 variable-length decoder, no film grain technology",                      &DXVA2_ModeH264_E,                      CODEC_ID_H264 },
+    { "H.264 variable-length decoder, no film grain technology (Intel ClearVideo)",   &DXVADDI_Intel_ModeH264_E,              CODEC_ID_H264 },
+    { "H.264 variable-length decoder, no film grain technology, FMO/ASO",             &DXVA_ModeH264_VLD_WithFMOASO_NoFGT,    CODEC_ID_H264 },
+    { "H.264 variable-length decoder, no film grain technology, Flash",               &DXVA_ModeH264_VLD_NoFGT_Flash,         CODEC_ID_H264 },
+
+    { "H.264 inverse discrete cosine transform, film grain technology",               &DXVA2_ModeH264_D,                      0 },
+    { "H.264 inverse discrete cosine transform, no film grain technology",            &DXVA2_ModeH264_C,                      0 },
+    { "H.264 inverse discrete cosine transform, no film grain technology (Intel)",    &DXVADDI_Intel_ModeH264_C,              0 },
+
+    { "H.264 motion compensation, film grain technology",                             &DXVA2_ModeH264_B,                      0 },
+    { "H.264 motion compensation, no film grain technology",                          &DXVA2_ModeH264_A,                      0 },
+    { "H.264 motion compensation, no film grain technology (Intel)",                  &DXVADDI_Intel_ModeH264_A,              0 },
+
+    /* WMV */
+    { "Windows Media Video 8 motion compensation",                                    &DXVA2_ModeWMV8_B,                      0 },
+    { "Windows Media Video 8 post processing",                                        &DXVA2_ModeWMV8_A,                      0 },
+
+    { "Windows Media Video 9 IDCT",                                                   &DXVA2_ModeWMV9_C,                      0 },
+    { "Windows Media Video 9 motion compensation",                                    &DXVA2_ModeWMV9_B,                      0 },
+    { "Windows Media Video 9 post processing",                                        &DXVA2_ModeWMV9_A,                      0 },
+
+    /* VC-1: DXVA2_ModeVC1_D and DXVA2_ModeVC1_D2010 each appear twice, once for CODEC_ID_VC1 and once for CODEC_ID_WMV3 */
+    { "VC-1 variable-length decoder",                                                 &DXVA2_ModeVC1_D,                       CODEC_ID_VC1 },
+    { "VC-1 variable-length decoder",                                                 &DXVA2_ModeVC1_D,                       CODEC_ID_WMV3 },
+    { "VC-1 variable-length decoder",                                                 &DXVA2_ModeVC1_D2010,                   CODEC_ID_VC1 },
+    { "VC-1 variable-length decoder",                                                 &DXVA2_ModeVC1_D2010,                   CODEC_ID_WMV3 },
+    { "VC-1 variable-length decoder 2 (Intel)",                                       &DXVA_Intel_VC1_ClearVideo_2,           0 },
+    { "VC-1 variable-length decoder (Intel)",                                         &DXVA_Intel_VC1_ClearVideo,             0 },
+
+    { "VC-1 inverse discrete cosine transform",                                       &DXVA2_ModeVC1_C,                       0 },
+    { "VC-1 motion compensation",                                                     &DXVA2_ModeVC1_B,                       0 },
+    { "VC-1 post processing",                                                         &DXVA2_ModeVC1_A,                       0 },
+
+    /* Xvid/Divx: TODO (these entries carry 0 in the last field instead of a CODEC_ID_* value) */
+    { "MPEG-4 Part 2 nVidia bitstream decoder",                                       &DXVA_nVidia_MPEG4_ASP,                 0 },
+    { "MPEG-4 Part 2 variable-length decoder, Simple Profile",                        &DXVA_ModeMPEG4pt2_VLD_Simple,          0 },
+    { "MPEG-4 Part 2 variable-length decoder, Simple&Advanced Profile, no GMC",       &DXVA_ModeMPEG4pt2_VLD_AdvSimple_NoGMC, 0 },
+    { "MPEG-4 Part 2 variable-length decoder, Simple&Advanced Profile, GMC",          &DXVA_ModeMPEG4pt2_VLD_AdvSimple_GMC,   0 },
+    { "MPEG-4 Part 2 variable-length decoder, Simple&Advanced Profile, Avivo",        &DXVA_ModeMPEG4pt2_VLD_AdvSimple_Avivo, 0 },
 
     { NULL, NULL, 0 }
 };
@@ -166,7 +206,6 @@ static const dxva2_mode_t *Dxva2FindMode(const GUID *guid)
 }
 
 /* */
-#define VLC_CODEC_NV12 VLC_FOURCC('N','V','1','2') /* TODO move to vlc_fourcc.h */
 typedef struct {
     const char   *name;
     D3DFORMAT    format;
@@ -176,6 +215,7 @@ typedef struct {
 static const d3d_format_t d3d_formats[] = {
     { "YV12",   MAKEFOURCC('Y','V','1','2'),    VLC_CODEC_YV12 },
     { "NV12",   MAKEFOURCC('N','V','1','2'),    VLC_CODEC_NV12 },
+    { "IMC3",   MAKEFOURCC('I','M','C','3'),    VLC_CODEC_YV12 }, /* reported as YV12: Extract() swaps the two chroma plane pointers and copies with CopyFromYv12 */
 
     { NULL, 0, 0 }
 };
@@ -189,13 +229,6 @@ static const d3d_format_t *D3dFindFormat(D3DFORMAT format)
     return NULL;
 }
 
-static const GUID IID_IDirectXVideoDecoderService = {
-    0xfc51a551, 0xd5e7, 0x11d9, {0xaf,0x55,0x00,0x05,0x4e,0x43,0xff,0x02}
-};
-static const GUID IID_IDirectXVideoAccelerationService = {
-    0xfc51a550, 0xd5e7, 0x11d9, {0xaf,0x55,0x00,0x05,0x4e,0x43,0xff,0x02}
-};
-
 /* */
 typedef struct {
     LPDIRECT3DSURFACE9 d3d;
@@ -212,35 +245,36 @@ typedef struct
     /* */
     vlc_object_t *log;
     int          codec_id;
+    int          width;
+    int          height;
 
     /* DLL */
-       HINSTANCE             hd3d9_dll;
+    HINSTANCE             hd3d9_dll;
     HINSTANCE             hdxva2_dll;
 
     /* Direct3D */
-       D3DPRESENT_PARAMETERS d3dpp;
-       LPDIRECT3D9           d3dobj;
-       LPDIRECT3DDEVICE9     d3ddev;
+    D3DPRESENT_PARAMETERS  d3dpp;
+    LPDIRECT3D9            d3dobj;
+    D3DADAPTER_IDENTIFIER9 d3dai;
+    LPDIRECT3DDEVICE9      d3ddev;
 
     /* Device manager */
     UINT                     token;
-    LPDIRECT3DDEVICEMANAGER9 devmng;
+    IDirect3DDeviceManager9  *devmng;
     HANDLE                   device;
 
     /* Video service */
-    LPDIRECTXVIDEODECODERSERVICE vs;
+    IDirectXVideoDecoderService  *vs;
     GUID                         input;
     D3DFORMAT                    render;
 
     /* Video decoder */
     DXVA2_ConfigPictureDecode    cfg;
-    LPDIRECTXVIDEODECODER        decoder;
+    IDirectXVideoDecoder         *decoder;
 
     /* Option conversion */
     D3DFORMAT                    output;
-    uint8_t                      *surface_cache_base;
-    uint8_t                      *surface_cache;
-    size_t                       surface_cache_size;
+    copy_cache_t                 surface_cache;
 
     /* */
     struct dxva_context hw;
@@ -283,21 +317,13 @@ static int DxResetVideoDecoder(vlc_va_dxva2_t *);
 static void DxCreateVideoConversion(vlc_va_dxva2_t *);
 static void DxDestroyVideoConversion(vlc_va_dxva2_t *);
 
-static void CopyFromNv12(picture_t *dst, const D3DLOCKED_RECT *src,
-                         uint8_t *cache, size_t cache_size,
-                         unsigned width, unsigned height);
-static void CopyFromYv12(picture_t *dst, const D3DLOCKED_RECT *src,
-                         uint8_t *cache, size_t cache_size,
-                         unsigned width, unsigned height);
-
 /* */
 static int Setup(vlc_va_t *external, void **hw, vlc_fourcc_t *chroma,
                  int width, int height)
 {
     vlc_va_dxva2_t *va = vlc_va_dxva2_Get(external);
 
-    if (va->surface_width  == width &&
-        va->surface_height == height)
+    if (va->width == width && va->height == height && va->decoder)
         goto ok;
 
     /* */
@@ -319,8 +345,7 @@ static int Setup(vlc_va_t *external, void **hw, vlc_fourcc_t *chroma,
         return VLC_EGENERIC;
     /* */
     va->hw.decoder = va->decoder;
-    if (va->codec_id == CODEC_ID_H264)
-        va->hw.cfg = &va->cfg;
+    va->hw.cfg = &va->cfg;
     va->hw.surface_count = va->surface_count;
     va->hw.surface = va->hw_surface;
     for (unsigned i = 0; i < va->surface_count; i++)
@@ -343,7 +368,7 @@ static int Extract(vlc_va_t *external, picture_t *picture, AVFrame *ff)
     vlc_va_dxva2_t *va = vlc_va_dxva2_Get(external);
     LPDIRECT3DSURFACE9 d3d = (LPDIRECT3DSURFACE9)(uintptr_t)ff->data[3];
 
-    if (!va->surface_cache)
+    if (!va->surface_cache.buffer)
         return VLC_EGENERIC;
 
     /* */
@@ -356,15 +381,45 @@ static int Extract(vlc_va_t *external, picture_t *picture, AVFrame *ff)
         return VLC_EGENERIC;
     }
 
-    if (va->render == MAKEFOURCC('Y','V','1','2')) {
-        CopyFromYv12(picture, &lock,
-                     va->surface_cache, va->surface_cache_size,
-                     va->surface_width, va->surface_height);
+    if (va->render == MAKEFOURCC('Y','V','1','2') ||
+        va->render == MAKEFOURCC('I','M','C','3')) {
+        bool imc3 = va->render == MAKEFOURCC('I','M','C','3');
+        size_t chroma_pitch = imc3 ? lock.Pitch : (lock.Pitch / 2);
+
+        size_t pitch[3] = {
+            lock.Pitch,
+            chroma_pitch,
+            chroma_pitch,
+        };
+
+        uint8_t *plane[3] = {
+            (uint8_t*)lock.pBits,
+            (uint8_t*)lock.pBits + pitch[0] * va->surface_height,
+            (uint8_t*)lock.pBits + pitch[0] * va->surface_height
+                                 + pitch[1] * va->surface_height / 2,
+        };
+
+        if (imc3) {
+            uint8_t *V = plane[1];
+            plane[1] = plane[2];
+            plane[2] = V;
+        }
+        CopyFromYv12(picture, plane, pitch,
+                     va->width, va->height,
+                     &va->surface_cache);
     } else {
         assert(va->render == MAKEFOURCC('N','V','1','2'));
-        CopyFromNv12(picture, &lock,
-                     va->surface_cache, va->surface_cache_size,
-                     va->surface_width, va->surface_height);
+        uint8_t *plane[2] = {
+            lock.pBits,
+            (uint8_t*)lock.pBits + lock.Pitch * va->surface_height
+        };
+        size_t  pitch[2] = {
+            lock.Pitch,
+            lock.Pitch,
+        };
+        CopyFromNv12(picture, plane, pitch,
+                     va->width, va->height,
+                     &va->surface_cache);
     }
 
     /* */
@@ -531,6 +586,14 @@ static int D3dCreateDevice(vlc_va_dxva2_t *va)
     }
     va->d3dobj = d3dobj;
 
+    /* */
+    D3DADAPTER_IDENTIFIER9 *d3dai = &va->d3dai;
+    if (FAILED(IDirect3D9_GetAdapterIdentifier(va->d3dobj,
+                                               D3DADAPTER_DEFAULT, 0, d3dai))) {
+        msg_Warn(va->log, "IDirect3D9_GetAdapterIdentifier failed");
+        ZeroMemory(d3dai, sizeof(*d3dai));
+    }
+
     /* */
     D3DPRESENT_PARAMETERS *d3dpp = &va->d3dpp;
     ZeroMemory(d3dpp, sizeof(*d3dpp));
@@ -577,17 +640,30 @@ static void D3dDestroyDevice(vlc_va_dxva2_t *va)
  */
 static char *DxDescribe(vlc_va_dxva2_t *va)
 {
-    D3DADAPTER_IDENTIFIER9 id;
-    ZeroMemory(&id, sizeof(id));
-
-    if (FAILED(IDirect3D9_GetAdapterIdentifier(va->d3dobj,
-                                               D3DADAPTER_DEFAULT, 0, &id)))
-        return strdup("DXVA2 (unknown)");
+    static const struct {
+        unsigned id;
+        char     name[32];
+    } vendors [] = {
+        { 0x1002, "ATI" },
+        { 0x10DE, "NVIDIA" },
+        { 0x8086, "Intel" },
+        { 0x5333, "S3 Graphics" },
+        { 0, "" }
+    };
+    D3DADAPTER_IDENTIFIER9 *id = &va->d3dai;
+
+    const char *vendor = "Unknown";
+    for (int i = 0; vendors[i].id != 0; i++) {
+        if (vendors[i].id == id->VendorId) {
+            vendor = vendors[i].name;
+            break;
+        }
+    }
 
     char *description;
-    if (asprintf(&description, "DXVA2 (%.*s, vendor %d, device %d, revision %d)",
-                 sizeof(id.Description), id.Description,
-                 id.VendorId, id.DeviceId, id.Revision) < 0)
+    if (asprintf(&description, "DXVA2 (%.*s, vendor %lu(%s), device %lu, revision %lu)",
+                 sizeof(id->Description), id->Description,
+                 id->VendorId, vendor, id->DeviceId, id->Revision) < 0)
         return NULL;
     return description;
 }
@@ -604,13 +680,13 @@ static int D3dCreateDeviceManager(vlc_va_dxva2_t *va)
                              TEXT("DXVA2CreateDirect3DDeviceManager9"));
 
     if (!CreateDeviceManager9) {
-        msg_Err(va->log, "cannot load function\n");
+        msg_Err(va->log, "cannot load function");
         return VLC_EGENERIC;
     }
     msg_Dbg(va->log, "OurDirect3DCreateDeviceManager9 Success!");
 
     UINT token;
-    LPDIRECT3DDEVICEMANAGER9 devmng;
+    IDirect3DDeviceManager9 *devmng;
     if (FAILED(CreateDeviceManager9(&token, &devmng))) {
         msg_Err(va->log, " OurDirect3DCreateDeviceManager9 failed");
         return VLC_EGENERIC;
@@ -648,7 +724,7 @@ static int DxCreateVideoService(vlc_va_dxva2_t *va)
                              TEXT("DXVA2CreateVideoService"));
 
     if (!CreateVideoService) {
-        msg_Err(va->log, "cannot load function\n");
+        msg_Err(va->log, "cannot load function");
         return 4;
     }
     msg_Info(va->log, "DXVA2CreateVideoService Success!");
@@ -663,10 +739,10 @@ static int DxCreateVideoService(vlc_va_dxva2_t *va)
     }
     va->device = device;
 
-    LPDIRECTXVIDEODECODERSERVICE vs;
+    IDirectXVideoDecoderService *vs;
     hr = IDirect3DDeviceManager9_GetVideoService(va->devmng, device,
                                                  &IID_IDirectXVideoDecoderService,
-                                                 &vs);
+                                                 (void**)&vs);
     if (FAILED(hr)) {
         msg_Err(va->log, "GetVideoService failed");
         return VLC_EGENERIC;
@@ -780,7 +856,12 @@ static int DxCreateVideoDecoder(vlc_va_dxva2_t *va,
     msg_Dbg(va->log, "DxCreateVideoDecoder id %d %dx%d",
             codec_id, fmt->i_width, fmt->i_height);
 
+    va->width  = fmt->i_width;
+    va->height = fmt->i_height;
+
     /* Allocates all surfaces needed for the decoder */
+    va->surface_width  = (fmt->i_width  + 15) & ~15;
+    va->surface_height = (fmt->i_height + 15) & ~15;
     switch (codec_id) {
     case CODEC_ID_H264:
         va->surface_count = 16 + 1;
@@ -791,8 +872,8 @@ static int DxCreateVideoDecoder(vlc_va_dxva2_t *va,
     }
     LPDIRECT3DSURFACE9 surface_list[VA_DXVA2_MAX_SURFACE_COUNT];
     if (FAILED(IDirectXVideoDecoderService_CreateSurface(va->vs,
-                                                         fmt->i_width,
-                                                         fmt->i_height,
+                                                         va->surface_width,
+                                                         va->surface_height,
                                                          va->surface_count - 1,
                                                          va->render,
                                                          D3DPOOL_DEFAULT,
@@ -800,7 +881,7 @@ static int DxCreateVideoDecoder(vlc_va_dxva2_t *va,
                                                          DXVA2_VideoDecoderRenderTarget,
                                                          surface_list,
                                                          NULL))) {
-        msg_Err(va->log, "IDirectXVideoAccelerationService_CreateSurface failed\n");
+        msg_Err(va->log, "IDirectXVideoAccelerationService_CreateSurface failed");
         va->surface_count = 0;
         return VLC_EGENERIC;
     }
@@ -810,8 +891,6 @@ static int DxCreateVideoDecoder(vlc_va_dxva2_t *va,
         surface->refcount = 0;
         surface->order = 0;
     }
-    va->surface_width  = fmt->i_width;
-    va->surface_height = fmt->i_height;
     msg_Dbg(va->log, "IDirectXVideoAccelerationService_CreateSurface succeed with %d surfaces (%dx%d)",
             va->surface_count, fmt->i_width, fmt->i_height);
 
@@ -851,13 +930,13 @@ static int DxCreateVideoDecoder(vlc_va_dxva2_t *va,
                                                                     NULL,
                                                                     &cfg_count,
                                                                     &cfg_list))) {
-        msg_Err(va->log, "IDirectXVideoDecoderService_GetDecoderConfigurations failed\n");
+        msg_Err(va->log, "IDirectXVideoDecoderService_GetDecoderConfigurations failed");
         return VLC_EGENERIC;
     }
     msg_Dbg(va->log, "we got %d decoder configurations", cfg_count);
 
     /* Select the best decoder configuration */
-    bool has_cfg = false;
+    int cfg_score = 0;
     for (unsigned i = 0; i < cfg_count; i++) {
         const DXVA2_ConfigPictureDecode *cfg = &cfg_list[i];
 
@@ -866,20 +945,29 @@ static int DxCreateVideoDecoder(vlc_va_dxva2_t *va,
                 i, cfg->ConfigBitstreamRaw);
 
         /* */
-        if ((!has_cfg && cfg->ConfigBitstreamRaw == 1) ||
-            (codec_id == CODEC_ID_H264 && cfg->ConfigBitstreamRaw == 2)) {
+        int score;
+        if (cfg->ConfigBitstreamRaw == 1)
+            score = 1;
+        else if (codec_id == CODEC_ID_H264 && cfg->ConfigBitstreamRaw == 2)
+            score = 2;
+        else
+            continue;
+        if (IsEqualGUID(&cfg->guidConfigBitstreamEncryption, &DXVA_NoEncrypt))
+            score += 16;
+
+        if (cfg_score < score) {
             va->cfg = *cfg;
-            has_cfg = true;
+            cfg_score = score;
         }
     }
     CoTaskMemFree(cfg_list);
-    if (!has_cfg) {
+    if (cfg_score <= 0) {
         msg_Err(va->log, "Failed to find a supported decoder configuration");
         return VLC_EGENERIC;
     }
 
     /* Create the decoder */
-    LPDIRECTXVIDEODECODER decoder;
+    IDirectXVideoDecoder *decoder;
     if (FAILED(IDirectXVideoDecoderService_CreateVideoDecoder(va->vs,
                                                               &va->input,
                                                               &dsc,
@@ -887,7 +975,7 @@ static int DxCreateVideoDecoder(vlc_va_dxva2_t *va,
                                                               surface_list,
                                                               va->surface_count,
                                                               &decoder))) {
-        msg_Err(va->log, "IDirectXVideoDecoderService_CreateVideoDecoder failed\n");
+        msg_Err(va->log, "IDirectXVideoDecoderService_CreateVideoDecoder failed");
         return VLC_EGENERIC;
     }
     va->decoder = decoder;
@@ -914,330 +1002,17 @@ static void DxCreateVideoConversion(vlc_va_dxva2_t *va)
 {
     switch (va->render) {
     case MAKEFOURCC('N','V','1','2'):
+    case MAKEFOURCC('I','M','C','3'):
         va->output = MAKEFOURCC('Y','V','1','2');
         break;
     default:
         va->output = va->render;
         break;
     }
-    va->surface_cache_size = __MAX((va->surface_width + 0x0f) & ~ 0x0f, 4096);
-    va->surface_cache_base = malloc(16 + va->surface_cache_size);
-    va->surface_cache      = &va->surface_cache_base[16 - ((intptr_t)va->surface_cache_base & 0x0f)];
+    CopyInitCache(&va->surface_cache, va->surface_width);
 }
 static void DxDestroyVideoConversion(vlc_va_dxva2_t *va)
 {
-    free(va->surface_cache_base);
-    va->surface_cache_base = NULL;
-    va->surface_cache      = NULL;
-    va->surface_cache_size = 0;
-}
-
-/* Copy 64 bytes from srcp to dsp loading data with the SSE>=2 instruction load and
- * storing data with the SSE>=2 instruction store.
- */
-#define COPY64(dstp, srcp, load, store) \
-    asm volatile (                      \
-        load "  0(%[src]), %%xmm1\n"    \
-        load " 16(%[src]), %%xmm2\n"    \
-        load " 32(%[src]), %%xmm3\n"    \
-        load " 48(%[src]), %%xmm4\n"    \
-        store " %%xmm1,    0(%[dst])\n" \
-        store " %%xmm2,   16(%[dst])\n" \
-        store " %%xmm3,   32(%[dst])\n" \
-        store " %%xmm4,   48(%[dst])\n" \
-        : : [dst]"r"(dstp), [src]"r"(srcp) : "memory")
-
-/* Execute the instruction op only if SSE2 is supported. */
-#ifdef CAN_COMPILE_SSE2
-#   define ASM_SSE2(cpu, op) do {          \
-        if (cpu & CPU_CAPABILITY_SSE2)  \
-            asm volatile (op);    \
-    } while (0)
-#else
-#   define ASM_SSE2(cpu, op)
-#endif
-
-/* Optimized copy from "Uncacheable Speculative Write Combining" memory
- * as used by some video surface.
- * XXX It is really efficient only when SSE4.1 is available.
- */
-static void CopyFromUswc(uint8_t *dst, size_t dst_pitch,
-                         const uint8_t *src, size_t src_pitch,
-                         unsigned unaligned,
-                         unsigned width, unsigned height,
-                         unsigned cpu)
-{
-    assert(((intptr_t)dst & 0x0f) == 0 && (dst_pitch & 0x0f) == 0);
-
-    ASM_SSE2(cpu, "mfence");
-    for (unsigned y = 0; y < height; y++) {
-        unsigned x;
-
-        for (x = 0; x < unaligned; x++)
-            dst[x] = src[x];
-
-#ifdef CAN_COMPILE_SSE4_1
-        if (cpu & CPU_CAPABILITY_SSE4_1) {
-            if (!unaligned) {
-                for (; x+63 < width; x += 64)
-                    COPY64(&dst[x], &src[x], "movntdqa", "movdqa");
-            } else {
-                for (; x+63 < width; x += 64)
-                    COPY64(&dst[x], &src[x], "movntdqa", "movdqu");
-            }
-        } else
-#endif
-#ifdef CAN_COMPILE_SSE2
-        if (cpu & CPU_CAPABILITY_SSE2) {
-            if (!unaligned) {
-                for (; x+63 < width; x += 64)
-                    COPY64(&dst[x], &src[x], "movdqa", "movdqa");
-            } else {
-                for (; x+63 < width; x += 64)
-                    COPY64(&dst[x], &src[x], "movdqa", "movdqu");
-            }
-        }
-#endif
-
-        for (; x < width; x++)
-            dst[x] = src[x];
-
-        src += src_pitch;
-        dst += dst_pitch;
-    }
-}
-
-static void Copy2d(uint8_t *dst, size_t dst_pitch,
-                   const uint8_t *src, size_t src_pitch,
-                   unsigned width, unsigned height,
-                   unsigned cpu)
-{
-    assert(((intptr_t)src & 0x0f) == 0 && (src_pitch & 0x0f) == 0);
-
-    ASM_SSE2(cpu, "mfence");
-
-    for (unsigned y = 0; y < height; y++) {
-        unsigned x = 0;
-        bool unaligned = ((intptr_t)dst & 0x0f) != 0;
-
-#ifdef CAN_COMPILE_SSE2
-        if (cpu & CPU_CAPABILITY_SSE2) {
-            if (!unaligned) {
-                for (; x+63 < width; x += 64)
-                    COPY64(&dst[x], &src[x], "movdqa", "movntdq");
-            } else {
-                for (; x+63 < width; x += 64)
-                    COPY64(&dst[x], &src[x], "movdqa", "movdqu");
-            }
-        }
-#endif
-
-        for (; x < width; x++)
-            dst[x] = src[x];
-
-        src += src_pitch;
-        dst += dst_pitch;
-    }
-}
-
-static void SplitUV(uint8_t *dstu, size_t dstu_pitch,
-                    uint8_t *dstv, size_t dstv_pitch,
-                    const uint8_t *src, size_t src_pitch,
-                    unsigned width, unsigned height, unsigned cpu)
-{
-    const uint8_t shuffle[] = { 0, 2, 4, 6, 8, 10, 12, 14,
-                                1, 3, 5, 7, 9, 11, 13, 15 };
-    const uint8_t mask[] = { 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00,
-                             0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00 };
-
-    assert(((intptr_t)src & 0x0f) == 0 && (src_pitch & 0x0f) == 0);
-
-    ASM_SSE2(cpu, "mfence");
-
-    for (unsigned y = 0; y < height; y++) {
-        unsigned x = 0;
-
-#define LOAD64 \
-    "movdqa  0(%[src]), %%xmm0\n" \
-    "movdqa 16(%[src]), %%xmm1\n" \
-    "movdqa 32(%[src]), %%xmm2\n" \
-    "movdqa 48(%[src]), %%xmm3\n"
-
-#define STORE2X32 \
-    "movq   %%xmm0,   0(%[dst1])\n" \
-    "movq   %%xmm1,   8(%[dst1])\n" \
-    "movhpd %%xmm0,   0(%[dst2])\n" \
-    "movhpd %%xmm1,   8(%[dst2])\n" \
-    "movq   %%xmm2,  16(%[dst1])\n" \
-    "movq   %%xmm3,  24(%[dst1])\n" \
-    "movhpd %%xmm2,  16(%[dst2])\n" \
-    "movhpd %%xmm3,  24(%[dst2])\n"
-
-#ifdef CAN_COMPILE_SSSE3
-        if (cpu & CPU_CAPABILITY_SSSE3) {
-            for (x = 0; x < (width & ~31); x += 32) {
-                asm volatile (
-                    "movdqu (%[shuffle]), %%xmm7\n"
-                    LOAD64
-                    "pshufb  %%xmm7, %%xmm0\n"
-                    "pshufb  %%xmm7, %%xmm1\n"
-                    "pshufb  %%xmm7, %%xmm2\n"
-                    "pshufb  %%xmm7, %%xmm3\n"
-                    STORE2X32
-                    : : [dst1]"r"(&dstu[x]), [dst2]"r"(&dstv[x]), [src]"r"(&src[2*x]), [shuffle]"r"(shuffle) : "memory");
-            }
-        } else
-#endif
-#ifdef CAN_COMPILE_SSE2
-        if (cpu & CPU_CAPABILITY_SSE2) {
-            for (x = 0; x < (width & ~31); x += 32) {
-                asm volatile (
-                    "movdqu (%[mask]), %%xmm7\n"
-                    LOAD64
-                    "movdqa   %%xmm0, %%xmm4\n"
-                    "movdqa   %%xmm1, %%xmm5\n"
-                    "movdqa   %%xmm2, %%xmm6\n"
-                    "psrlw    $8,     %%xmm0\n"
-                    "psrlw    $8,     %%xmm1\n"
-                    "pand     %%xmm7, %%xmm4\n"
-                    "pand     %%xmm7, %%xmm5\n"
-                    "pand     %%xmm7, %%xmm6\n"
-                    "packuswb %%xmm4, %%xmm0\n"
-                    "packuswb %%xmm5, %%xmm1\n"
-                    "pand     %%xmm3, %%xmm7\n"
-                    "psrlw    $8,     %%xmm2\n"
-                    "psrlw    $8,     %%xmm3\n"
-                    "packuswb %%xmm6, %%xmm2\n"
-                    "packuswb %%xmm7, %%xmm3\n"
-                    STORE2X32
-                    : : [dst2]"r"(&dstu[x]), [dst1]"r"(&dstv[x]), [src]"r"(&src[2*x]), [mask]"r"(mask) : "memory");
-            }
-        }
-#endif
-#undef STORE2X32
-#undef LOAD64
-
-        for (; x < width; x++) {
-            dstu[x] = src[2*x+0];
-            dstv[x] = src[2*x+1];
-        }
-        src  += src_pitch;
-        dstu += dstu_pitch;
-        dstv += dstv_pitch;
-    }
-}
-
-static void CopyPlane(uint8_t *dst, size_t dst_pitch, const uint8_t *src, size_t src_pitch,
-                      uint8_t *cache, size_t cache_size,
-                      unsigned width, unsigned height,
-                      unsigned cpu)
-{
-    const unsigned w16 = (width+15) & ~15;
-    const unsigned hstep = cache_size / w16;
-    assert(hstep > 0);
-
-    for (unsigned y = 0; y < height; y += hstep) {
-        const unsigned unaligned = (intptr_t)src & 0x0f;
-        const unsigned hblock =  __MIN(hstep, height - y);
-
-        /* Copy a bunch of line into our cache */
-        CopyFromUswc(cache, w16,
-                     src, src_pitch,
-                     unaligned,
-                     width, hblock, cpu);
-
-        /* Copy from our cache to the destination */
-        Copy2d(dst, dst_pitch,
-               cache, w16,
-               width, hblock, cpu);
-
-        /* */
-        src += src_pitch * hblock;
-        dst += dst_pitch * hblock;
-    }
-
-    ASM_SSE2(cpu, "mfence");
-}
-static void SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
-                        uint8_t *dstv, size_t dstv_pitch,
-                        const uint8_t *src, size_t src_pitch,
-                        uint8_t *cache, size_t cache_size,
-                        unsigned width, unsigned height,
-                        unsigned cpu)
-{
-    const unsigned w2_16 = (2*width+15) & ~15;
-    const unsigned hstep = cache_size / w2_16;
-    assert(hstep > 0);
-
-    for (unsigned y = 0; y < height; y += hstep) {
-        const unsigned unaligned = (intptr_t)src & 0x0f;
-        const unsigned hblock =  __MIN(hstep, height - y);
-
-        /* Copy a bunch of line into our cache */
-        CopyFromUswc(cache, w2_16,
-                     src, src_pitch,
-                     unaligned,
-                     2*width, hblock, cpu);
-
-        /* Copy from our cache to the destination */
-        SplitUV(dstu, dstu_pitch,
-                dstv, dstv_pitch,
-                cache, w2_16,
-                width, hblock, cpu);
-
-        /* */
-        src  += src_pitch  * hblock;
-        dstu += dstu_pitch * hblock;
-        dstv += dstv_pitch * hblock;
-    }
-
-    ASM_SSE2(cpu, "mfence");
-}
-
-static void CopyFromNv12(picture_t *dst, const D3DLOCKED_RECT *src,
-                         uint8_t *cache, size_t cache_size,
-                         unsigned width, unsigned height)
-{
-    const unsigned cpu = vlc_CPU();
-
-    /* */
-    CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
-              src->pBits, src->Pitch,
-              cache, cache_size,
-              width, height, cpu);
-    SplitPlanes(dst->p[2].p_pixels, dst->p[2].i_pitch,
-                dst->p[1].p_pixels, dst->p[1].i_pitch,
-                (const uint8_t*)src->pBits + src->Pitch * height, src->Pitch,
-                cache, cache_size,
-                width/2, height/2, cpu);
-
-    ASM_SSE2(cpu, "emms");
-}
-static void CopyFromYv12(picture_t *dst, const D3DLOCKED_RECT *src,
-                         uint8_t *cache, size_t cache_size,
-                         unsigned width, unsigned height)
-{
-    const unsigned cpu = vlc_CPU();
-
-    /* */
-    for (unsigned n = 0, offset = 0; n < 3; n++) {
-        const unsigned d = n > 0 ? 2 : 1;
-        CopyPlane(dst->p[n].p_pixels, dst->p[n].i_pitch,
-                  (const uint8_t*)src->pBits + offset, src->Pitch/d,
-                  cache, cache_size,
-                  width/d, height/d, cpu);
-        offset += (src->Pitch/d) * (height/d);
-    }
-    ASM_SSE2(cpu, "emms");
-}
-#undef ASM_SSE2
-#undef COPY64
-
-#else
-vlc_va_t *vlc_va_NewDxva2(vlc_object_t *log, int codec_id)
-{
-    (void)log;
-    (void)codec_id;
-    return NULL;
+    CopyCleanCache(&va->surface_cache);
 }
 #endif