1 /*****************************************************************************
2 * va.c: Video Acceleration helpers
3 *****************************************************************************
4 * Copyright (C) 2009 Geoffroy Couprie
5 * Copyright (C) 2009 Laurent Aimar
8 * Authors: Geoffroy Couprie <geal@videolan.org>
9 * Laurent Aimar <fenrir _AT_ videolan _DOT_ org>
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
24 *****************************************************************************/
30 #include <vlc_common.h>
31 #include <vlc_picture.h>
32 #include <vlc_fourcc.h>
36 #ifdef HAVE_LIBAVCODEC_AVCODEC_H
37 # include <libavcodec/avcodec.h>
38 # ifdef HAVE_AVCODEC_DXVA2
39 # define DXVA2API_USE_BITFIELDS
40 # include <libavcodec/dxva2.h>
42 #elif defined(HAVE_FFMPEG_AVCODEC_H)
43 # include <ffmpeg/avcodec.h>
51 #ifdef HAVE_AVCODEC_DXVA2
61 #define CoTaskMemFree(x)
64 #define DXVA2_E_NOT_INITIALIZED MAKE_HRESULT(1, 4, 4096)
65 #define DXVA2_E_NEW_VIDEO_DEVICE MAKE_HRESULT(1, 4, 4097)
66 #define DXVA2_E_VIDEO_DEVICE_LOCKED MAKE_HRESULT(1, 4, 4098)
67 #define DXVA2_E_NOT_AVAILABLE MAKE_HRESULT(1, 4, 4099)
/* GUIDs of the DXVA2 decoder modes. Each of them is referenced by one entry
 * of the dxva2_modes table. */
/* MPEG-2 modes */
69 static const GUID DXVA2_ModeMPEG2_MoComp = {
70 0xe6a9f44b, 0x61b0, 0x4563, {0x9e,0xa4,0x63,0xd2,0xa3,0xc6,0xfe,0x66}
72 static const GUID DXVA2_ModeMPEG2_IDCT = {
73 0xbf22ad00, 0x03ea, 0x4690, {0x80,0x77,0x47,0x33,0x46,0x20,0x9b,0x7e}
75 static const GUID DXVA2_ModeMPEG2_VLD = {
76 0xee27417f, 0x5e28, 0x4e65, {0xbe,0xea,0x1d,0x26,0xb5,0x08,0xad,0xc9}
/* H.264 modes A..F (only the first 32-bit field differs between them) */
79 static const GUID DXVA2_ModeH264_A = {
80 0x1b81be64, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
82 static const GUID DXVA2_ModeH264_B = {
83 0x1b81be65, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
85 static const GUID DXVA2_ModeH264_C = {
86 0x1b81be66, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
88 static const GUID DXVA2_ModeH264_D = {
89 0x1b81be67, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
91 static const GUID DXVA2_ModeH264_E = {
92 0x1b81be68, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
94 static const GUID DXVA2_ModeH264_F = {
95 0x1b81be69, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
/* Windows Media Video 8 modes */
97 static const GUID DXVA2_ModeWMV8_A = {
98 0x1b81be80, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
100 static const GUID DXVA2_ModeWMV8_B = {
101 0x1b81be81, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
/* Windows Media Video 9 modes */
103 static const GUID DXVA2_ModeWMV9_A = {
104 0x1b81be90, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
106 static const GUID DXVA2_ModeWMV9_B = {
107 0x1b81be91, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
109 static const GUID DXVA2_ModeWMV9_C = {
110 0x1b81be94, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
/* VC-1 modes */
113 static const GUID DXVA2_ModeVC1_A = {
114 0x1b81beA0, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
116 static const GUID DXVA2_ModeVC1_B = {
117 0x1b81beA1, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
119 static const GUID DXVA2_ModeVC1_C = {
120 0x1b81beA2, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
122 static const GUID DXVA2_ModeVC1_D = {
123 0x1b81beA3, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
132 /* XXX Preferred modes must come first */
/* Table of the decoder modes known to this file. Each entry holds a
 * human-readable name, a pointer to the mode GUID and a codec identifier.
 * Only the two H.264 VLD entries carry a non-zero codec (CODEC_ID_H264); the
 * mode selection code in this file tests `!mode->codec`, so entries with a 0
 * codec are presumably listed for logging/identification only. */
133 static const dxva2_mode_t dxva2_modes[] = {
134 { "DXVA2_ModeMPEG2_VLD", &DXVA2_ModeMPEG2_VLD, 0 },
135 { "DXVA2_ModeMPEG2_MoComp", &DXVA2_ModeMPEG2_MoComp, 0 },
136 { "DXVA2_ModeMPEG2_IDCT", &DXVA2_ModeMPEG2_IDCT, 0 },
138 { "H.264 variable-length decoder (VLD), FGT", &DXVA2_ModeH264_F, CODEC_ID_H264 },
139 { "H.264 VLD, no FGT", &DXVA2_ModeH264_E, CODEC_ID_H264 },
140 { "H.264 IDCT, FGT", &DXVA2_ModeH264_D, 0, },
141 { "H.264 inverse discrete cosine transform (IDCT), no FGT", &DXVA2_ModeH264_C, 0, },
142 { "H.264 MoComp, FGT", &DXVA2_ModeH264_B, 0, },
143 { "H.264 motion compensation (MoComp), no FGT", &DXVA2_ModeH264_A, 0, },
145 { "Windows Media Video 8 MoComp", &DXVA2_ModeWMV8_B, 0 },
146 { "Windows Media Video 8 post processing", &DXVA2_ModeWMV8_A, 0 },
148 { "Windows Media Video 9 IDCT", &DXVA2_ModeWMV9_C, 0 },
149 { "Windows Media Video 9 MoComp", &DXVA2_ModeWMV9_B, 0 },
150 { "Windows Media Video 9 post processing", &DXVA2_ModeWMV9_A, 0 },
152 { "VC-1 VLD", &DXVA2_ModeVC1_D, 0 },
153 { "VC-1 IDCT", &DXVA2_ModeVC1_C, 0 },
154 { "VC-1 MoComp", &DXVA2_ModeVC1_B, 0 },
155 { "VC-1 post processing", &DXVA2_ModeVC1_A, 0 },
/* Looks up the dxva2_modes entry whose GUID is equal to *guid and returns a
 * pointer to it. The scan walks the table until it meets an entry whose name
 * is NULL. */
160 static const dxva2_mode_t *Dxva2FindMode(const GUID *guid)
162 for (unsigned i = 0; dxva2_modes[i].name; i++) {
163 if (IsEqualGUID(dxva2_modes[i].guid, guid))
164 return &dxva2_modes[i];
170 #define VLC_CODEC_NV12 VLC_FOURCC('N','V','1','2') /* TODO move to vlc_fourcc.h */
176 /* XXX Preferred formats must come first */
/* Table of the Direct3D surface formats handled by this file: a printable
 * name, the Direct3D FOURCC and the matching VLC chroma. */
177 static const d3d_format_t d3d_formats[] = {
178 { "YV12", MAKEFOURCC('Y','V','1','2'), VLC_CODEC_YV12 },
179 { "NV12", MAKEFOURCC('N','V','1','2'), VLC_CODEC_NV12 },
/* Looks up the d3d_formats entry whose Direct3D format equals `format` and
 * returns a pointer to it. The scan walks the table until it meets an entry
 * whose name is NULL. */
184 static const d3d_format_t *D3dFindFormat(D3DFORMAT format)
186 for (unsigned i = 0; d3d_formats[i].name; i++) {
187 if (d3d_formats[i].format == format)
188 return &d3d_formats[i];
/* Interface identifiers of the DXVA2 services. In this file
 * IID_IDirectXVideoDecoderService is the one passed to
 * IDirect3DDeviceManager9_GetVideoService(). */
193 static const GUID IID_IDirectXVideoDecoderService = {
194 0xfc51a551, 0xd5e7, 0x11d9, {0xaf,0x55,0x00,0x05,0x4e,0x43,0xff,0x02}
196 static const GUID IID_IDirectXVideoAccelerationService = {
197 0xfc51a550, 0xd5e7, 0x11d9, {0xaf,0x55,0x00,0x05,0x4e,0x43,0xff,0x02}
202 LPDIRECT3DSURFACE9 d3d;
207 #define VA_DXVA2_MAX_SURFACE_COUNT (64)
219 HINSTANCE hdxva2_dll;
222 D3DPRESENT_PARAMETERS d3dpp;
224 LPDIRECT3DDEVICE9 d3ddev;
228 IDirect3DDeviceManager9 *devmng;
232 IDirectXVideoDecoderService *vs;
237 DXVA2_ConfigPictureDecode cfg;
238 IDirectXVideoDecoder *decoder;
240 /* Option conversion */
242 uint8_t *surface_cache_base;
243 uint8_t *surface_cache;
244 size_t surface_cache_size;
247 struct dxva_context hw;
250 unsigned surface_count;
251 unsigned surface_order;
254 vlc_fourcc_t surface_chroma;
256 vlc_va_surface_t surface[VA_DXVA2_MAX_SURFACE_COUNT];
257 LPDIRECT3DSURFACE9 hw_surface[VA_DXVA2_MAX_SURFACE_COUNT];
/* Converts the opaque pointer received by the vlc_va_t callbacks back into
 * the vlc_va_dxva2_t it belongs to. */
261 static vlc_va_dxva2_t *vlc_va_dxva2_Get(void *external)
/* Sanity check: the embedded `va` member must sit at the very beginning of
 * vlc_va_dxva2_t, so that both pointers have the same value. */
263 assert(external == (void*)(&((vlc_va_dxva2_t*)external)->va));
268 static int D3dCreateDevice(vlc_va_dxva2_t *);
269 static void D3dDestroyDevice(vlc_va_dxva2_t *);
270 static char *DxDescribe(vlc_va_dxva2_t *);
272 static int D3dCreateDeviceManager(vlc_va_dxva2_t *);
273 static void D3dDestroyDeviceManager(vlc_va_dxva2_t *);
275 static int DxCreateVideoService(vlc_va_dxva2_t *);
276 static void DxDestroyVideoService(vlc_va_dxva2_t *);
277 static int DxFindVideoServiceConversion(vlc_va_dxva2_t *, GUID *input, D3DFORMAT *output);
279 static int DxCreateVideoDecoder(vlc_va_dxva2_t *,
280 int codec_id, const video_format_t *);
281 static void DxDestroyVideoDecoder(vlc_va_dxva2_t *);
282 static int DxResetVideoDecoder(vlc_va_dxva2_t *);
284 static void DxCreateVideoConversion(vlc_va_dxva2_t *);
285 static void DxDestroyVideoConversion(vlc_va_dxva2_t *);
287 static void CopyFromNv12(picture_t *dst, const D3DLOCKED_RECT *src,
288 uint8_t *cache, size_t cache_size,
289 unsigned width, unsigned height);
290 static void CopyFromYv12(picture_t *dst, const D3DLOCKED_RECT *src,
291 uint8_t *cache, size_t cache_size,
292 unsigned width, unsigned height);
/* vlc_va_t setup callback: (re)creates the DXVA2 decoder and the conversion
 * cache for the given picture size, fills the libavcodec hardware context
 * (va->hw) and reports the output chroma through *chroma. */
295 static int Setup(vlc_va_t *external, void **hw, vlc_fourcc_t *chroma,
296 int width, int height)
298 vlc_va_dxva2_t *va = vlc_va_dxva2_Get(external);
/* Fast path when the surfaces already have the requested size */
300 if (va->surface_width == width &&
301 va->surface_height == height)
/* Size changed: drop the conversion cache and the decoder before
 * rebuilding them */
305 DxDestroyVideoConversion(va);
306 DxDestroyVideoDecoder(va);
/* Invalid dimensions are handled separately from decoder creation */
310 if (width <= 0 || height <= 0)
313 /* FIXME transmit a video_format_t by VaSetup directly */
315 memset(&fmt, 0, sizeof(fmt));
317 fmt.i_height = height;
319 if (DxCreateVideoDecoder(va, va->codec_id, &fmt))
/* Publish the decoder, its configuration (H.264 only) and the surface list
 * to the dxva_context handed to libavcodec */
322 va->hw.decoder = va->decoder;
323 if (va->codec_id == CODEC_ID_H264)
324 va->hw.cfg = &va->cfg;
325 va->hw.surface_count = va->surface_count;
326 va->hw.surface = va->hw_surface;
327 for (unsigned i = 0; i < va->surface_count; i++)
328 va->hw.surface[i] = va->surface[i].d3d;
331 DxCreateVideoConversion(va);
/* NOTE(review): the result of D3dFindFormat() is dereferenced without a NULL
 * check in the lines shown here; it relies on va->output always being one of
 * the d3d_formats entries -- confirm. */
336 const d3d_format_t *output = D3dFindFormat(va->output);
337 *chroma = output->codec;
/* vlc_va_t extract callback: copies the decoded Direct3D surface referenced
 * by ff->data[3] into `picture`, going through the surface cache. */
342 static int Extract(vlc_va_t *external, picture_t *picture, AVFrame *ff)
344 vlc_va_dxva2_t *va = vlc_va_dxva2_Get(external);
/* The surface pointer was stored in data[3] by Get() */
345 LPDIRECT3DSURFACE9 d3d = (LPDIRECT3DSURFACE9)(uintptr_t)ff->data[3];
/* Nothing can be copied without the intermediate cache */
347 if (!va->surface_cache)
/* Both copy helpers below write planar output, hence YV12 is expected */
351 assert(va->output == MAKEFOURCC('Y','V','1','2'));
/* Map the surface for reading */
355 if (FAILED(IDirect3DSurface9_LockRect(d3d, &lock, NULL, D3DLOCK_READONLY))) {
356 msg_Err(va->log, "Failed to lock surface");
/* The copy routine depends on the format the decoder renders to */
360 if (va->render == MAKEFOURCC('Y','V','1','2')) {
361 CopyFromYv12(picture, &lock,
362 va->surface_cache, va->surface_cache_size,
363 va->surface_width, va->surface_height);
365 assert(va->render == MAKEFOURCC('N','V','1','2'));
366 CopyFromNv12(picture, &lock,
367 va->surface_cache, va->surface_cache_size,
368 va->surface_width, va->surface_height);
372 IDirect3DSurface9_UnlockRect(d3d);
375 /* FIXME it is nearly common with VAAPI */
/* vlc_va_t get callback: checks the Direct3D device, picks a surface for the
 * next frame and stores it in the AVFrame data pointers. */
376 static int Get(vlc_va_t *external, AVFrame *ff)
378 vlc_va_dxva2_t *va = vlc_va_dxva2_Get(external);
380 /* Check the device */
381 HRESULT hr = IDirect3DDeviceManager9_TestDevice(va->devmng, va->device);
/* A new device requires the decoder to be reset; any other failure is
 * logged */
382 if (hr == DXVA2_E_NEW_VIDEO_DEVICE) {
383 if (DxResetVideoDecoder(va))
385 } else if (FAILED(hr)) {
386 msg_Err(va->log, "IDirect3DDeviceManager9_TestDevice %u", (unsigned)hr);
390 /* Grab an unused surface, in case none are, try the oldest
391 * XXX using the oldest is a workaround in case a problem happens with ffmpeg */
393 for (i = 0, old = 0; i < va->surface_count; i++) {
394 vlc_va_surface_t *surface = &va->surface[i];
/* refcount == 0 marks a surface that is not in use */
396 if (!surface->refcount)
/* Otherwise keep track of the surface with the smallest order value */
399 if (surface->order < va->surface[old].order)
/* i reached the end of the list: no unused surface was found */
402 if (i >= va->surface_count)
405 vlc_va_surface_t *surface = &va->surface[i];
/* Mark the surface as used and timestamp it with a growing counter */
407 surface->refcount = 1;
408 surface->order = va->surface_order++;
/* Only data[0] and data[3] receive the surface pointer */
411 for (int i = 0; i < 4; i++) {
415 if (i == 0 || i == 3)
416 ff->data[i] = (void*)surface->d3d;/* Yummie */
/* vlc_va_t release callback: finds the surface entry matching the Direct3D
 * surface stored in ff->data[3]. The statement executed on a match is not
 * part of the lines shown here (presumably it drops the reference taken in
 * Get() -- confirm). */
420 static void Release(vlc_va_t *external, AVFrame *ff)
422 vlc_va_dxva2_t *va = vlc_va_dxva2_Get(external);
423 LPDIRECT3DSURFACE9 d3d = (LPDIRECT3DSURFACE9)(uintptr_t)ff->data[3];
425 for (unsigned i = 0; i < va->surface_count; i++) {
426 vlc_va_surface_t *surface = &va->surface[i];
428 if (surface->d3d == d3d)
/* vlc_va_t close callback: tears everything down in the reverse order of
 * creation (conversion cache, decoder, video service, device manager,
 * Direct3D device), then unloads the two DLLs and frees the description. */
432 static void Close(vlc_va_t *external)
434 vlc_va_dxva2_t *va = vlc_va_dxva2_Get(external);
436 DxDestroyVideoConversion(va);
437 DxDestroyVideoDecoder(va);
438 DxDestroyVideoService(va);
439 D3dDestroyDeviceManager(va);
440 D3dDestroyDevice(va);
443 FreeLibrary(va->hdxva2_dll);
445 FreeLibrary(va->hd3d9_dll);
447 free(va->va.description);
/* Creates a DXVA2 video acceleration object for the given libavcodec codec:
 * loads d3d9.dll and dxva2.dll, creates the Direct3D device, the device
 * manager and the video decoder service, selects a decoder mode and a render
 * format, then fills in the vlc_va_t callbacks.
 * `log` is the object used for logging. */
451 vlc_va_t *vlc_va_NewDxva2(vlc_object_t *log, int codec_id)
/* calloc() so that every handle/pointer starts out as NULL/0 */
453 vlc_va_dxva2_t *va = calloc(1, sizeof(*va));
459 va->codec_id = codec_id;
/* Load the system DLLs at run time */
462 va->hd3d9_dll = LoadLibrary(TEXT("D3D9.DLL"));
463 if (!va->hd3d9_dll) {
464 msg_Warn(va->log, "cannot load d3d9.dll");
467 va->hdxva2_dll = LoadLibrary(TEXT("DXVA2.DLL"));
468 if (!va->hdxva2_dll) {
469 msg_Warn(va->log, "cannot load dxva2.dll");
472 msg_Dbg(va->log, "DLLs loaded");
/* Bring up the Direct3D / DXVA2 objects one after the other */
475 if (D3dCreateDevice(va)) {
476 msg_Err(va->log, "Failed to create Direct3D device");
479 msg_Dbg(va->log, "D3dCreateDevice succeed");
481 if (D3dCreateDeviceManager(va)) {
482 msg_Err(va->log, "D3dCreateDeviceManager failed");
486 if (DxCreateVideoService(va)) {
487 msg_Err(va->log, "DxCreateVideoService failed");
/* Pick the decoder mode (va->input) and the render format (va->render) */
492 if (DxFindVideoServiceConversion(va, &va->input, &va->render)) {
493 msg_Err(va->log, "DxFindVideoServiceConversion failed");
497 /* TODO print the hardware name/vendor for debugging purposes */
498 va->va.description = DxDescribe(va);
499 va->va.setup = Setup;
501 va->va.release = Release;
502 va->va.extract = Extract;
503 va->va.close = Close;
513 * It creates a Direct3D device usable for DXVA 2
/* Resolves Direct3DCreate9 from the already loaded d3d9.dll, creates the
 * Direct3D object and a windowed HAL device on the default adapter. */
515 static int D3dCreateDevice(vlc_va_dxva2_t *va)
518 LPDIRECT3D9 (WINAPI *Create9)(UINT SDKVersion);
/* NOTE(review): GetProcAddress() takes an ANSI (char) name even in UNICODE
 * builds, so wrapping the name in TEXT() is only correct when UNICODE is not
 * defined -- a plain string literal would be safer. */
519 Create9 = (void *)GetProcAddress(va->hd3d9_dll,
520 TEXT("Direct3DCreate9"));
522 msg_Err(va->log, "Cannot locate reference to Direct3DCreate9 ABI in DLL");
528 d3dobj = Create9(D3D_SDK_VERSION);
530 msg_Err(va->log, "Direct3DCreate9 failed");
/* The presentation parameters are kept in va; the device is never used to
 * present anything according to the comment further down. */
536 D3DPRESENT_PARAMETERS *d3dpp = &va->d3dpp;
537 ZeroMemory(d3dpp, sizeof(*d3dpp));
538 d3dpp->Flags = D3DPRESENTFLAG_VIDEO;
539 d3dpp->Windowed = TRUE;
540 d3dpp->hDeviceWindow = NULL;
541 d3dpp->SwapEffect = D3DSWAPEFFECT_DISCARD;
542 d3dpp->MultiSampleType = D3DMULTISAMPLE_NONE;
543 d3dpp->PresentationInterval = D3DPRESENT_INTERVAL_DEFAULT;
544 d3dpp->BackBufferCount = 0; /* FIXME what to put here */
545 d3dpp->BackBufferFormat = D3DFMT_X8R8G8B8; /* FIXME what to put here */
546 d3dpp->BackBufferWidth = 0;
547 d3dpp->BackBufferHeight = 0;
548 d3dpp->EnableAutoDepthStencil = FALSE;
550 /* Direct3D needs a HWND to create a device, even without using ::Present
551 this HWND is used to alert Direct3D when there's a change of focus window.
552 For now, use GetShellWindow, as it looks harmless */
553 LPDIRECT3DDEVICE9 d3ddev;
554 if (FAILED(IDirect3D9_CreateDevice(d3dobj, D3DADAPTER_DEFAULT,
555 D3DDEVTYPE_HAL, GetShellWindow(),
556 D3DCREATE_SOFTWARE_VERTEXPROCESSING |
557 D3DCREATE_MULTITHREADED,
559 msg_Err(va->log, "IDirect3D9_CreateDevice failed");
567 * It releases a Direct3D device and its resources.
/* Releases the Direct3D device (va->d3ddev) and the Direct3D object
 * (va->d3dobj). */
569 static void D3dDestroyDevice(vlc_va_dxva2_t *va)
572 IDirect3DDevice9_Release(va->d3ddev);
574 IDirect3D9_Release(va->d3dobj);
577 * It describes our Direct3D object
/* Builds a heap-allocated, human-readable description of the default
 * adapter (name, vendor, device and revision identifiers). Falls back to
 * "DXVA2 (unknown)" when the adapter identifier cannot be queried. */
579 static char *DxDescribe(vlc_va_dxva2_t *va)
581 D3DADAPTER_IDENTIFIER9 id;
582 ZeroMemory(&id, sizeof(id));
584 if (FAILED(IDirect3D9_GetAdapterIdentifier(va->d3dobj,
585 D3DADAPTER_DEFAULT, 0, &id)))
586 return strdup("DXVA2 (unknown)");
/* The precision bounds the read of id.Description to the array size.
 * NOTE(review): the precision argument of a printf-style function must be an
 * int, but sizeof yields a size_t; an (int) cast is needed for this to be
 * well defined where the two types differ in size. The identifiers are also
 * printed with the signed conversion d -- confirm their actual type. */
589 if (asprintf(&description, "DXVA2 (%.*s, vendor %d, device %d, revision %d)",
590 sizeof(id.Description), id.Description,
591 id.VendorId, id.DeviceId, id.Revision) < 0)
597 * It creates a Direct3D device manager
/* Resolves DXVA2CreateDirect3DDeviceManager9 from dxva2.dll, creates the
 * device manager and binds the Direct3D device to it with the reset token
 * returned at creation. */
599 static int D3dCreateDeviceManager(vlc_va_dxva2_t *va)
601 HRESULT (WINAPI *CreateDeviceManager9)(UINT *pResetToken,
602 IDirect3DDeviceManager9 **);
/* NOTE(review): GetProcAddress() expects an ANSI name; TEXT() is only
 * correct in non-UNICODE builds. */
603 CreateDeviceManager9 =
604 (void *)GetProcAddress(va->hdxva2_dll,
605 TEXT("DXVA2CreateDirect3DDeviceManager9"));
607 if (!CreateDeviceManager9) {
608 msg_Err(va->log, "cannot load function\n");
/* At this point only the symbol lookup has succeeded */
611 msg_Dbg(va->log, "OurDirect3DCreateDeviceManager9 Success!");
614 IDirect3DDeviceManager9 *devmng;
615 if (FAILED(CreateDeviceManager9(&token, &devmng))) {
616 msg_Err(va->log, " OurDirect3DCreateDeviceManager9 failed");
621 msg_Info(va->log, "obtained IDirect3DDeviceManager9");
/* Associate our Direct3D device with the manager */
623 HRESULT hr = IDirect3DDeviceManager9_ResetDevice(devmng, va->d3ddev, token);
625 msg_Err(va->log, "IDirect3DDeviceManager9_ResetDevice failed: %08x", (unsigned)hr);
631 * It destroys a Direct3D device manager
/* Releases the Direct3D device manager (va->devmng). */
633 static void D3dDestroyDeviceManager(vlc_va_dxva2_t *va)
636 IDirect3DDeviceManager9_Release(va->devmng);
640 * It creates a DirectX video service
/* Opens a device handle on the device manager and asks it for the
 * IDirectXVideoDecoderService interface. */
642 static int DxCreateVideoService(vlc_va_dxva2_t *va)
644 HRESULT (WINAPI *CreateVideoService)(IDirect3DDevice9 *,
/* NOTE(review): GetProcAddress() expects an ANSI name; TEXT() is only
 * correct in non-UNICODE builds. In the lines shown here the resolved
 * pointer is only tested for NULL; the service itself is obtained through
 * the device manager below. */
648 (void *)GetProcAddress(va->hdxva2_dll,
649 TEXT("DXVA2CreateVideoService"));
651 if (!CreateVideoService) {
652 msg_Err(va->log, "cannot load function\n");
/* At this point only the symbol lookup has succeeded */
655 msg_Info(va->log, "DXVA2CreateVideoService Success!");
660 hr = IDirect3DDeviceManager9_OpenDeviceHandle(va->devmng, &device);
662 msg_Err(va->log, "OpenDeviceHandle failed");
667 IDirectXVideoDecoderService *vs;
668 hr = IDirect3DDeviceManager9_GetVideoService(va->devmng, device,
669 &IID_IDirectXVideoDecoderService,
672 msg_Err(va->log, "GetVideoService failed");
680 * It destroys a DirectX video service
/* Closes the device handle opened on the device manager and releases the
 * video decoder service. */
682 static void DxDestroyVideoService(vlc_va_dxva2_t *va)
685 IDirect3DDeviceManager9_CloseDeviceHandle(va->devmng, va->device);
687 IDirectXVideoDecoderService_Release(va->vs);
690 * Find the best suited decoder mode GUID and render format.
/* Selects the decoder mode and the render format to use.
 * The hardware is queried for its decoder GUIDs; dxva2_modes is then walked
 * in table order and, for the first usable mode, d3d_formats is walked in
 * table order to pick a render target. On success *input receives the mode
 * GUID and *output the Direct3D format. */
692 static int DxFindVideoServiceConversion(vlc_va_dxva2_t *va, GUID *input, D3DFORMAT *output)
694 /* Retrieve supported modes from the decoder service */
695 UINT input_count = 0;
696 GUID *input_list = NULL;
697 if (FAILED(IDirectXVideoDecoderService_GetDecoderDeviceGuids(va->vs,
700 msg_Err(va->log, "IDirectXVideoDecoderService_GetDecoderDeviceGuids failed");
/* Log every GUID reported by the hardware, known or not */
703 for (unsigned i = 0; i < input_count; i++) {
704 const GUID *g = &input_list[i];
705 const dxva2_mode_t *mode = Dxva2FindMode(g);
707 msg_Dbg(va->log, "- '%s' is supported by hardware", mode->name);
709 msg_Warn(va->log, "- Unknown GUID = %08X-%04x-%04x-XXXX",
710 (unsigned)g->Data1, g->Data2, g->Data3);
714 /* Try all supported modes in our order of priority */
715 for (unsigned i = 0; dxva2_modes[i].name; i++) {
716 const dxva2_mode_t *mode = &dxva2_modes[i];
/* Filter on modes without a codec or meant for another codec */
717 if (!mode->codec || mode->codec != va->codec_id)
/* Is this mode part of the list reported by the hardware? */
721 bool is_suported = false;
722 for (const GUID *g = &input_list[0]; !is_suported && g < &input_list[input_count]; g++) {
723 is_suported = IsEqualGUID(mode->guid, g);
729 msg_Dbg(va->log, "Trying to use '%s' as input", mode->name);
730 UINT output_count = 0;
731 D3DFORMAT *output_list = NULL;
732 if (FAILED(IDirectXVideoDecoderService_GetDecoderRenderTargets(va->vs, mode->guid,
735 msg_Err(va->log, "IDirectXVideoDecoderService_GetDecoderRenderTargets failed");
/* Log every render target offered for this mode */
738 for (unsigned j = 0; j < output_count; j++) {
739 const D3DFORMAT f = output_list[j];
740 const d3d_format_t *format = D3dFindFormat(f);
742 msg_Dbg(va->log, "%s is supported for output", format->name);
744 msg_Dbg(va->log, "%d is supported for output (%4.4s)", f, (const char*)&f);
/* Walk our own formats in table order and check each against the list */
749 for (unsigned j = 0; d3d_formats[j].name; j++) {
750 const d3d_format_t *format = &d3d_formats[j];
753 bool is_suported = false;
754 for (unsigned k = 0; !is_suported && k < output_count; k++) {
755 is_suported = format->format == output_list[k];
760 /* We have our solution */
761 msg_Dbg(va->log, "Using '%s' to decode to '%s'", mode->name, format->name);
762 *input = *mode->guid;
763 *output = format->format;
/* NOTE(review): this file contains a `#define CoTaskMemFree(x)` with an
 * empty expansion near its top (possibly under a condition not shown); where
 * that definition is active, the calls below free nothing. */
764 CoTaskMemFree(output_list);
765 CoTaskMemFree(input_list);
768 CoTaskMemFree(output_list);
770 CoTaskMemFree(input_list);
775 * It creates a DXVA2 decoder using the given video format
/* Creates the decoder surfaces, selects a decoder configuration and creates
 * the DXVA2 video decoder for the size given in fmt. Results are stored in
 * va (surface[], surface_count, surface_width/height, decoder). */
777 static int DxCreateVideoDecoder(vlc_va_dxva2_t *va,
778 int codec_id, const video_format_t *fmt)
781 msg_Dbg(va->log, "DxCreateVideoDecoder id %d %dx%d",
782 codec_id, fmt->i_width, fmt->i_height);
784 /* Allocates all surfaces needed for the decoder */
/* Two possible surface counts; the condition choosing between them is not
 * part of the lines shown here (presumably codec dependent -- confirm). */
787 va->surface_count = 16 + 1;
790 va->surface_count = 2 + 1;
793 LPDIRECT3DSURFACE9 surface_list[VA_DXVA2_MAX_SURFACE_COUNT];
794 if (FAILED(IDirectXVideoDecoderService_CreateSurface(va->vs,
/* surface_count - 1 is passed here; presumably this is the back buffer
 * count, with the API creating one more surface than requested -- confirm
 * against the CreateSurface documentation. */
797 va->surface_count - 1,
801 DXVA2_VideoDecoderRenderTarget,
804 msg_Err(va->log, "IDirectXVideoAccelerationService_CreateSurface failed\n");
/* No surface is owned on failure */
805 va->surface_count = 0;
/* Register every created surface as unused */
808 for (unsigned i = 0; i < va->surface_count; i++) {
809 vlc_va_surface_t *surface = &va->surface[i];
810 surface->d3d = surface_list[i];
811 surface->refcount = 0;
814 va->surface_width = fmt->i_width;
815 va->surface_height = fmt->i_height;
816 msg_Dbg(va->log, "IDirectXVideoAccelerationService_CreateSurface succeed with %d surfaces (%dx%d)",
817 va->surface_count, fmt->i_width, fmt->i_height);
/* Describe the video to the decoder service */
821 ZeroMemory(&dsc, sizeof(dsc));
822 dsc.SampleWidth = fmt->i_width;
823 dsc.SampleHeight = fmt->i_height;
824 dsc.Format = va->render;
/* Use the frame rate when it is known, 0/0 otherwise */
825 if (fmt->i_frame_rate > 0 && fmt->i_frame_rate_base > 0) {
826 dsc.InputSampleFreq.Numerator = fmt->i_frame_rate;
827 dsc.InputSampleFreq.Denominator = fmt->i_frame_rate_base;
829 dsc.InputSampleFreq.Numerator = 0;
830 dsc.InputSampleFreq.Denominator = 0;
832 dsc.OutputFrameFreq = dsc.InputSampleFreq;
833 dsc.UABProtectionLevel = FALSE;
836 /* FIXME I am unsure we can let unknown everywhere */
837 DXVA2_ExtendedFormat *ext = &dsc.SampleFormat;
838 ext->SampleFormat = 0;//DXVA2_SampleUnknown;
839 ext->VideoChromaSubsampling = 0;//DXVA2_VideoChromaSubsampling_Unknown;
840 ext->NominalRange = 0;//DXVA2_NominalRange_Unknown;
841 ext->VideoTransferMatrix = 0;//DXVA2_VideoTransferMatrix_Unknown;
842 ext->VideoLighting = 0;//DXVA2_VideoLighting_Unknown;
843 ext->VideoPrimaries = 0;//DXVA2_VideoPrimaries_Unknown;
844 ext->VideoTransferFunction = 0;//DXVA2_VideoTransFunc_Unknown;
846 /* List all configurations available for the decoder */
848 DXVA2_ConfigPictureDecode *cfg_list = NULL;
849 if (FAILED(IDirectXVideoDecoderService_GetDecoderConfigurations(va->vs,
855 msg_Err(va->log, "IDirectXVideoDecoderService_GetDecoderConfigurations failed\n");
858 msg_Dbg(va->log, "we got %d decoder configurations", cfg_count);
860 /* Select the best decoder configuration */
861 bool has_cfg = false;
862 for (unsigned i = 0; i < cfg_count; i++) {
863 const DXVA2_ConfigPictureDecode *cfg = &cfg_list[i];
866 msg_Dbg(va->log, "configuration[%d] ConfigBitstreamRaw %d",
867 i, cfg->ConfigBitstreamRaw);
/* A configuration with ConfigBitstreamRaw == 1 is accepted only while none
 * has been selected yet; for H.264, ConfigBitstreamRaw == 2 is accepted
 * even if a configuration was already selected. */
870 if ((!has_cfg && cfg->ConfigBitstreamRaw == 1) ||
871 (codec_id == CODEC_ID_H264 && cfg->ConfigBitstreamRaw == 2)) {
876 CoTaskMemFree(cfg_list);
878 msg_Err(va->log, "Failed to find a supported decoder configuration");
882 /* Create the decoder */
883 IDirectXVideoDecoder *decoder;
884 if (FAILED(IDirectXVideoDecoderService_CreateVideoDecoder(va->vs,
891 msg_Err(va->log, "IDirectXVideoDecoderService_CreateVideoDecoder failed\n");
894 va->decoder = decoder;
895 msg_Dbg(va->log, "IDirectXVideoDecoderService_CreateVideoDecoder succeed");
/* Releases the DXVA2 decoder and every surface created for it, then resets
 * the surface count so that the surfaces are not released twice. */
898 static void DxDestroyVideoDecoder(vlc_va_dxva2_t *va)
901 IDirectXVideoDecoder_Release(va->decoder);
904 for (unsigned i = 0; i < va->surface_count; i++)
905 IDirect3DSurface9_Release(va->surface[i].d3d);
906 va->surface_count = 0;
/* Placeholder for resetting the decoder after a device change (it is called
 * from Get() on DXVA2_E_NEW_VIDEO_DEVICE): it only logs that the operation
 * is not implemented. */
908 static int DxResetVideoDecoder(vlc_va_dxva2_t *va)
910 msg_Err(va->log, "DxResetVideoDecoder unimplemented");
/* Chooses the output format from the render format and allocates the
 * 16-byte aligned cache used when copying surfaces. */
914 static void DxCreateVideoConversion(vlc_va_dxva2_t *va)
/* NV12 surfaces are delivered as YV12; the other assignment keeps the
 * render format unchanged */
916 switch (va->render) {
917 case MAKEFOURCC('N','V','1','2'):
918 va->output = MAKEFOURCC('Y','V','1','2');
921 va->output = va->render;
/* The cache holds at least 4096 bytes, or one surface line rounded up to a
 * multiple of 16 when that is larger */
924 va->surface_cache_size = __MAX((va->surface_width + 0x0f) & ~ 0x0f, 4096);
/* 16 extra bytes leave room for the alignment adjustment below.
 * NOTE(review): the result of malloc() is not checked in the lines shown
 * here; with a NULL base the next line yields a non-NULL surface_cache, so
 * the `!va->surface_cache` test in Extract() would not catch the failure. */
925 va->surface_cache_base = malloc(16 + va->surface_cache_size);
/* Advance to the next 16-byte boundary (by a full 16 bytes when the base is
 * already aligned) */
926 va->surface_cache = &va->surface_cache_base[16 - ((intptr_t)va->surface_cache_base & 0x0f)];
/* Frees the surface cache and resets the related fields, which makes the
 * function safe to call again. */
928 static void DxDestroyVideoConversion(vlc_va_dxva2_t *va)
/* Only the base pointer was returned by malloc(); surface_cache is an
 * aligned alias into the same allocation */
930 free(va->surface_cache_base);
931 va->surface_cache_base = NULL;
932 va->surface_cache = NULL;
933 va->surface_cache_size = 0;
936 /* Copy 64 bytes from srcp to dstp loading data with the SSE>=2 instruction load and
937 * storing data with the SSE>=2 instruction store.
/* `load` and `store` are string literals naming the instructions to use;
 * they are pasted in front of the operands of four 16-byte loads followed by
 * four 16-byte stores.
 * NOTE(review): xmm1..xmm4 are overwritten but only "memory" appears in the
 * clobber list shown below -- confirm this is safe with the compilers in
 * use. */
939 #define COPY64(dstp, srcp, load, store) \
941 load " 0(%[src]), %%xmm1\n" \
942 load " 16(%[src]), %%xmm2\n" \
943 load " 32(%[src]), %%xmm3\n" \
944 load " 48(%[src]), %%xmm4\n" \
945 store " %%xmm1, 0(%[dst])\n" \
946 store " %%xmm2, 16(%[dst])\n" \
947 store " %%xmm3, 32(%[dst])\n" \
948 store " %%xmm4, 48(%[dst])\n" \
949 : : [dst]"r"(dstp), [src]"r"(srcp) : "memory")
951 /* Execute the instruction op only if SSE2 is supported:
 * - when SSE2 code can be compiled, op is guarded by a run-time test of the
 *   CPU_CAPABILITY_SSE2 bit of `cpu`;
 * - otherwise the macro expands to nothing. */
952 #ifdef CAN_COMPILE_SSE2
953 # define ASM_SSE2(cpu, op) do { \
954 if (cpu & CPU_CAPABILITY_SSE2) \
958 # define ASM_SSE2(cpu, op)
961 /* Optimized copy from "Uncacheable Speculative Write Combining" memory
962 * as used by some video surface.
963 * XXX It is really efficient only when SSE4.1 is available.
965 static void CopyFromUswc(uint8_t *dst, size_t dst_pitch,
966 const uint8_t *src, size_t src_pitch,
968 unsigned width, unsigned height,
/* The destination must be 16-byte aligned with a pitch multiple of 16 */
971 assert(((intptr_t)dst & 0x0f) == 0 && (dst_pitch & 0x0f) == 0);
973 ASM_SSE2(cpu, "mfence");
/* One line per iteration */
974 for (unsigned y = 0; y < height; y++) {
/* Head: the first `unaligned` bytes are handled separately from the 64-byte
 * blocks below */
977 for (x = 0; x < unaligned; x++)
/* SSE4.1: 64-byte blocks loaded with movntdqa. Two store variants exist
 * (movdqa / movdqu); the condition selecting one is not part of the lines
 * shown here. */
980 #ifdef CAN_COMPILE_SSE4_1
981 if (cpu & CPU_CAPABILITY_SSE4_1) {
983 for (; x+63 < width; x += 64)
984 COPY64(&dst[x], &src[x], "movntdqa", "movdqa");
986 for (; x+63 < width; x += 64)
987 COPY64(&dst[x], &src[x], "movntdqa", "movdqu");
/* SSE2: same structure with regular movdqa loads */
991 #ifdef CAN_COMPILE_SSE2
992 if (cpu & CPU_CAPABILITY_SSE2) {
994 for (; x+63 < width; x += 64)
995 COPY64(&dst[x], &src[x], "movdqa", "movdqa");
997 for (; x+63 < width; x += 64)
998 COPY64(&dst[x], &src[x], "movdqa", "movdqu");
/* Tail: whatever is left after the 64-byte blocks (or the whole line when
 * no SIMD path ran) */
1003 for (; x < width; x++)
/* Copies a width x height block of bytes from src to dst, line by line.
 * src must be 16-byte aligned with a pitch multiple of 16; dst may have any
 * alignment. */
1011 static void Copy2d(uint8_t *dst, size_t dst_pitch,
1012 const uint8_t *src, size_t src_pitch,
1013 unsigned width, unsigned height,
1016 assert(((intptr_t)src & 0x0f) == 0 && (src_pitch & 0x0f) == 0);
1018 ASM_SSE2(cpu, "mfence");
1020 for (unsigned y = 0; y < height; y++) {
/* The alignment of dst is evaluated for each line */
1022 bool unaligned = ((intptr_t)dst & 0x0f) != 0;
/* SSE2: 64-byte blocks, stored either with movntdq or with movdqu
 * (presumably movntdq when dst is aligned -- the selecting condition is not
 * part of the lines shown here) */
1024 #ifdef CAN_COMPILE_SSE2
1025 if (cpu & CPU_CAPABILITY_SSE2) {
1027 for (; x+63 < width; x += 64)
1028 COPY64(&dst[x], &src[x], "movdqa", "movntdq");
1030 for (; x+63 < width; x += 64)
1031 COPY64(&dst[x], &src[x], "movdqa", "movdqu");
/* Tail: bytes left after the 64-byte blocks */
1036 for (; x < width; x++)
/* De-interleaves a plane of byte pairs: for every pair, the first byte goes
 * to dstu and the second one to dstv (see the scalar loop at the end).
 * `width` counts pairs, i.e. each source line holds 2*width bytes.
 * src must be 16-byte aligned with a pitch multiple of 16. */
1044 static void SplitUV(uint8_t *dstu, size_t dstu_pitch,
1045 uint8_t *dstv, size_t dstv_pitch,
1046 const uint8_t *src, size_t src_pitch,
1047 unsigned width, unsigned height, unsigned cpu)
/* pshufb control: even-indexed bytes to the low half, odd-indexed bytes to
 * the high half of the register */
1049 const uint8_t shuffle[] = { 0, 2, 4, 6, 8, 10, 12, 14,
1050 1, 3, 5, 7, 9, 11, 13, 15 };
/* Keeps the low byte of every 16-bit word */
1051 const uint8_t mask[] = { 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00,
1052 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00 };
1054 assert(((intptr_t)src & 0x0f) == 0 && (src_pitch & 0x0f) == 0);
1056 ASM_SSE2(cpu, "mfence");
1058 for (unsigned y = 0; y < height; y++) {
/* Assembly fragment: load 64 source bytes into xmm0..xmm3 */
1062 "movdqa 0(%[src]), %%xmm0\n" \
1063 "movdqa 16(%[src]), %%xmm1\n" \
1064 "movdqa 32(%[src]), %%xmm2\n" \
1065 "movdqa 48(%[src]), %%xmm3\n"
/* Assembly fragment: the low 8 bytes of each register go to dst1, the high
 * 8 bytes to dst2 (32 bytes per destination) */
1068 "movq %%xmm0, 0(%[dst1])\n" \
1069 "movq %%xmm1, 8(%[dst1])\n" \
1070 "movhpd %%xmm0, 0(%[dst2])\n" \
1071 "movhpd %%xmm1, 8(%[dst2])\n" \
1072 "movq %%xmm2, 16(%[dst1])\n" \
1073 "movq %%xmm3, 24(%[dst1])\n" \
1074 "movhpd %%xmm2, 16(%[dst2])\n" \
1075 "movhpd %%xmm3, 24(%[dst2])\n"
/* SSSE3: one pshufb per register sorts the first bytes of the pairs into
 * the low half, so dst1 is dstu and dst2 is dstv */
1077 #ifdef CAN_COMPILE_SSSE3
1078 if (cpu & CPU_CAPABILITY_SSSE3) {
1079 for (x = 0; x < (width & ~31); x += 32) {
1081 "movdqu (%[shuffle]), %%xmm7\n"
1083 "pshufb %%xmm7, %%xmm0\n"
1084 "pshufb %%xmm7, %%xmm1\n"
1085 "pshufb %%xmm7, %%xmm2\n"
1086 "pshufb %%xmm7, %%xmm3\n"
1088 : : [dst1]"r"(&dstu[x]), [dst2]"r"(&dstv[x]), [src]"r"(&src[2*x]), [shuffle]"r"(shuffle) : "memory");
/* SSE2: mask/shift/pack variant. After packuswb the shifted-down second
 * bytes end up in the low half and the masked first bytes in the high half,
 * which is why dst1/dst2 are bound the other way round (dst1 = dstv,
 * dst2 = dstu). xmm7 is reused as a temporary for the last register, hence
 * the mask is reloaded on every iteration. */
1092 #ifdef CAN_COMPILE_SSE2
1093 if (cpu & CPU_CAPABILITY_SSE2) {
1094 for (x = 0; x < (width & ~31); x += 32) {
1096 "movdqu (%[mask]), %%xmm7\n"
1098 "movdqa %%xmm0, %%xmm4\n"
1099 "movdqa %%xmm1, %%xmm5\n"
1100 "movdqa %%xmm2, %%xmm6\n"
1101 "psrlw $8, %%xmm0\n"
1102 "psrlw $8, %%xmm1\n"
1103 "pand %%xmm7, %%xmm4\n"
1104 "pand %%xmm7, %%xmm5\n"
1105 "pand %%xmm7, %%xmm6\n"
1106 "packuswb %%xmm4, %%xmm0\n"
1107 "packuswb %%xmm5, %%xmm1\n"
1108 "pand %%xmm3, %%xmm7\n"
1109 "psrlw $8, %%xmm2\n"
1110 "psrlw $8, %%xmm3\n"
1111 "packuswb %%xmm6, %%xmm2\n"
1112 "packuswb %%xmm7, %%xmm3\n"
1114 : : [dst2]"r"(&dstu[x]), [dst1]"r"(&dstv[x]), [src]"r"(&src[2*x]), [mask]"r"(mask) : "memory");
/* Scalar tail (and fallback): one pair at a time */
1121 for (; x < width; x++) {
1122 dstu[x] = src[2*x+0];
1123 dstv[x] = src[2*x+1];
/* Copies one plane from src to dst through the intermediate cache: the
 * plane is processed in blocks of lines, each block being first copied into
 * the cache with CopyFromUswc() and then from the cache to dst with
 * Copy2d(). */
1131 static void CopyPlane(uint8_t *dst, size_t dst_pitch, const uint8_t *src, size_t src_pitch,
1132 uint8_t *cache, size_t cache_size,
1133 unsigned width, unsigned height,
/* Cache line length: width rounded up to a multiple of 16 */
1136 const unsigned w16 = (width+15) & ~15;
/* Number of lines that fit in the cache; it has to be non-zero for the loop
 * below to advance, i.e. cache_size must be at least w16 */
1137 const unsigned hstep = cache_size / w16;
1140 for (unsigned y = 0; y < height; y += hstep) {
1141 const unsigned unaligned = (intptr_t)src & 0x0f;
/* The last block may be shorter */
1142 const unsigned hblock = __MIN(hstep, height - y);
1144 /* Copy a bunch of lines into our cache */
1145 CopyFromUswc(cache, w16,
1148 width, hblock, cpu);
1150 /* Copy from our cache to the destination */
1151 Copy2d(dst, dst_pitch,
1153 width, hblock, cpu);
/* Move on to the next block of lines */
1156 src += src_pitch * hblock;
1157 dst += dst_pitch * hblock;
1160 ASM_SSE2(cpu, "mfence");
/* Splits an interleaved plane into two planes through the intermediate
 * cache: the source is processed in blocks of lines, each block being first
 * copied into the cache with CopyFromUswc() and then de-interleaved into
 * dstu/dstv with SplitUV(). `width` counts byte pairs per line. */
1162 static void SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
1163 uint8_t *dstv, size_t dstv_pitch,
1164 const uint8_t *src, size_t src_pitch,
1165 uint8_t *cache, size_t cache_size,
1166 unsigned width, unsigned height,
/* Cache line length: 2*width bytes rounded up to a multiple of 16 */
1169 const unsigned w2_16 = (2*width+15) & ~15;
/* Number of lines that fit in the cache; it has to be non-zero for the loop
 * below to advance, i.e. cache_size must be at least w2_16 */
1170 const unsigned hstep = cache_size / w2_16;
1173 for (unsigned y = 0; y < height; y += hstep) {
1174 const unsigned unaligned = (intptr_t)src & 0x0f;
/* The last block may be shorter */
1175 const unsigned hblock = __MIN(hstep, height - y);
1177 /* Copy a bunch of lines into our cache */
1178 CopyFromUswc(cache, w2_16,
1181 2*width, hblock, cpu);
1183 /* Copy from our cache to the destination */
1184 SplitUV(dstu, dstu_pitch,
1187 width, hblock, cpu);
/* Move on to the next block of lines */
1190 src += src_pitch * hblock;
1191 dstu += dstu_pitch * hblock;
1192 dstv += dstv_pitch * hblock;
1195 ASM_SSE2(cpu, "mfence");
/* Copies a locked surface made of a full-size plane followed by a
 * half-height interleaved plane into a three-plane picture.
 * width/height are the dimensions of the first plane. */
1198 static void CopyFromNv12(picture_t *dst, const D3DLOCKED_RECT *src,
1199 uint8_t *cache, size_t cache_size,
1200 unsigned width, unsigned height)
1202 const unsigned cpu = vlc_CPU();
/* First plane: straight copy into dst plane 0 */
1205 CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
1206 src->pBits, src->Pitch,
1208 width, height, cpu);
/* Interleaved plane: it starts right after `height` lines of the first
 * plane. The first byte of each pair goes to dst plane 2 and the second one
 * to dst plane 1. */
1209 SplitPlanes(dst->p[2].p_pixels, dst->p[2].i_pitch,
1210 dst->p[1].p_pixels, dst->p[1].i_pitch,
1211 (const uint8_t*)src->pBits + src->Pitch * height, src->Pitch,
1213 width/2, height/2, cpu);
1215 ASM_SSE2(cpu, "emms");
/* Copies a locked three-plane surface into a three-plane picture, plane n of
 * the source going to plane n of dst. width/height are the dimensions of the
 * first plane; the two following planes are copied at half width and half
 * height. */
1217 static void CopyFromYv12(picture_t *dst, const D3DLOCKED_RECT *src,
1218 uint8_t *cache, size_t cache_size,
1219 unsigned width, unsigned height)
1221 const unsigned cpu = vlc_CPU();
/* `offset` is the byte position of plane n inside the locked surface */
1224 for (unsigned n = 0, offset = 0; n < 3; n++) {
/* Divisor applied to the pitch and the dimensions: 1 for the first plane,
 * 2 for the others */
1225 const unsigned d = n > 0 ? 2 : 1;
1226 CopyPlane(dst->p[n].p_pixels, dst->p[n].i_pitch,
1227 (const uint8_t*)src->pBits + offset, src->Pitch/d,
1229 width/d, height/d, cpu);
/* Skip the plane that was just copied */
1230 offset += (src->Pitch/d) * (height/d);
1232 ASM_SSE2(cpu, "emms");
1238 vlc_va_t *vlc_va_NewDxva2(vlc_object_t *log, int codec_id)