endif
endif
-QUOTED_CFLAGS := $(CFLAGS)
-
ifeq ($(HAVE_OPENCL),yes)
-empty:=
-space:=$(empty) $(empty)
-escaped:=\ $(empty)
-open:=(
-escopen:=\(
-close:=)
-escclose:=\)
-SAFE_INC_DIR := $(subst $(space),$(escaped),$(OPENCL_INC_DIR))
-SAFE_INC_DIR := $(subst $(open),$(escopen),$(SAFE_INC_DIR))
-SAFE_INC_DIR := $(subst $(close),$(escclose),$(SAFE_INC_DIR))
-SAFE_LIB_DIR := $(subst $(space),$(escaped),$(OPENCL_LIB_DIR))
-SAFE_LIB_DIR := $(subst $(open),$(escopen),$(SAFE_LIB_DIR))
-SAFE_LIB_DIR := $(subst $(close),$(escclose),$(SAFE_LIB_DIR))
-# For normal CFLAGS and LDFLAGS, we must escape spaces with a backslash to
-# make gcc happy
-CFLAGS += -I$(SAFE_INC_DIR) -DCL_USE_DEPRECATED_OPENCL_1_1_APIS
-LDFLAGS += -l$(OPENCL_LIB) -L$(SAFE_LIB_DIR)
-# For the CFLAGS used by the .depend rule, we must add quotes because
-# the rule does an extra level of shell expansions
-QUOTED_CFLAGS += -I"$(OPENCL_INC_DIR)" -DCL_USE_DEPRECATED_OPENCL_1_1_APIS
common/oclobj.h: common/opencl/x264-cl.h $(wildcard $(SRCPATH)/common/opencl/*.cl)
cat $^ | perl $(SRCPATH)/tools/cltostr.pl x264_opencl_source > $@
GENERATED += common/oclobj.h
.depend: config.mak
@rm -f .depend
- @$(foreach SRC, $(addprefix $(SRCPATH)/, $(SRCS) $(SRCCLI) $(SRCSO)), $(CC) $(QUOTED_CFLAGS) $(SRC) $(DEPMT) $(SRC:$(SRCPATH)/%.c=%.o) $(DEPMM) 1>> .depend;)
+ @$(foreach SRC, $(addprefix $(SRCPATH)/, $(SRCS) $(SRCCLI) $(SRCSO)), $(CC) $(CFLAGS) $(SRC) $(DEPMT) $(SRC:$(SRCPATH)/%.c=%.o) $(DEPMM) 1>> .depend;)
config.mak:
./configure
}
if( p->b_opencl )
- s += sprintf( s, "opencl=%d", p->b_opencl );
- s += sprintf( s, " cabac=%d", p->b_cabac );
+ s += sprintf( s, "opencl=%d ", p->b_opencl );
+ s += sprintf( s, "cabac=%d", p->b_cabac );
s += sprintf( s, " ref=%d", p->i_frame_reference );
s += sprintf( s, " deblock=%d:%d:%d", p->b_deblocking_filter,
p->i_deblocking_filter_alphac0, p->i_deblocking_filter_beta );
#include <assert.h>
#include <limits.h>
-#if HAVE_OPENCL
-#include "opencl.h"
-#endif
-
#if HAVE_INTERLACED
# define MB_INTERLACED h->mb.b_interlaced
# define SLICE_MBAFF h->sh.b_mbaff
};
#include "x264.h"
+#if HAVE_OPENCL
+#include "opencl.h"
+#endif
#include "cabac.h"
#include "bitstream.h"
#include "set.h"
if( x264_pthread_cond_init( &frame->cv, NULL ) )
goto fail;
+#if HAVE_OPENCL
+ frame->opencl.ocl = h->opencl.ocl;
+#endif
+
return frame;
fail:
* Copyright (C) 2012-2013 x264 project
*
* Authors: Steve Borho <sborho@multicorewareinc.com>
+ * Anton Mitrofanov <BugMaster@narod.ru>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
*****************************************************************************/
#include "common.h"
-#if _WIN32
+
+#ifdef _WIN32
#include <windows.h>
+#define ocl_open LoadLibrary( "OpenCL" )
+#define ocl_close FreeLibrary
+#define ocl_address GetProcAddress
#else
#include <dlfcn.h> //dlopen, dlsym, dlclose
+#if SYS_MACOSX
+#define ocl_open dlopen( "libOpenCL.dylib", RTLD_NOW )
+#else
+#define ocl_open dlopen( "libOpenCL.so", RTLD_NOW )
+#endif
+#define ocl_close dlclose
+#define ocl_address dlsym
#endif
+#define LOAD_OCL_FUNC(name, continue_on_fail)\
+{\
+ ocl->name = (void*)ocl_address( ocl->library, #name );\
+ if( !continue_on_fail && !ocl->name )\
+ goto fail;\
+}
+
+/* load the library and functions we require from it */
+x264_opencl_function_t *x264_opencl_load_library( void )
+{
+ x264_opencl_function_t *ocl;
+#undef fail
+#define fail fail0
+ CHECKED_MALLOCZERO( ocl, sizeof(x264_opencl_function_t) );
+#undef fail
+#define fail fail1
+ ocl->library = ocl_open;
+ if( !ocl->library )
+ goto fail;
+#undef fail
+#define fail fail2
+ LOAD_OCL_FUNC( clBuildProgram, 0 );
+ LOAD_OCL_FUNC( clCreateBuffer, 0 );
+ LOAD_OCL_FUNC( clCreateCommandQueue, 0 );
+ LOAD_OCL_FUNC( clCreateContext, 0 );
+ LOAD_OCL_FUNC( clCreateImage2D, 0 );
+ LOAD_OCL_FUNC( clCreateKernel, 0 );
+ LOAD_OCL_FUNC( clCreateProgramWithBinary, 0 );
+ LOAD_OCL_FUNC( clCreateProgramWithSource, 0 );
+ LOAD_OCL_FUNC( clEnqueueCopyBuffer, 0 );
+ LOAD_OCL_FUNC( clEnqueueMapBuffer, 0 );
+ LOAD_OCL_FUNC( clEnqueueNDRangeKernel, 0 );
+ LOAD_OCL_FUNC( clEnqueueReadBuffer, 0 );
+ LOAD_OCL_FUNC( clEnqueueWriteBuffer, 0 );
+ LOAD_OCL_FUNC( clFinish, 0 );
+ LOAD_OCL_FUNC( clGetCommandQueueInfo, 0 );
+ LOAD_OCL_FUNC( clGetDeviceIDs, 0 );
+ LOAD_OCL_FUNC( clGetDeviceInfo, 0 );
+ LOAD_OCL_FUNC( clGetKernelWorkGroupInfo, 0 );
+ LOAD_OCL_FUNC( clGetPlatformIDs, 0 );
+ LOAD_OCL_FUNC( clGetProgramBuildInfo, 0 );
+ LOAD_OCL_FUNC( clGetProgramInfo, 0 );
+ LOAD_OCL_FUNC( clGetSupportedImageFormats, 0 );
+ LOAD_OCL_FUNC( clReleaseCommandQueue, 0 );
+ LOAD_OCL_FUNC( clReleaseContext, 0 );
+ LOAD_OCL_FUNC( clReleaseKernel, 0 );
+ LOAD_OCL_FUNC( clReleaseMemObject, 0 );
+ LOAD_OCL_FUNC( clReleaseProgram, 0 );
+ LOAD_OCL_FUNC( clSetKernelArg, 0 );
+ return ocl;
+#undef fail
+fail2:
+ ocl_close( ocl->library );
+fail1:
+ x264_free( ocl );
+fail0:
+ return NULL;
+}
+
+void x264_opencl_close_library( x264_opencl_function_t *ocl )
+{
+ if( !ocl )
+ return;
+ ocl_close( ocl->library );
+ x264_free( ocl );
+}
+
/* define from recent cl_ext.h, copied here in case headers are old */
#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042
/* Requires full include path in case of out-of-tree builds */
#include "common/oclobj.h"
-static int x264_detect_switchable_graphics();
+static int x264_detect_switchable_graphics( void );
/* Try to load the cached compiled program binary, verify the device context is
* still valid before reuse */
static cl_program x264_opencl_cache_load( x264_t *h, char *devname, char *devvendor, char *driverversion )
{
- cl_program program = NULL;
- cl_int status;
-
/* try to load cached program binary */
FILE *fp = fopen( h->param.psz_clbin_file, "rb" );
if( !fp )
return NULL;
- fseek( fp, 0L, SEEK_END );
+ x264_opencl_function_t *ocl = h->opencl.ocl;
+ cl_program program = NULL;
+ cl_int status;
+
+ fseek( fp, 0, SEEK_END );
size_t size = ftell( fp );
rewind( fp );
uint8_t *binary;
CHECK_STRING( x264_opencl_source_hash );
#undef CHECK_STRING
- program = clCreateProgramWithBinary( h->opencl.context, 1, &h->opencl.device, &size, &ptr, NULL, &status );
+ program = ocl->clCreateProgramWithBinary( h->opencl.context, 1, &h->opencl.device, &size, &ptr, NULL, &status );
if( status != CL_SUCCESS )
program = NULL;
return;
}
+ x264_opencl_function_t *ocl = h->opencl.ocl;
size_t size;
- cl_int status = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL );
+ cl_int status = ocl->clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL );
if( status == CL_SUCCESS )
{
uint8_t *binary;
CHECKED_MALLOC( binary, size );
- status = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof(uint8_t *), &binary, NULL );
+ status = ocl->clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof(uint8_t *), &binary, NULL );
if( status == CL_SUCCESS )
{
fputs( devname, fp );
* compiled binary, stored in the current working folder. */
static cl_program x264_opencl_compile( x264_t *h )
{
+ x264_opencl_function_t *ocl = h->opencl.ocl;
cl_program program;
cl_int status;
char devname[64];
char devvendor[64];
char driverversion[64];
- status = clGetDeviceInfo( h->opencl.device, CL_DEVICE_NAME, sizeof(devname), devname, NULL );
- status |= clGetDeviceInfo( h->opencl.device, CL_DEVICE_VENDOR, sizeof(devvendor), devvendor, NULL );
- status |= clGetDeviceInfo( h->opencl.device, CL_DRIVER_VERSION, sizeof(driverversion), driverversion, NULL );
+ status = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_NAME, sizeof(devname), devname, NULL );
+ status |= ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_VENDOR, sizeof(devvendor), devvendor, NULL );
+ status |= ocl->clGetDeviceInfo( h->opencl.device, CL_DRIVER_VERSION, sizeof(driverversion), driverversion, NULL );
if( status != CL_SUCCESS )
return NULL;
/* Detect AMD SouthernIsland or newer device (single-width registers) */
cl_uint simdwidth = 4;
- status = clGetDeviceInfo( h->opencl.device, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, sizeof(cl_uint), &simdwidth, NULL );
+ status = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, sizeof(cl_uint), &simdwidth, NULL );
if( status == CL_SUCCESS && simdwidth == 1 )
{
vectorize = 0;
x264_log( h, X264_LOG_INFO, "Compiling OpenCL kernels...\n" );
const char *strptr = (const char*)x264_opencl_source;
size_t size = sizeof(x264_opencl_source);
- program = clCreateProgramWithSource( h->opencl.context, 1, &strptr, &size, &status );
+ program = ocl->clCreateProgramWithSource( h->opencl.context, 1, &strptr, &size, &status );
if( status != CL_SUCCESS || !program )
{
x264_log( h, X264_LOG_WARNING, "OpenCL: unable to create program\n" );
/* Build the program binary for the OpenCL device */
const char *buildopts = vectorize ? "-DVECTORIZE=1" : "";
- status = clBuildProgram( program, 1, &h->opencl.device, buildopts, NULL, NULL );
+ status = ocl->clBuildProgram( program, 1, &h->opencl.device, buildopts, NULL, NULL );
if( status == CL_SUCCESS )
{
x264_opencl_cache_save( h, program, devname, devvendor, driverversion );
size_t build_log_len = 0;
- status = clGetProgramBuildInfo( program, h->opencl.device, CL_PROGRAM_BUILD_LOG, build_log_len, NULL, &build_log_len );
+ status = ocl->clGetProgramBuildInfo( program, h->opencl.device, CL_PROGRAM_BUILD_LOG, build_log_len, NULL, &build_log_len );
if( status != CL_SUCCESS )
{
x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to query build log\n" );
return NULL;
}
- status = clGetProgramBuildInfo( program, h->opencl.device, CL_PROGRAM_BUILD_LOG, build_log_len, build_log, NULL );
+ status = ocl->clGetProgramBuildInfo( program, h->opencl.device, CL_PROGRAM_BUILD_LOG, build_log_len, build_log, NULL );
if( status != CL_SUCCESS )
{
x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to get build log\n" );
return NULL;
}
-static void x264_opencl_free_lookahead( x264_t *h )
-{
-#define RELEASE( a, f ) if( a ) f( a );
- RELEASE( h->opencl.intra_kernel, clReleaseKernel )
- RELEASE( h->opencl.rowsum_intra_kernel, clReleaseKernel )
- RELEASE( h->opencl.downscale_kernel1, clReleaseKernel )
- RELEASE( h->opencl.downscale_kernel2, clReleaseKernel )
- RELEASE( h->opencl.downscale_hpel_kernel, clReleaseKernel )
- RELEASE( h->opencl.weightp_hpel_kernel, clReleaseKernel )
- RELEASE( h->opencl.weightp_scaled_images_kernel, clReleaseKernel )
- RELEASE( h->opencl.memset_kernel, clReleaseKernel )
- RELEASE( h->opencl.hme_kernel, clReleaseKernel )
- RELEASE( h->opencl.subpel_refine_kernel, clReleaseKernel )
- RELEASE( h->opencl.mode_select_kernel, clReleaseKernel )
- RELEASE( h->opencl.rowsum_inter_kernel, clReleaseKernel )
- RELEASE( h->opencl.lookahead_program, clReleaseProgram )
- RELEASE( h->opencl.row_satds[0], clReleaseMemObject )
- RELEASE( h->opencl.row_satds[1], clReleaseMemObject )
- RELEASE( h->opencl.frame_stats[0], clReleaseMemObject )
- RELEASE( h->opencl.frame_stats[1], clReleaseMemObject )
- RELEASE( h->opencl.mv_buffers[0], clReleaseMemObject )
- RELEASE( h->opencl.mv_buffers[1], clReleaseMemObject )
- RELEASE( h->opencl.mvp_buffer, clReleaseMemObject )
- RELEASE( h->opencl.luma_16x16_image[0], clReleaseMemObject )
- RELEASE( h->opencl.luma_16x16_image[1], clReleaseMemObject )
- RELEASE( h->opencl.lowres_mv_costs, clReleaseMemObject )
- RELEASE( h->opencl.lowres_costs[0], clReleaseMemObject )
- RELEASE( h->opencl.lowres_costs[1], clReleaseMemObject )
- RELEASE( h->opencl.page_locked_buffer, clReleaseMemObject )
- RELEASE( h->opencl.weighted_luma_hpel, clReleaseMemObject )
- for( int i = 0; i < NUM_IMAGE_SCALES; i++ )
- RELEASE( h->opencl.weighted_scaled_images[i], clReleaseMemObject )
-#undef RELEASE
-}
-
-int x264_opencl_init_lookahead( x264_t *h )
+static int x264_opencl_lookahead_alloc( x264_t *h )
{
if( !h->param.rc.i_lookahead )
return -1;
"mode_selection",
"sum_inter_cost"
};
+
cl_kernel *kernels[] = {
&h->opencl.intra_kernel,
&h->opencl.rowsum_intra_kernel,
&h->opencl.mode_select_kernel,
&h->opencl.rowsum_inter_kernel
};
+
+ x264_opencl_function_t *ocl = h->opencl.ocl;
cl_int status;
h->opencl.lookahead_program = x264_opencl_compile( h );
if( !h->opencl.lookahead_program )
- {
- x264_opencl_free_lookahead( h );
- return -1;
- }
+ goto fail;
for( int i = 0; i < ARRAY_SIZE(kernelnames); i++ )
{
- *kernels[i] = clCreateKernel( h->opencl.lookahead_program, kernelnames[i], &status );
+ *kernels[i] = ocl->clCreateKernel( h->opencl.lookahead_program, kernelnames[i], &status );
if( status != CL_SUCCESS )
{
x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to compile kernel '%s' (%d)\n", kernelnames[i], status );
- x264_opencl_free_lookahead( h );
- return -1;
+ goto fail;
}
}
- h->opencl.page_locked_buffer = clCreateBuffer( h->opencl.context, CL_MEM_WRITE_ONLY|CL_MEM_ALLOC_HOST_PTR, PAGE_LOCKED_BUF_SIZE, NULL, &status );
+ h->opencl.page_locked_buffer = ocl->clCreateBuffer( h->opencl.context, CL_MEM_WRITE_ONLY|CL_MEM_ALLOC_HOST_PTR, PAGE_LOCKED_BUF_SIZE, NULL, &status );
if( status != CL_SUCCESS )
{
x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to allocate page-locked buffer, error '%d'\n", status );
- x264_opencl_free_lookahead( h );
- return -1;
+ goto fail;
}
- h->opencl.page_locked_ptr = clEnqueueMapBuffer( h->opencl.queue, h->opencl.page_locked_buffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
- 0, PAGE_LOCKED_BUF_SIZE, 0, NULL, NULL, &status );
+ h->opencl.page_locked_ptr = ocl->clEnqueueMapBuffer( h->opencl.queue, h->opencl.page_locked_buffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
+ 0, PAGE_LOCKED_BUF_SIZE, 0, NULL, NULL, &status );
if( status != CL_SUCCESS )
{
x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to map page-locked buffer, error '%d'\n", status );
- x264_opencl_free_lookahead( h );
- return -1;
+ goto fail;
}
return 0;
+fail:
+ x264_opencl_lookahead_delete( h );
+ return -1;
}
static void x264_opencl_error_notify( const char *errinfo, const void *private_info, size_t cb, void *user_data )
x264_log( h, X264_LOG_ERROR, "OpenCL: fatal error, aborting encode\n" );
}
-int x264_opencl_init( x264_t *h )
+int x264_opencl_lookahead_init( x264_t *h )
{
+ x264_opencl_function_t *ocl = h->opencl.ocl;
cl_int status;
cl_uint numPlatforms;
int ret = -1;
- status = clGetPlatformIDs( 0, NULL, &numPlatforms );
+ status = ocl->clGetPlatformIDs( 0, NULL, &numPlatforms );
if( status != CL_SUCCESS || numPlatforms == 0 )
{
x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to query installed platforms\n");
}
cl_platform_id *platforms = (cl_platform_id*)x264_malloc( numPlatforms * sizeof(cl_platform_id) );
- status = clGetPlatformIDs( numPlatforms, platforms, NULL );
+ status = ocl->clGetPlatformIDs( numPlatforms, platforms, NULL );
if( status != CL_SUCCESS )
{
x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to query installed platforms\n");
for( cl_uint i = 0; i < numPlatforms; ++i )
{
cl_uint gpu_count = 0;
- status = clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &gpu_count );
+ status = ocl->clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &gpu_count );
if( status != CL_SUCCESS || !gpu_count )
continue;
if( !devices )
continue;
- status = clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, gpu_count, devices, NULL );
+ status = ocl->clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, gpu_count, devices, NULL );
if( status != CL_SUCCESS )
{
x264_free( devices );
continue;
cl_bool image_support;
- clGetDeviceInfo( h->opencl.device, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool), &image_support, NULL );
+ ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool), &image_support, NULL );
if( !image_support )
continue;
- cl_context context = clCreateContext( NULL, 1, &h->opencl.device, (void*)x264_opencl_error_notify, (void*)h, &status );
+ cl_context context = ocl->clCreateContext( NULL, 1, &h->opencl.device, (void*)x264_opencl_error_notify, (void*)h, &status );
if( status != CL_SUCCESS )
continue;
cl_uint imagecount = 0;
- clGetSupportedImageFormats( context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &imagecount );
+ ocl->clGetSupportedImageFormats( context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &imagecount );
if( !imagecount )
{
- clReleaseContext( context );
+ ocl->clReleaseContext( context );
continue;
}
cl_image_format *imageType = x264_malloc( sizeof(cl_image_format) * imagecount );
if( !imageType )
{
- clReleaseContext( context );
+ ocl->clReleaseContext( context );
continue;
}
- clGetSupportedImageFormats( context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, imagecount, imageType, NULL );
+ ocl->clGetSupportedImageFormats( context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, imagecount, imageType, NULL );
int b_has_r = 0;
int b_has_rgba = 0;
if( !b_has_r || !b_has_rgba )
{
char devname[64];
- status = clGetDeviceInfo( h->opencl.device, CL_DEVICE_NAME, sizeof(devname), devname, NULL );
+ status = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_NAME, sizeof(devname), devname, NULL );
if( status == CL_SUCCESS )
{
/* emit warning if we are discarding the user's explicit choice */
int level = h->param.opencl_device_id ? X264_LOG_WARNING : X264_LOG_DEBUG;
x264_log( h, level, "OpenCL: %s does not support required image formats\n", devname);
}
- clReleaseContext( context );
+ ocl->clReleaseContext( context );
continue;
}
if( h->param.i_opencl_device )
{
h->param.i_opencl_device--;
- clReleaseContext( context );
+ ocl->clReleaseContext( context );
continue;
}
- h->opencl.queue = clCreateCommandQueue( context, h->opencl.device, 0, &status );
+ h->opencl.queue = ocl->clCreateCommandQueue( context, h->opencl.device, 0, &status );
if( status != CL_SUCCESS )
{
- clReleaseContext( context );
+ ocl->clReleaseContext( context );
continue;
}
if( ret )
x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to find a compatible device\n");
else
- ret = x264_opencl_init_lookahead( h );
+ ret = x264_opencl_lookahead_alloc( h );
return ret;
}
+static void x264_opencl_lookahead_free( x264_t *h )
+{
+ x264_opencl_function_t *ocl = h->opencl.ocl;
+
+#define RELEASE( a, f ) do { if( a ) { ocl->f( a ); a = NULL; } } while( 0 )
+ RELEASE( h->opencl.downscale_hpel_kernel, clReleaseKernel );
+ RELEASE( h->opencl.downscale_kernel1, clReleaseKernel );
+ RELEASE( h->opencl.downscale_kernel2, clReleaseKernel );
+ RELEASE( h->opencl.weightp_hpel_kernel, clReleaseKernel );
+ RELEASE( h->opencl.weightp_scaled_images_kernel, clReleaseKernel );
+ RELEASE( h->opencl.memset_kernel, clReleaseKernel );
+ RELEASE( h->opencl.intra_kernel, clReleaseKernel );
+ RELEASE( h->opencl.rowsum_intra_kernel, clReleaseKernel );
+ RELEASE( h->opencl.hme_kernel, clReleaseKernel );
+ RELEASE( h->opencl.subpel_refine_kernel, clReleaseKernel );
+ RELEASE( h->opencl.mode_select_kernel, clReleaseKernel );
+ RELEASE( h->opencl.rowsum_inter_kernel, clReleaseKernel );
+
+ RELEASE( h->opencl.lookahead_program, clReleaseProgram );
+
+ RELEASE( h->opencl.page_locked_buffer, clReleaseMemObject );
+ RELEASE( h->opencl.luma_16x16_image[0], clReleaseMemObject );
+ RELEASE( h->opencl.luma_16x16_image[1], clReleaseMemObject );
+ for( int i = 0; i < NUM_IMAGE_SCALES; i++ )
+ RELEASE( h->opencl.weighted_scaled_images[i], clReleaseMemObject );
+ RELEASE( h->opencl.weighted_luma_hpel, clReleaseMemObject );
+ RELEASE( h->opencl.row_satds[0], clReleaseMemObject );
+ RELEASE( h->opencl.row_satds[1], clReleaseMemObject );
+ RELEASE( h->opencl.mv_buffers[0], clReleaseMemObject );
+ RELEASE( h->opencl.mv_buffers[1], clReleaseMemObject );
+ RELEASE( h->opencl.lowres_mv_costs, clReleaseMemObject );
+ RELEASE( h->opencl.mvp_buffer, clReleaseMemObject );
+ RELEASE( h->opencl.lowres_costs[0], clReleaseMemObject );
+ RELEASE( h->opencl.lowres_costs[1], clReleaseMemObject );
+ RELEASE( h->opencl.frame_stats[0], clReleaseMemObject );
+ RELEASE( h->opencl.frame_stats[1], clReleaseMemObject );
+#undef RELEASE
+}
+
+void x264_opencl_lookahead_delete( x264_t *h )
+{
+ x264_opencl_function_t *ocl = h->opencl.ocl;
+
+ if( !ocl )
+ return;
+
+ if( h->opencl.queue )
+ ocl->clFinish( h->opencl.queue );
+
+ x264_opencl_lookahead_free( h );
+
+ if( h->opencl.queue )
+ {
+ ocl->clReleaseCommandQueue( h->opencl.queue );
+ h->opencl.queue = NULL;
+ }
+ if( h->opencl.context )
+ {
+ ocl->clReleaseContext( h->opencl.context );
+ h->opencl.context = NULL;
+ }
+}
+
void x264_opencl_frame_delete( x264_frame_t *frame )
{
-#define RELEASEBUF(mem) if( mem ) clReleaseMemObject( mem );
+ x264_opencl_function_t *ocl = frame->opencl.ocl;
+
+ if( !ocl )
+ return;
+
+#define RELEASEBUF(mem) do { if( mem ) { ocl->clReleaseMemObject( mem ); mem = NULL; } } while( 0 )
for( int j = 0; j < NUM_IMAGE_SCALES; j++ )
RELEASEBUF( frame->opencl.scaled_image2Ds[j] );
RELEASEBUF( frame->opencl.luma_hpel );
#undef RELEASEBUF
}
-void x264_opencl_free( x264_t *h )
-{
- if( h->opencl.queue )
- clFinish(h->opencl.queue );
-
- x264_opencl_free_lookahead( h );
-
- if( h->opencl.queue )
- clReleaseCommandQueue( h->opencl.queue );
- if( h->opencl.context )
- clReleaseContext( h->opencl.context );
-}
-
/* OpenCL misbehaves on hybrid laptops with Intel iGPU and AMD dGPU, so
* we consult AMD's ADL interface to detect this situation and disable
* OpenCL on these machines (Linux and Windows) */
-#ifndef _WIN32
-#define __stdcall
-#define HINSTANCE void *
+#ifdef _WIN32
+#define ADL_API_CALL
+#define ADL_CALLBACK __stdcall
+#define adl_close FreeLibrary
+#define adl_address GetProcAddress
+#else
+#define ADL_API_CALL
+#define ADL_CALLBACK
+#define adl_close dlclose
+#define adl_address dlsym
#endif
-typedef void* ( __stdcall *ADL_MAIN_MALLOC_CALLBACK )( int );
-typedef int ( *ADL_MAIN_CONTROL_CREATE )(ADL_MAIN_MALLOC_CALLBACK, int );
-typedef int ( *ADL_ADAPTER_NUMBEROFADAPTERS_GET ) ( int* );
-typedef int ( *ADL_POWERXPRESS_SCHEME_GET ) ( int, int *, int *, int * );
-typedef int ( *ADL_MAIN_CONTROL_DESTROY )();
+
+typedef void* ( ADL_CALLBACK *ADL_MAIN_MALLOC_CALLBACK )( int );
+typedef int ( ADL_API_CALL *ADL_MAIN_CONTROL_CREATE )( ADL_MAIN_MALLOC_CALLBACK, int );
+typedef int ( ADL_API_CALL *ADL_ADAPTER_NUMBEROFADAPTERS_GET )( int * );
+typedef int ( ADL_API_CALL *ADL_POWERXPRESS_SCHEME_GET )( int, int *, int *, int * );
+typedef int ( ADL_API_CALL *ADL_MAIN_CONTROL_DESTROY )( void );
+
#define ADL_OK 0
#define ADL_PX_SCHEME_DYNAMIC 2
-void* __stdcall adl_malloc_wrapper( int iSize ) { return x264_malloc( iSize ); }
+static void* ADL_CALLBACK adl_malloc_wrapper( int iSize )
+{
+ return x264_malloc( iSize );
+}
-static int x264_detect_switchable_graphics()
+static int x264_detect_switchable_graphics( void )
{
+ void *hDLL;
ADL_MAIN_CONTROL_CREATE ADL_Main_Control_Create;
ADL_ADAPTER_NUMBEROFADAPTERS_GET ADL_Adapter_NumberOfAdapters_Get;
ADL_POWERXPRESS_SCHEME_GET ADL_PowerXpress_Scheme_Get;
ADL_MAIN_CONTROL_DESTROY ADL_Main_Control_Destroy;
- HINSTANCE hDLL;
int ret = 0;
-#if _WIN32
+#ifdef _WIN32
hDLL = LoadLibrary( "atiadlxx.dll" );
if( !hDLL )
hDLL = LoadLibrary( "atiadlxy.dll" );
#else
hDLL = dlopen( "libatiadlxx.so", RTLD_LAZY|RTLD_GLOBAL );
-#define GetProcAddress dlsym
#endif
if( !hDLL )
- return ret;
+ goto fail0;
- ADL_Main_Control_Create = (ADL_MAIN_CONTROL_CREATE) GetProcAddress(hDLL, "ADL_Main_Control_Create");
- ADL_Main_Control_Destroy = (ADL_MAIN_CONTROL_DESTROY) GetProcAddress(hDLL, "ADL_Main_Control_Destroy");
- ADL_Adapter_NumberOfAdapters_Get = (ADL_ADAPTER_NUMBEROFADAPTERS_GET) GetProcAddress(hDLL, "ADL_Adapter_NumberOfAdapters_Get");
- ADL_PowerXpress_Scheme_Get = (ADL_POWERXPRESS_SCHEME_GET) GetProcAddress(hDLL, "ADL_PowerXpress_Scheme_Get");
+ ADL_Main_Control_Create = (ADL_MAIN_CONTROL_CREATE)adl_address(hDLL, "ADL_Main_Control_Create");
+ ADL_Main_Control_Destroy = (ADL_MAIN_CONTROL_DESTROY)adl_address(hDLL, "ADL_Main_Control_Destroy");
+ ADL_Adapter_NumberOfAdapters_Get = (ADL_ADAPTER_NUMBEROFADAPTERS_GET)adl_address(hDLL, "ADL_Adapter_NumberOfAdapters_Get");
+ ADL_PowerXpress_Scheme_Get = (ADL_POWERXPRESS_SCHEME_GET)adl_address(hDLL, "ADL_PowerXpress_Scheme_Get");
if( !ADL_Main_Control_Destroy || !ADL_Main_Control_Destroy || !ADL_Adapter_NumberOfAdapters_Get ||
!ADL_PowerXpress_Scheme_Get )
- goto bail;
+ goto fail1;
- if( ADL_OK != ADL_Main_Control_Create( adl_malloc_wrapper, 1) )
- goto bail;
+ if( ADL_OK != ADL_Main_Control_Create( adl_malloc_wrapper, 1 ) )
+ goto fail1;
int numAdapters = 0;
if( ADL_OK != ADL_Adapter_NumberOfAdapters_Get( &numAdapters ) )
- {
- ADL_Main_Control_Destroy();
- goto bail;
- }
+ goto fail2;
for( int i = 0; i < numAdapters; i++ )
{
}
}
+fail2:
ADL_Main_Control_Destroy();
-
-bail:
-#if _WIN32
- FreeLibrary( hDLL );
-#else
- dlclose( hDLL );
-#endif
-
+fail1:
+ adl_close( hDLL );
+fail0:
return ret;
}
* Copyright (C) 2012-2013 x264 project
*
* Authors: Steve Borho <sborho@multicorewareinc.com>
+ * Anton Mitrofanov <BugMaster@narod.ru>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
#ifndef X264_OPENCL_H
#define X264_OPENCL_H
-#include "x264.h"
-#include "common/common.h"
+#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
+#include "extras/cl.h"
-#include <CL/cl.h>
+#define OCL_API(ret, attr, name) typedef ret (attr *name##_func)
+
+/* Platform API */
+OCL_API(cl_int, CL_API_CALL, clGetPlatformIDs)
+( cl_uint /* num_entries */,
+ cl_platform_id * /* platforms */,
+ cl_uint * /* num_platforms */);
+
+OCL_API(cl_int, CL_API_CALL, clGetPlatformInfo)
+( cl_platform_id /* platform */,
+ cl_platform_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */);
+
+/* Device APIs */
+OCL_API(cl_int, CL_API_CALL, clGetDeviceIDs)
+( cl_platform_id /* platform */,
+ cl_device_type /* device_type */,
+ cl_uint /* num_entries */,
+ cl_device_id * /* devices */,
+ cl_uint * /* num_devices */);
+
+OCL_API(cl_int, CL_API_CALL, clGetDeviceInfo)
+( cl_device_id /* device */,
+ cl_device_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clCreateSubDevices)
+( cl_device_id /* in_device */,
+ const cl_device_partition_property * /* properties */,
+ cl_uint /* num_devices */,
+ cl_device_id * /* out_devices */,
+ cl_uint * /* num_devices_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clRetainDevice)
+( cl_device_id /* device */);
+
+OCL_API(cl_int, CL_API_CALL, clReleaseDevice)
+( cl_device_id /* device */);
+
+/* Context APIs */
+OCL_API(cl_context, CL_API_CALL, clCreateContext)
+( const cl_context_properties * /* properties */,
+ cl_uint /* num_devices */,
+ const cl_device_id * /* devices */,
+ void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *),
+ void * /* user_data */,
+ cl_int * /* errcode_ret */);
+
+OCL_API(cl_context, CL_API_CALL, clCreateContextFromType)
+( const cl_context_properties * /* properties */,
+ cl_device_type /* device_type */,
+ void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *),
+ void * /* user_data */,
+ cl_int * /* errcode_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clRetainContext)
+( cl_context /* context */);
+
+OCL_API(cl_int, CL_API_CALL, clReleaseContext)
+( cl_context /* context */);
+
+OCL_API(cl_int, CL_API_CALL, clGetContextInfo)
+( cl_context /* context */,
+ cl_context_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */);
+
+/* Command Queue APIs */
+OCL_API(cl_command_queue, CL_API_CALL, clCreateCommandQueue)
+( cl_context /* context */,
+ cl_device_id /* device */,
+ cl_command_queue_properties /* properties */,
+ cl_int * /* errcode_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clRetainCommandQueue)
+( cl_command_queue /* command_queue */);
+
+OCL_API(cl_int, CL_API_CALL, clReleaseCommandQueue)
+( cl_command_queue /* command_queue */);
+
+OCL_API(cl_int, CL_API_CALL, clGetCommandQueueInfo)
+( cl_command_queue /* command_queue */,
+ cl_command_queue_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */);
+
+/* Memory Object APIs */
+OCL_API(cl_mem, CL_API_CALL, clCreateBuffer)
+( cl_context /* context */,
+ cl_mem_flags /* flags */,
+ size_t /* size */,
+ void * /* host_ptr */,
+ cl_int * /* errcode_ret */);
+
+OCL_API(cl_mem, CL_API_CALL, clCreateSubBuffer)
+( cl_mem /* buffer */,
+ cl_mem_flags /* flags */,
+ cl_buffer_create_type /* buffer_create_type */,
+ const void * /* buffer_create_info */,
+ cl_int * /* errcode_ret */);
+
+OCL_API(cl_mem, CL_API_CALL, clCreateImage)
+( cl_context /* context */,
+ cl_mem_flags /* flags */,
+ const cl_image_format * /* image_format */,
+ const cl_image_desc * /* image_desc */,
+ void * /* host_ptr */,
+ cl_int * /* errcode_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clRetainMemObject)
+( cl_mem /* memobj */);
+
+OCL_API(cl_int, CL_API_CALL, clReleaseMemObject)
+( cl_mem /* memobj */);
+
+OCL_API(cl_int, CL_API_CALL, clGetSupportedImageFormats)
+( cl_context /* context */,
+ cl_mem_flags /* flags */,
+ cl_mem_object_type /* image_type */,
+ cl_uint /* num_entries */,
+ cl_image_format * /* image_formats */,
+ cl_uint * /* num_image_formats */);
+
+OCL_API(cl_int, CL_API_CALL, clGetMemObjectInfo)
+( cl_mem /* memobj */,
+ cl_mem_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clGetImageInfo)
+( cl_mem /* image */,
+ cl_image_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clSetMemObjectDestructorCallback)
+( cl_mem /* memobj */,
+ void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
+ void * /*user_data */ );
+
+/* Sampler APIs */
+OCL_API(cl_sampler, CL_API_CALL, clCreateSampler)
+( cl_context /* context */,
+ cl_bool /* normalized_coords */,
+ cl_addressing_mode /* addressing_mode */,
+ cl_filter_mode /* filter_mode */,
+ cl_int * /* errcode_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clRetainSampler)
+( cl_sampler /* sampler */);
+
+OCL_API(cl_int, CL_API_CALL, clReleaseSampler)
+( cl_sampler /* sampler */);
+
+OCL_API(cl_int, CL_API_CALL, clGetSamplerInfo)
+( cl_sampler /* sampler */,
+ cl_sampler_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */);
+
+/* Program Object APIs */
+OCL_API(cl_program, CL_API_CALL, clCreateProgramWithSource)
+( cl_context /* context */,
+ cl_uint /* count */,
+ const char ** /* strings */,
+ const size_t * /* lengths */,
+ cl_int * /* errcode_ret */);
+
+OCL_API(cl_program, CL_API_CALL, clCreateProgramWithBinary)
+( cl_context /* context */,
+ cl_uint /* num_devices */,
+ const cl_device_id * /* device_list */,
+ const size_t * /* lengths */,
+ const unsigned char ** /* binaries */,
+ cl_int * /* binary_status */,
+ cl_int * /* errcode_ret */);
+
+OCL_API(cl_program, CL_API_CALL, clCreateProgramWithBuiltInKernels)
+( cl_context /* context */,
+ cl_uint /* num_devices */,
+ const cl_device_id * /* device_list */,
+ const char * /* kernel_names */,
+ cl_int * /* errcode_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clRetainProgram)
+( cl_program /* program */);
+
+OCL_API(cl_int, CL_API_CALL, clReleaseProgram)
+( cl_program /* program */);
+
+OCL_API(cl_int, CL_API_CALL, clBuildProgram)
+( cl_program /* program */,
+ cl_uint /* num_devices */,
+ const cl_device_id * /* device_list */,
+ const char * /* options */,
+ void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
+ void * /* user_data */);
+
+OCL_API(cl_int, CL_API_CALL, clCompileProgram)
+( cl_program /* program */,
+ cl_uint /* num_devices */,
+ const cl_device_id * /* device_list */,
+ const char * /* options */,
+ cl_uint /* num_input_headers */,
+ const cl_program * /* input_headers */,
+ const char ** /* header_include_names */,
+ void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
+ void * /* user_data */);
+
+OCL_API(cl_program, CL_API_CALL, clLinkProgram)
+( cl_context /* context */,
+ cl_uint /* num_devices */,
+ const cl_device_id * /* device_list */,
+ const char * /* options */,
+ cl_uint /* num_input_programs */,
+ const cl_program * /* input_programs */,
+ void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
+ void * /* user_data */,
+ cl_int * /* errcode_ret */ );
+
+
+OCL_API(cl_int, CL_API_CALL, clUnloadPlatformCompiler)
+( cl_platform_id /* platform */);
+
+OCL_API(cl_int, CL_API_CALL, clGetProgramInfo)
+( cl_program /* program */,
+ cl_program_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clGetProgramBuildInfo)
+( cl_program /* program */,
+ cl_device_id /* device */,
+ cl_program_build_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */);
+
+/* Kernel Object APIs */
+OCL_API(cl_kernel, CL_API_CALL, clCreateKernel)
+( cl_program /* program */,
+ const char * /* kernel_name */,
+ cl_int * /* errcode_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clCreateKernelsInProgram)
+( cl_program /* program */,
+ cl_uint /* num_kernels */,
+ cl_kernel * /* kernels */,
+ cl_uint * /* num_kernels_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clRetainKernel)
+( cl_kernel /* kernel */);
+
+OCL_API(cl_int, CL_API_CALL, clReleaseKernel)
+( cl_kernel /* kernel */);
+
+OCL_API(cl_int, CL_API_CALL, clSetKernelArg)
+( cl_kernel /* kernel */,
+ cl_uint /* arg_index */,
+ size_t /* arg_size */,
+ const void * /* arg_value */);
+
+OCL_API(cl_int, CL_API_CALL, clGetKernelInfo)
+( cl_kernel /* kernel */,
+ cl_kernel_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clGetKernelArgInfo)
+( cl_kernel /* kernel */,
+ cl_uint /* arg_indx */,
+ cl_kernel_arg_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clGetKernelWorkGroupInfo)
+( cl_kernel /* kernel */,
+ cl_device_id /* device */,
+ cl_kernel_work_group_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */);
+
+/* Event Object APIs */
+OCL_API(cl_int, CL_API_CALL, clWaitForEvents)
+( cl_uint /* num_events */,
+ const cl_event * /* event_list */);
+
+OCL_API(cl_int, CL_API_CALL, clGetEventInfo)
+( cl_event /* event */,
+ cl_event_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */);
+
+OCL_API(cl_event, CL_API_CALL, clCreateUserEvent)
+( cl_context /* context */,
+ cl_int * /* errcode_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clRetainEvent)
+( cl_event /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clReleaseEvent)
+( cl_event /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clSetUserEventStatus)
+( cl_event /* event */,
+ cl_int /* execution_status */);
+
+OCL_API(cl_int, CL_API_CALL, clSetEventCallback)
+( cl_event /* event */,
+ cl_int /* command_exec_callback_type */,
+ void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *),
+ void * /* user_data */);
+
+/* Profiling APIs */
+OCL_API(cl_int, CL_API_CALL, clGetEventProfilingInfo)
+( cl_event /* event */,
+ cl_profiling_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */);
+
+/* Flush and Finish APIs */
+OCL_API(cl_int, CL_API_CALL, clFlush)
+( cl_command_queue /* command_queue */);
+
+OCL_API(cl_int, CL_API_CALL, clFinish)
+( cl_command_queue /* command_queue */);
+
+/* Enqueued Commands APIs */
+OCL_API(cl_int, CL_API_CALL, clEnqueueReadBuffer)
+( cl_command_queue /* command_queue */,
+ cl_mem /* buffer */,
+ cl_bool /* blocking_read */,
+ size_t /* offset */,
+ size_t /* size */,
+ void * /* ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueReadBufferRect)
+( cl_command_queue /* command_queue */,
+ cl_mem /* buffer */,
+ cl_bool /* blocking_read */,
+ const size_t * /* buffer_offset */,
+ const size_t * /* host_offset */,
+ const size_t * /* region */,
+ size_t /* buffer_row_pitch */,
+ size_t /* buffer_slice_pitch */,
+ size_t /* host_row_pitch */,
+ size_t /* host_slice_pitch */,
+ void * /* ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueWriteBuffer)
+( cl_command_queue /* command_queue */,
+ cl_mem /* buffer */,
+ cl_bool /* blocking_write */,
+ size_t /* offset */,
+ size_t /* size */,
+ const void * /* ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueWriteBufferRect)
+( cl_command_queue /* command_queue */,
+ cl_mem /* buffer */,
+ cl_bool /* blocking_write */,
+ const size_t * /* buffer_offset */,
+ const size_t * /* host_offset */,
+ const size_t * /* region */,
+ size_t /* buffer_row_pitch */,
+ size_t /* buffer_slice_pitch */,
+ size_t /* host_row_pitch */,
+ size_t /* host_slice_pitch */,
+ const void * /* ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueFillBuffer)
+( cl_command_queue /* command_queue */,
+ cl_mem /* buffer */,
+ const void * /* pattern */,
+ size_t /* pattern_size */,
+ size_t /* offset */,
+ size_t /* size */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueCopyBuffer)
+( cl_command_queue /* command_queue */,
+ cl_mem /* src_buffer */,
+ cl_mem /* dst_buffer */,
+ size_t /* src_offset */,
+ size_t /* dst_offset */,
+ size_t /* size */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueCopyBufferRect)
+( cl_command_queue /* command_queue */,
+ cl_mem /* src_buffer */,
+ cl_mem /* dst_buffer */,
+ const size_t * /* src_origin */,
+ const size_t * /* dst_origin */,
+ const size_t * /* region */,
+ size_t /* src_row_pitch */,
+ size_t /* src_slice_pitch */,
+ size_t /* dst_row_pitch */,
+ size_t /* dst_slice_pitch */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueReadImage)
+( cl_command_queue /* command_queue */,
+ cl_mem /* image */,
+ cl_bool /* blocking_read */,
+ const size_t * /* origin[3] */,
+ const size_t * /* region[3] */,
+ size_t /* row_pitch */,
+ size_t /* slice_pitch */,
+ void * /* ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueWriteImage)
+( cl_command_queue /* command_queue */,
+ cl_mem /* image */,
+ cl_bool /* blocking_write */,
+ const size_t * /* origin[3] */,
+ const size_t * /* region[3] */,
+ size_t /* input_row_pitch */,
+ size_t /* input_slice_pitch */,
+ const void * /* ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueFillImage)
+( cl_command_queue /* command_queue */,
+ cl_mem /* image */,
+ const void * /* fill_color */,
+ const size_t * /* origin[3] */,
+ const size_t * /* region[3] */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueCopyImage)
+( cl_command_queue /* command_queue */,
+ cl_mem /* src_image */,
+ cl_mem /* dst_image */,
+ const size_t * /* src_origin[3] */,
+ const size_t * /* dst_origin[3] */,
+ const size_t * /* region[3] */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueCopyImageToBuffer)
+( cl_command_queue /* command_queue */,
+ cl_mem /* src_image */,
+ cl_mem /* dst_buffer */,
+ const size_t * /* src_origin[3] */,
+ const size_t * /* region[3] */,
+ size_t /* dst_offset */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueCopyBufferToImage)
+( cl_command_queue /* command_queue */,
+ cl_mem /* src_buffer */,
+ cl_mem /* dst_image */,
+ size_t /* src_offset */,
+ const size_t * /* dst_origin[3] */,
+ const size_t * /* region[3] */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */);
+
+OCL_API(void *, CL_API_CALL, clEnqueueMapBuffer)
+( cl_command_queue /* command_queue */,
+ cl_mem /* buffer */,
+ cl_bool /* blocking_map */,
+ cl_map_flags /* map_flags */,
+ size_t /* offset */,
+ size_t /* size */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */,
+ cl_int * /* errcode_ret */);
+
+OCL_API(void *, CL_API_CALL, clEnqueueMapImage)
+( cl_command_queue /* command_queue */,
+ cl_mem /* image */,
+ cl_bool /* blocking_map */,
+ cl_map_flags /* map_flags */,
+ const size_t * /* origin[3] */,
+ const size_t * /* region[3] */,
+ size_t * /* image_row_pitch */,
+ size_t * /* image_slice_pitch */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */,
+ cl_int * /* errcode_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueUnmapMemObject)
+( cl_command_queue /* command_queue */,
+ cl_mem /* memobj */,
+ void * /* mapped_ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueMigrateMemObjects)
+( cl_command_queue /* command_queue */,
+ cl_uint /* num_mem_objects */,
+ const cl_mem * /* mem_objects */,
+ cl_mem_migration_flags /* flags */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueNDRangeKernel)
+( cl_command_queue /* command_queue */,
+ cl_kernel /* kernel */,
+ cl_uint /* work_dim */,
+ const size_t * /* global_work_offset */,
+ const size_t * /* global_work_size */,
+ const size_t * /* local_work_size */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueTask)
+( cl_command_queue /* command_queue */,
+ cl_kernel /* kernel */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueNativeKernel)
+( cl_command_queue /* command_queue */,
+ void (CL_CALLBACK * /*user_func*/)(void *),
+ void * /* args */,
+ size_t /* cb_args */,
+ cl_uint /* num_mem_objects */,
+ const cl_mem * /* mem_list */,
+ const void ** /* args_mem_loc */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueMarkerWithWaitList)
+( cl_command_queue /* command_queue */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueBarrierWithWaitList)
+( cl_command_queue /* command_queue */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */);
+
+
+/* Extension function access
+*
+* Returns the extension function address for the given function name,
+* or NULL if a valid function can not be found. The client must
+* check to make sure the address is not NULL, before using or
+* calling the returned function address.
+*/
+OCL_API(void *, CL_API_CALL, clGetExtensionFunctionAddressForPlatform)
+( cl_platform_id /* platform */,
+ const char * /* func_name */);
+
+
+// Deprecated OpenCL 1.1 APIs
+OCL_API(cl_mem, CL_API_CALL, clCreateImage2D)
+( cl_context /* context */,
+ cl_mem_flags /* flags */,
+ const cl_image_format * /* image_format */,
+ size_t /* image_width */,
+ size_t /* image_height */,
+ size_t /* image_row_pitch */,
+ void * /* host_ptr */,
+ cl_int * /* errcode_ret */);
+
+OCL_API(cl_mem, CL_API_CALL, clCreateImage3D)
+( cl_context /* context */,
+ cl_mem_flags /* flags */,
+ const cl_image_format * /* image_format */,
+ size_t /* image_width */,
+ size_t /* image_height */,
+ size_t /* image_depth */,
+ size_t /* image_row_pitch */,
+ size_t /* image_slice_pitch */,
+ void * /* host_ptr */,
+ cl_int * /* errcode_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueMarker)
+( cl_command_queue /* command_queue */,
+ cl_event * /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueWaitForEvents)
+( cl_command_queue /* command_queue */,
+ cl_uint /* num_events */,
+ const cl_event * /* event_list */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueBarrier)
+( cl_command_queue /* command_queue */);
+
+OCL_API(cl_int, CL_API_CALL, clUnloadCompiler)
+( void);
+
+OCL_API(void *, CL_API_CALL, clGetExtensionFunctionAddress)
+( const char * /* func_name */);
+
+#define OCL_DECLARE_FUNC(name) name##_func name
+
+typedef struct
+{
+ void *library;
+
+ OCL_DECLARE_FUNC( clBuildProgram );
+ OCL_DECLARE_FUNC( clCreateBuffer );
+ OCL_DECLARE_FUNC( clCreateCommandQueue );
+ OCL_DECLARE_FUNC( clCreateContext );
+ OCL_DECLARE_FUNC( clCreateImage2D );
+ OCL_DECLARE_FUNC( clCreateKernel );
+ OCL_DECLARE_FUNC( clCreateProgramWithBinary );
+ OCL_DECLARE_FUNC( clCreateProgramWithSource );
+ OCL_DECLARE_FUNC( clEnqueueCopyBuffer );
+ OCL_DECLARE_FUNC( clEnqueueMapBuffer );
+ OCL_DECLARE_FUNC( clEnqueueNDRangeKernel );
+ OCL_DECLARE_FUNC( clEnqueueReadBuffer );
+ OCL_DECLARE_FUNC( clEnqueueWriteBuffer );
+ OCL_DECLARE_FUNC( clFinish );
+ OCL_DECLARE_FUNC( clGetCommandQueueInfo );
+ OCL_DECLARE_FUNC( clGetDeviceIDs );
+ OCL_DECLARE_FUNC( clGetDeviceInfo );
+ OCL_DECLARE_FUNC( clGetKernelWorkGroupInfo );
+ OCL_DECLARE_FUNC( clGetPlatformIDs );
+ OCL_DECLARE_FUNC( clGetProgramBuildInfo );
+ OCL_DECLARE_FUNC( clGetProgramInfo );
+ OCL_DECLARE_FUNC( clGetSupportedImageFormats );
+ OCL_DECLARE_FUNC( clReleaseCommandQueue );
+ OCL_DECLARE_FUNC( clReleaseContext );
+ OCL_DECLARE_FUNC( clReleaseKernel );
+ OCL_DECLARE_FUNC( clReleaseMemObject );
+ OCL_DECLARE_FUNC( clReleaseProgram );
+ OCL_DECLARE_FUNC( clSetKernelArg );
+} x264_opencl_function_t;
/* Number of downscale resolutions to use for motion search */
#define NUM_IMAGE_SCALES 4
typedef struct
{
+ x264_opencl_function_t *ocl;
+
cl_context context;
cl_device_id device;
cl_command_queue queue;
typedef struct
{
+ x264_opencl_function_t *ocl;
+
cl_mem scaled_image2Ds[NUM_IMAGE_SCALES];
cl_mem luma_hpel;
cl_mem inv_qscale_factor;
typedef struct x264_frame x264_frame;
-int x264_opencl_init( x264_t *h );
-int x264_opencl_init_lookahead( x264_t *h );
-void x264_opencl_free( x264_t *h );
+x264_opencl_function_t *x264_opencl_load_library( void );
+void x264_opencl_close_library( x264_opencl_function_t *ocl );
+
+int x264_opencl_lookahead_init( x264_t *h );
+void x264_opencl_lookahead_delete( x264_t *h );
+
void x264_opencl_frame_delete( x264_frame *frame );
#endif
EXE=""
# list of all preprocessor HAVE values we can define
-CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F VISUALIZE SWSCALE LAVF FFMS GPAC GF_MALLOC AVS GPL VECTOREXT INTERLACED CPU_COUNT"
+CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F VISUALIZE SWSCALE LAVF FFMS GPAC GF_MALLOC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL"
# parse options
[ $interlaced = yes ] && define HAVE_INTERLACED && x264_interlaced=1 || x264_interlaced=0
+libdl=""
+if [ "$opencl" = "yes" ]; then
+ opencl="no"
+ log_check "for perl"
+ output=$(perl -v)
+ if [ "$output" = "" ]; then
+ log_fail
+ echo 'OpenCL support requires perl to compile.'
+ echo 'use --disable-opencl to compile without OpenCL.'
+ exit 1
+ fi
+ log_ok
+ # cygwin can use opencl if it can use LoadLibrary
+ if [ $SYS = WINDOWS ] || ([ $SYS = CYGWIN ] && cc_check windows.h "" "LoadLibrary(0);") ; then
+ opencl="yes"
+ define HAVE_OPENCL
+ elif [ "$SYS" = "LINUX" -o "$SYS" = "MACOSX" ] ; then
+ opencl="yes"
+ define HAVE_OPENCL
+ libdl="-ldl"
+ fi
+ LDFLAGS="$LDFLAGS $libdl"
+fi
+
#define undefined vars as 0
for var in $CONFIG_HAVE; do
grep -q "HAVE_$var 1" config.h || define HAVE_$var 0
PROF_GEN_LD=$PROF_GEN_LD
PROF_USE_CC=$PROF_USE_CC
PROF_USE_LD=$PROF_USE_LD
+HAVE_OPENCL=$opencl
EOF
-if [[ $host_os != mingw* ]]; then
- # OpenCL support is only well tested on Windows/MinGW. If you
- # wish to try it on an unsupported platform, swap the lines
- # below. If OpenCL breaks, you get to keep both halves
- #opencl="yes"
- opencl="no"
-fi
-if [ "$opencl" = "yes" ]; then
- log_check "looking for perl"
- output=$(perl -v)
- if [ "$output" = "" ]; then
- echo 'OpenCL support requires perl to compile.'
- echo 'use --disable-opencl to compile without OpenCL.'
- exit 1
- elif [[ $cross_prefix != "" && $host_os == mingw* ]] ; then
- if cc_check "CL/cl.h" "-lOpenCL"; then
- echo 'HAVE_OPENCL=yes' >> config.mak
- echo 'OPENCL_LIB=OpenCL' >> config.mak
- echo "OPENCL_INC_DIR=." >> config.mak
- echo "OPENCL_LIB_DIR=." >> config.mak
- define HAVE_OPENCL
- else
- opencl="no"
- fi
- elif [ "$CUDA_PATH" != "" ]; then
- echo 'HAVE_OPENCL=yes' >> config.mak
- echo 'OPENCL_LIB=OpenCL' >> config.mak
- echo 'OPENCL_INC_DIR=$(CUDA_PATH)include' >> config.mak
- if [ "$ARCH" = "X86" ]; then
- echo 'OPENCL_LIB_DIR=$(CUDA_PATH)lib/Win32' >> config.mak
- else
- echo 'OPENCL_LIB_DIR=$(CUDA_PATH)lib/x64' >> config.mak
- fi
- define HAVE_OPENCL
- elif [ -e "$AMDAPPSDKROOT/include/CL/cl.h" ]; then
- if [[ $host_os = mingw* ]]; then
- app_path=`echo "/$AMDAPPSDKROOT" | sed 's/\\\/\//g' | sed 's/://'`
- else
- app_path='$(AMDAPPSDKROOT)'
- fi
- echo 'HAVE_OPENCL=yes' >> config.mak
- echo 'OPENCL_LIB=OpenCL' >> config.mak
- echo OPENCL_INC_DIR=$app_path/include >> config.mak
- if [ "$ARCH" = "X86" ]; then
- echo OPENCL_LIB_DIR=$app_path/lib/x86 >> config.mak
- else
- echo OPENCL_LIB_DIR=$app_path/lib/x86_64 >> config.mak
- fi
- define HAVE_OPENCL
- else
- opencl="no"
- fi
-fi
-
if [ $compiler = ICL ]; then
echo '%.o: %.c' >> config.mak
echo ' $(CC) $(CFLAGS) -c -Fo$@ $<' >> config.mak
Description: H.264 (MPEG4 AVC) encoder library
Version: $(grep POINTVER < x264_config.h | sed -e 's/.* "//; s/".*//')
Libs: -L$libdir -lx264
-Libs.private: $libpthread $libm
+Libs.private: $libpthread $libm $libdl
Cflags: -I$includedir
EOF
gpac: $gpac
gpl: $gpl
thread: $thread
+opencl: $opencl
filters: $filters
debug: $debug
gprof: $gprof
visualize: $vis
bit depth: $bit_depth
chroma format: $chroma_format
-opencl: $opencl
EOF
echo >> config.log
#include "common/visualize.h"
#endif
-#if HAVE_OPENCL
-#include "common/opencl.h"
-#endif
-
//#define DEBUG_MB_TYPE
#define bs_write_ue bs_write_ue_big
x264_threadpool_init( &h->lookaheadpool, h->param.i_lookahead_threads, (void*)x264_lookahead_thread_init, h ) )
goto fail;
+#if HAVE_OPENCL
+ if( h->param.b_opencl )
+ {
+ h->opencl.ocl = x264_opencl_load_library();
+ if( !h->opencl.ocl )
+ {
+ x264_log( h, X264_LOG_WARNING, "failed to load OpenCL\n" );
+ h->param.b_opencl = 0;
+ }
+ }
+#endif
+
h->thread[0] = h;
for( int i = 1; i < h->param.i_threads + !!h->param.i_sync_lookahead; i++ )
CHECKED_MALLOC( h->thread[i], sizeof(x264_t) );
}
#if HAVE_OPENCL
- if( h->param.b_opencl && x264_opencl_init( h ) < 0 )
+ if( h->param.b_opencl && x264_opencl_lookahead_init( h ) < 0 )
h->param.b_opencl = 0;
#endif
|| h->stat.i_mb_count[SLICE_TYPE_P][I_PCM]
|| h->stat.i_mb_count[SLICE_TYPE_B][I_PCM];
+ x264_lookahead_delete( h );
+
#if HAVE_OPENCL
- x264_opencl_free( h );
+ x264_opencl_lookahead_delete( h );
+ x264_opencl_function_t *ocl = h->opencl.ocl;
#endif
- x264_lookahead_delete( h );
-
if( h->param.b_sliced_threads )
x264_threadpool_wait_all( h );
if( h->param.i_threads > 1 )
x264_pthread_cond_destroy( &h->thread[i]->cv );
x264_free( h->thread[i] );
}
+#if HAVE_OPENCL
+ x264_opencl_close_library( ocl );
+#endif
}
int x264_encoder_delayed_frames( x264_t *h )
#include "me.h"
#if HAVE_OPENCL
-#if _WIN32
+#ifdef _WIN32
#include <windows.h>
#endif
#define CL_QUEUE_THREAD_HANDLE_AMD 0x403E
#define OCLCHECK( method, ... )\
- status = method( __VA_ARGS__ );\
+ status = ocl->method( __VA_ARGS__ );\
if( status != CL_SUCCESS ) {\
h->param.b_opencl = 0;\
h->opencl.b_fatal_error = 1;\
void x264_opencl_flush( x264_t *h )
{
- clFinish( h->opencl.queue );
+ x264_opencl_function_t *ocl = h->opencl.ocl;
+
+ ocl->clFinish( h->opencl.queue );
/* Finish copies from the GPU by copying from the page-locked buffer to
* their final destination */
return 0;
fenc->b_intra_calculated = 1;
+ x264_opencl_function_t *ocl = h->opencl.ocl;
int luma_length = fenc->i_stride[0] * fenc->i_lines[0];
#define CREATEBUF( out, flags, size )\
- out = clCreateBuffer( h->opencl.context, (flags), (size), NULL, &status );\
+ out = ocl->clCreateBuffer( h->opencl.context, (flags), (size), NULL, &status );\
if( status != CL_SUCCESS ) { h->param.b_opencl = 0; x264_log( h, X264_LOG_ERROR, "clCreateBuffer error '%d'\n", status ); return -1; }
#define CREATEIMAGE( out, flags, pf, width, height )\
- out = clCreateImage2D( h->opencl.context, (flags), &pf, width, height, 0, NULL, &status );\
+ out = ocl->clCreateImage2D( h->opencl.context, (flags), &pf, width, height, 0, NULL, &status );\
if( status != CL_SUCCESS ) { h->param.b_opencl = 0; x264_log( h, X264_LOG_ERROR, "clCreateImage2D error '%d'\n", status ); return -1; }
int mb_count = h->mb.i_mb_count;
* applications will have self-tuning code to try many possible variables and
* measure the runtime. Here we simply make an educated guess based on what we
* know GPUs typically prefer. */
-static void x264_optimal_launch_dims( size_t *gdims, size_t *ldims, const cl_kernel kernel, const cl_device_id device )
+static void x264_optimal_launch_dims( x264_t *h, size_t *gdims, size_t *ldims, const cl_kernel kernel, const cl_device_id device )
{
+ x264_opencl_function_t *ocl = h->opencl.ocl;
size_t max_work_group = 256; /* reasonable defaults for OpenCL 1.0 devices, below APIs may fail */
size_t preferred_multiple = 64;
cl_uint num_cus = 6;
- clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_work_group, NULL );
- clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &preferred_multiple, NULL );
- clGetDeviceInfo( device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &num_cus, NULL );
+ ocl->clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_work_group, NULL );
+ ocl->clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &preferred_multiple, NULL );
+ ocl->clGetDeviceInfo( device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &num_cus, NULL );
ldims[0] = preferred_multiple;
ldims[1] = 8;
int x264_opencl_motionsearch( x264_t *h, x264_frame_t **frames, int b, int ref, int b_islist1, int lambda, const x264_weight_t *w )
{
+ x264_opencl_function_t *ocl = h->opencl.ocl;
x264_frame_t *fenc = frames[b];
x264_frame_t *fref = frames[ref];
if( gdims[0] < 2 || gdims[1] < 2 )
continue;
gdims[0] <<= 2;
- x264_optimal_launch_dims( gdims, ldims, h->opencl.hme_kernel, h->opencl.device );
+ x264_optimal_launch_dims( h, gdims, ldims, h->opencl.hme_kernel, h->opencl.device );
mb_per_group = (ldims[0] >> 2) * ldims[1];
cost_local_size = 4 * mb_per_group * sizeof(int16_t);
int x264_opencl_finalize_cost( x264_t *h, int lambda, x264_frame_t **frames, int p0, int p1, int b, int dist_scale_factor )
{
+ x264_opencl_function_t *ocl = h->opencl.ocl;
cl_int status;
x264_frame_t *fenc = frames[b];
x264_frame_t *fref0 = frames[p0];
/* For B frames, use 4 threads per MB for BIDIR checks */
ldims = ldim_bidir;
gdims[0] <<= 2;
- x264_optimal_launch_dims( gdims, ldims, h->opencl.mode_select_kernel, h->opencl.device );
+ x264_optimal_launch_dims( h, gdims, ldims, h->opencl.mode_select_kernel, h->opencl.device );
int mb_per_group = (ldims[0] >> 2) * ldims[1];
cost_local_size = 4 * mb_per_group * sizeof(int16_t);
satd_local_size = 16 * mb_per_group * sizeof(uint32_t);
{
if( h->param.b_opencl )
{
-#if _WIN32
+#ifdef _WIN32
/* Temporarily boost priority of this lookahead thread and the OpenCL
* driver's thread until the end of this function. On AMD GPUs this
* greatly reduces the latency of enqueuing kernels and getting results
HANDLE id = GetCurrentThread();
h->opencl.lookahead_thread_pri = GetThreadPriority( id );
SetThreadPriority( id, THREAD_PRIORITY_ABOVE_NORMAL );
- cl_int status = clGetCommandQueueInfo( h->opencl.queue, CL_QUEUE_THREAD_HANDLE_AMD, sizeof(HANDLE), &id, NULL );
+ x264_opencl_function_t *ocl = h->opencl.ocl;
+ cl_int status = ocl->clGetCommandQueueInfo( h->opencl.queue, CL_QUEUE_THREAD_HANDLE_AMD, sizeof(HANDLE), &id, NULL );
if( status == CL_SUCCESS )
{
h->opencl.opencl_thread_pri = GetThreadPriority( id );
void x264_opencl_slicetype_end( x264_t *h )
{
-#if _WIN32
+#ifdef _WIN32
if( h->param.b_opencl )
{
HANDLE id = GetCurrentThread();
SetThreadPriority( id, h->opencl.lookahead_thread_pri );
- cl_int status = clGetCommandQueueInfo( h->opencl.queue, CL_QUEUE_THREAD_HANDLE_AMD, sizeof(HANDLE), &id, NULL );
+ x264_opencl_function_t *ocl = h->opencl.ocl;
+ cl_int status = ocl->clGetCommandQueueInfo( h->opencl.queue, CL_QUEUE_THREAD_HANDLE_AMD, sizeof(HANDLE), &id, NULL );
if( status == CL_SUCCESS )
SetThreadPriority( id, h->opencl.opencl_thread_pri );
}
--- /dev/null
+/*******************************************************************************
+ * Copyright (c) 2008 - 2012 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ ******************************************************************************/
+
+#ifndef __OPENCL_CL_H
+#define __OPENCL_CL_H
+
+#include "cl_platform.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/******************************************************************************/
+
+typedef struct _cl_platform_id * cl_platform_id;
+typedef struct _cl_device_id * cl_device_id;
+typedef struct _cl_context * cl_context;
+typedef struct _cl_command_queue * cl_command_queue;
+typedef struct _cl_mem * cl_mem;
+typedef struct _cl_program * cl_program;
+typedef struct _cl_kernel * cl_kernel;
+typedef struct _cl_event * cl_event;
+typedef struct _cl_sampler * cl_sampler;
+
+typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */
+typedef cl_ulong cl_bitfield;
+typedef cl_bitfield cl_device_type;
+typedef cl_uint cl_platform_info;
+typedef cl_uint cl_device_info;
+typedef cl_bitfield cl_device_fp_config;
+typedef cl_uint cl_device_mem_cache_type;
+typedef cl_uint cl_device_local_mem_type;
+typedef cl_bitfield cl_device_exec_capabilities;
+typedef cl_bitfield cl_command_queue_properties;
+typedef intptr_t cl_device_partition_property;
+typedef cl_bitfield cl_device_affinity_domain;
+
+typedef intptr_t cl_context_properties;
+typedef cl_uint cl_context_info;
+typedef cl_uint cl_command_queue_info;
+typedef cl_uint cl_channel_order;
+typedef cl_uint cl_channel_type;
+typedef cl_bitfield cl_mem_flags;
+typedef cl_uint cl_mem_object_type;
+typedef cl_uint cl_mem_info;
+typedef cl_bitfield cl_mem_migration_flags;
+typedef cl_uint cl_image_info;
+typedef cl_uint cl_buffer_create_type;
+typedef cl_uint cl_addressing_mode;
+typedef cl_uint cl_filter_mode;
+typedef cl_uint cl_sampler_info;
+typedef cl_bitfield cl_map_flags;
+typedef cl_uint cl_program_info;
+typedef cl_uint cl_program_build_info;
+typedef cl_uint cl_program_binary_type;
+typedef cl_int cl_build_status;
+typedef cl_uint cl_kernel_info;
+typedef cl_uint cl_kernel_arg_info;
+typedef cl_uint cl_kernel_arg_address_qualifier;
+typedef cl_uint cl_kernel_arg_access_qualifier;
+typedef cl_bitfield cl_kernel_arg_type_qualifier;
+typedef cl_uint cl_kernel_work_group_info;
+typedef cl_uint cl_event_info;
+typedef cl_uint cl_command_type;
+typedef cl_uint cl_profiling_info;
+
+
+typedef struct _cl_image_format {
+ cl_channel_order image_channel_order;
+ cl_channel_type image_channel_data_type;
+} cl_image_format;
+
+typedef struct _cl_image_desc {
+ cl_mem_object_type image_type;
+ size_t image_width;
+ size_t image_height;
+ size_t image_depth;
+ size_t image_array_size;
+ size_t image_row_pitch;
+ size_t image_slice_pitch;
+ cl_uint num_mip_levels;
+ cl_uint num_samples;
+ cl_mem buffer;
+} cl_image_desc;
+
+typedef struct _cl_buffer_region {
+ size_t origin;
+ size_t size;
+} cl_buffer_region;
+
+
+/******************************************************************************/
+
+/* Error Codes */
+#define CL_SUCCESS 0
+#define CL_DEVICE_NOT_FOUND -1
+#define CL_DEVICE_NOT_AVAILABLE -2
+#define CL_COMPILER_NOT_AVAILABLE -3
+#define CL_MEM_OBJECT_ALLOCATION_FAILURE -4
+#define CL_OUT_OF_RESOURCES -5
+#define CL_OUT_OF_HOST_MEMORY -6
+#define CL_PROFILING_INFO_NOT_AVAILABLE -7
+#define CL_MEM_COPY_OVERLAP -8
+#define CL_IMAGE_FORMAT_MISMATCH -9
+#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10
+#define CL_BUILD_PROGRAM_FAILURE -11
+#define CL_MAP_FAILURE -12
+#define CL_MISALIGNED_SUB_BUFFER_OFFSET -13
+#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14
+#define CL_COMPILE_PROGRAM_FAILURE -15
+#define CL_LINKER_NOT_AVAILABLE -16
+#define CL_LINK_PROGRAM_FAILURE -17
+#define CL_DEVICE_PARTITION_FAILED -18
+#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19
+
+#define CL_INVALID_VALUE -30
+#define CL_INVALID_DEVICE_TYPE -31
+#define CL_INVALID_PLATFORM -32
+#define CL_INVALID_DEVICE -33
+#define CL_INVALID_CONTEXT -34
+#define CL_INVALID_QUEUE_PROPERTIES -35
+#define CL_INVALID_COMMAND_QUEUE -36
+#define CL_INVALID_HOST_PTR -37
+#define CL_INVALID_MEM_OBJECT -38
+#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39
+#define CL_INVALID_IMAGE_SIZE -40
+#define CL_INVALID_SAMPLER -41
+#define CL_INVALID_BINARY -42
+#define CL_INVALID_BUILD_OPTIONS -43
+#define CL_INVALID_PROGRAM -44
+#define CL_INVALID_PROGRAM_EXECUTABLE -45
+#define CL_INVALID_KERNEL_NAME -46
+#define CL_INVALID_KERNEL_DEFINITION -47
+#define CL_INVALID_KERNEL -48
+#define CL_INVALID_ARG_INDEX -49
+#define CL_INVALID_ARG_VALUE -50
+#define CL_INVALID_ARG_SIZE -51
+#define CL_INVALID_KERNEL_ARGS -52
+#define CL_INVALID_WORK_DIMENSION -53
+#define CL_INVALID_WORK_GROUP_SIZE -54
+#define CL_INVALID_WORK_ITEM_SIZE -55
+#define CL_INVALID_GLOBAL_OFFSET -56
+#define CL_INVALID_EVENT_WAIT_LIST -57
+#define CL_INVALID_EVENT -58
+#define CL_INVALID_OPERATION -59
+#define CL_INVALID_GL_OBJECT -60
+#define CL_INVALID_BUFFER_SIZE -61
+#define CL_INVALID_MIP_LEVEL -62
+#define CL_INVALID_GLOBAL_WORK_SIZE -63
+#define CL_INVALID_PROPERTY -64
+#define CL_INVALID_IMAGE_DESCRIPTOR -65
+#define CL_INVALID_COMPILER_OPTIONS -66
+#define CL_INVALID_LINKER_OPTIONS -67
+#define CL_INVALID_DEVICE_PARTITION_COUNT -68
+
+/* OpenCL Version */
+#define CL_VERSION_1_0 1
+#define CL_VERSION_1_1 1
+#define CL_VERSION_1_2 1
+
+/* cl_bool */
+#define CL_FALSE 0
+#define CL_TRUE 1
+#define CL_BLOCKING CL_TRUE
+#define CL_NON_BLOCKING CL_FALSE
+
+/* cl_platform_info */
+#define CL_PLATFORM_PROFILE 0x0900
+#define CL_PLATFORM_VERSION 0x0901
+#define CL_PLATFORM_NAME 0x0902
+#define CL_PLATFORM_VENDOR 0x0903
+#define CL_PLATFORM_EXTENSIONS 0x0904
+
+/* cl_device_type - bitfield */
+#define CL_DEVICE_TYPE_DEFAULT (1 << 0)
+#define CL_DEVICE_TYPE_CPU (1 << 1)
+#define CL_DEVICE_TYPE_GPU (1 << 2)
+#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3)
+#define CL_DEVICE_TYPE_CUSTOM (1 << 4)
+#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF
+
+/* cl_device_info */
+#define CL_DEVICE_TYPE 0x1000
+#define CL_DEVICE_VENDOR_ID 0x1001
+#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002
+#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003
+#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004
+#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B
+#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C
+#define CL_DEVICE_ADDRESS_BITS 0x100D
+#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E
+#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F
+#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010
+#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011
+#define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012
+#define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013
+#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014
+#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015
+#define CL_DEVICE_IMAGE_SUPPORT 0x1016
+#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017
+#define CL_DEVICE_MAX_SAMPLERS 0x1018
+#define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019
+#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A
+#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B
+#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C
+#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D
+#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E
+#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F
+#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020
+#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021
+#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022
+#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023
+#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024
+#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025
+#define CL_DEVICE_ENDIAN_LITTLE 0x1026
+#define CL_DEVICE_AVAILABLE 0x1027
+#define CL_DEVICE_COMPILER_AVAILABLE 0x1028
+#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029
+#define CL_DEVICE_QUEUE_PROPERTIES 0x102A
+#define CL_DEVICE_NAME 0x102B
+#define CL_DEVICE_VENDOR 0x102C
+#define CL_DRIVER_VERSION 0x102D
+#define CL_DEVICE_PROFILE 0x102E
+#define CL_DEVICE_VERSION 0x102F
+#define CL_DEVICE_EXTENSIONS 0x1030
+#define CL_DEVICE_PLATFORM 0x1031
+#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
+/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG */
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034
+#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C
+#define CL_DEVICE_OPENCL_C_VERSION 0x103D
+#define CL_DEVICE_LINKER_AVAILABLE 0x103E
+#define CL_DEVICE_BUILT_IN_KERNELS 0x103F
+#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040
+#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041
+#define CL_DEVICE_PARENT_DEVICE 0x1042
+#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043
+#define CL_DEVICE_PARTITION_PROPERTIES 0x1044
+#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045
+#define CL_DEVICE_PARTITION_TYPE 0x1046
+#define CL_DEVICE_REFERENCE_COUNT 0x1047
+#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048
+#define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049
+#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A
+#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B
+
+/* cl_device_fp_config - bitfield */
+#define CL_FP_DENORM (1 << 0)
+#define CL_FP_INF_NAN (1 << 1)
+#define CL_FP_ROUND_TO_NEAREST (1 << 2)
+#define CL_FP_ROUND_TO_ZERO (1 << 3)
+#define CL_FP_ROUND_TO_INF (1 << 4)
+#define CL_FP_FMA (1 << 5)
+#define CL_FP_SOFT_FLOAT (1 << 6)
+#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7)
+
+/* cl_device_mem_cache_type */
+#define CL_NONE 0x0
+#define CL_READ_ONLY_CACHE 0x1
+#define CL_READ_WRITE_CACHE 0x2
+
+/* cl_device_local_mem_type */
+#define CL_LOCAL 0x1
+#define CL_GLOBAL 0x2
+
+/* cl_device_exec_capabilities - bitfield */
+#define CL_EXEC_KERNEL (1 << 0)
+#define CL_EXEC_NATIVE_KERNEL (1 << 1)
+
+/* cl_command_queue_properties - bitfield */
+#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0)
+#define CL_QUEUE_PROFILING_ENABLE (1 << 1)
+
+/* cl_context_info */
+#define CL_CONTEXT_REFERENCE_COUNT 0x1080
+#define CL_CONTEXT_DEVICES 0x1081
+#define CL_CONTEXT_PROPERTIES 0x1082
+#define CL_CONTEXT_NUM_DEVICES 0x1083
+
+/* cl_context_properties */
+#define CL_CONTEXT_PLATFORM 0x1084
+#define CL_CONTEXT_INTEROP_USER_SYNC 0x1085
+
+/* cl_device_partition_property */
+#define CL_DEVICE_PARTITION_EQUALLY 0x1086
+#define CL_DEVICE_PARTITION_BY_COUNTS 0x1087
+#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0
+#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088
+
+/* cl_device_affinity_domain */
+#define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0)
+#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1)
+#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2)
+#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3)
+#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4)
+#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5)
+
+/* cl_command_queue_info */
+#define CL_QUEUE_CONTEXT 0x1090
+#define CL_QUEUE_DEVICE 0x1091
+#define CL_QUEUE_REFERENCE_COUNT 0x1092
+#define CL_QUEUE_PROPERTIES 0x1093
+
+/* cl_mem_flags - bitfield */
+#define CL_MEM_READ_WRITE (1 << 0)
+#define CL_MEM_WRITE_ONLY (1 << 1)
+#define CL_MEM_READ_ONLY (1 << 2)
+#define CL_MEM_USE_HOST_PTR (1 << 3)
+#define CL_MEM_ALLOC_HOST_PTR (1 << 4)
+#define CL_MEM_COPY_HOST_PTR (1 << 5)
+// reserved (1 << 6)
+#define CL_MEM_HOST_WRITE_ONLY (1 << 7)
+#define CL_MEM_HOST_READ_ONLY (1 << 8)
+#define CL_MEM_HOST_NO_ACCESS (1 << 9)
+
+/* cl_mem_migration_flags - bitfield */
+#define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0)
+#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1)
+
+/* cl_channel_order */
+#define CL_R 0x10B0
+#define CL_A 0x10B1
+#define CL_RG 0x10B2
+#define CL_RA 0x10B3
+#define CL_RGB 0x10B4
+#define CL_RGBA 0x10B5
+#define CL_BGRA 0x10B6
+#define CL_ARGB 0x10B7
+#define CL_INTENSITY 0x10B8
+#define CL_LUMINANCE 0x10B9
+#define CL_Rx 0x10BA
+#define CL_RGx 0x10BB
+#define CL_RGBx 0x10BC
+#define CL_DEPTH 0x10BD
+#define CL_DEPTH_STENCIL 0x10BE
+
+/* cl_channel_type */
+#define CL_SNORM_INT8 0x10D0
+#define CL_SNORM_INT16 0x10D1
+#define CL_UNORM_INT8 0x10D2
+#define CL_UNORM_INT16 0x10D3
+#define CL_UNORM_SHORT_565 0x10D4
+#define CL_UNORM_SHORT_555 0x10D5
+#define CL_UNORM_INT_101010 0x10D6
+#define CL_SIGNED_INT8 0x10D7
+#define CL_SIGNED_INT16 0x10D8
+#define CL_SIGNED_INT32 0x10D9
+#define CL_UNSIGNED_INT8 0x10DA
+#define CL_UNSIGNED_INT16 0x10DB
+#define CL_UNSIGNED_INT32 0x10DC
+#define CL_HALF_FLOAT 0x10DD
+#define CL_FLOAT 0x10DE
+#define CL_UNORM_INT24 0x10DF
+
+/* cl_mem_object_type */
+#define CL_MEM_OBJECT_BUFFER 0x10F0
+#define CL_MEM_OBJECT_IMAGE2D 0x10F1
+#define CL_MEM_OBJECT_IMAGE3D 0x10F2
+#define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3
+#define CL_MEM_OBJECT_IMAGE1D 0x10F4
+#define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5
+#define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6
+
+/* cl_mem_info */
+#define CL_MEM_TYPE 0x1100
+#define CL_MEM_FLAGS 0x1101
+#define CL_MEM_SIZE 0x1102
+#define CL_MEM_HOST_PTR 0x1103
+#define CL_MEM_MAP_COUNT 0x1104
+#define CL_MEM_REFERENCE_COUNT 0x1105
+#define CL_MEM_CONTEXT 0x1106
+#define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107
+#define CL_MEM_OFFSET 0x1108
+
+/* cl_image_info */
+#define CL_IMAGE_FORMAT 0x1110
+#define CL_IMAGE_ELEMENT_SIZE 0x1111
+#define CL_IMAGE_ROW_PITCH 0x1112
+#define CL_IMAGE_SLICE_PITCH 0x1113
+#define CL_IMAGE_WIDTH 0x1114
+#define CL_IMAGE_HEIGHT 0x1115
+#define CL_IMAGE_DEPTH 0x1116
+#define CL_IMAGE_ARRAY_SIZE 0x1117
+#define CL_IMAGE_BUFFER 0x1118
+#define CL_IMAGE_NUM_MIP_LEVELS 0x1119
+#define CL_IMAGE_NUM_SAMPLES 0x111A
+
+/* cl_addressing_mode */
+#define CL_ADDRESS_NONE 0x1130
+#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131
+#define CL_ADDRESS_CLAMP 0x1132
+#define CL_ADDRESS_REPEAT 0x1133
+#define CL_ADDRESS_MIRRORED_REPEAT 0x1134
+
+/* cl_filter_mode */
+#define CL_FILTER_NEAREST 0x1140
+#define CL_FILTER_LINEAR 0x1141
+
+/* cl_sampler_info */
+#define CL_SAMPLER_REFERENCE_COUNT 0x1150
+#define CL_SAMPLER_CONTEXT 0x1151
+#define CL_SAMPLER_NORMALIZED_COORDS 0x1152
+#define CL_SAMPLER_ADDRESSING_MODE 0x1153
+#define CL_SAMPLER_FILTER_MODE 0x1154
+
+/* cl_map_flags - bitfield */
+#define CL_MAP_READ (1 << 0)
+#define CL_MAP_WRITE (1 << 1)
+#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2)
+
+/* cl_program_info */
+#define CL_PROGRAM_REFERENCE_COUNT 0x1160
+#define CL_PROGRAM_CONTEXT 0x1161
+#define CL_PROGRAM_NUM_DEVICES 0x1162
+#define CL_PROGRAM_DEVICES 0x1163
+#define CL_PROGRAM_SOURCE 0x1164
+#define CL_PROGRAM_BINARY_SIZES 0x1165
+#define CL_PROGRAM_BINARIES 0x1166
+#define CL_PROGRAM_NUM_KERNELS 0x1167
+#define CL_PROGRAM_KERNEL_NAMES 0x1168
+
+/* cl_program_build_info */
+#define CL_PROGRAM_BUILD_STATUS 0x1181
+#define CL_PROGRAM_BUILD_OPTIONS 0x1182
+#define CL_PROGRAM_BUILD_LOG 0x1183
+#define CL_PROGRAM_BINARY_TYPE 0x1184
+
+/* cl_program_binary_type */
+#define CL_PROGRAM_BINARY_TYPE_NONE 0x0
+#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1
+#define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2
+#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4
+
+/* cl_build_status */
+#define CL_BUILD_SUCCESS 0
+#define CL_BUILD_NONE -1
+#define CL_BUILD_ERROR -2
+#define CL_BUILD_IN_PROGRESS -3
+
+/* cl_kernel_info */
+#define CL_KERNEL_FUNCTION_NAME 0x1190
+#define CL_KERNEL_NUM_ARGS 0x1191
+#define CL_KERNEL_REFERENCE_COUNT 0x1192
+#define CL_KERNEL_CONTEXT 0x1193
+#define CL_KERNEL_PROGRAM 0x1194
+#define CL_KERNEL_ATTRIBUTES 0x1195
+
+/* cl_kernel_arg_info */
+#define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196
+#define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197
+#define CL_KERNEL_ARG_TYPE_NAME 0x1198
+#define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199
+#define CL_KERNEL_ARG_NAME 0x119A
+
+/* cl_kernel_arg_address_qualifier */
+#define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B
+#define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C
+#define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D
+#define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E
+
+/* cl_kernel_arg_access_qualifier */
+#define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0
+#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1
+#define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2
+#define CL_KERNEL_ARG_ACCESS_NONE 0x11A3
+
+/* cl_kernel_arg_type_qualifer */
+#define CL_KERNEL_ARG_TYPE_NONE 0
+#define CL_KERNEL_ARG_TYPE_CONST (1 << 0)
+#define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1)
+#define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2)
+
+/* cl_kernel_work_group_info */
+#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0
+#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1
+#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2
+#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3
+#define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4
+#define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5
+
+/* cl_event_info */
+#define CL_EVENT_COMMAND_QUEUE 0x11D0
+#define CL_EVENT_COMMAND_TYPE 0x11D1
+#define CL_EVENT_REFERENCE_COUNT 0x11D2
+#define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3
+#define CL_EVENT_CONTEXT 0x11D4
+
+/* cl_command_type */
+#define CL_COMMAND_NDRANGE_KERNEL 0x11F0
+#define CL_COMMAND_TASK 0x11F1
+#define CL_COMMAND_NATIVE_KERNEL 0x11F2
+#define CL_COMMAND_READ_BUFFER 0x11F3
+#define CL_COMMAND_WRITE_BUFFER 0x11F4
+#define CL_COMMAND_COPY_BUFFER 0x11F5
+#define CL_COMMAND_READ_IMAGE 0x11F6
+#define CL_COMMAND_WRITE_IMAGE 0x11F7
+#define CL_COMMAND_COPY_IMAGE 0x11F8
+#define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9
+#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA
+#define CL_COMMAND_MAP_BUFFER 0x11FB
+#define CL_COMMAND_MAP_IMAGE 0x11FC
+#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD
+#define CL_COMMAND_MARKER 0x11FE
+#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF
+#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200
+#define CL_COMMAND_READ_BUFFER_RECT 0x1201
+#define CL_COMMAND_WRITE_BUFFER_RECT 0x1202
+#define CL_COMMAND_COPY_BUFFER_RECT 0x1203
+#define CL_COMMAND_USER 0x1204
+#define CL_COMMAND_BARRIER 0x1205
+#define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206
+#define CL_COMMAND_FILL_BUFFER 0x1207
+#define CL_COMMAND_FILL_IMAGE 0x1208
+
+/* command execution status */
+#define CL_COMPLETE 0x0
+#define CL_RUNNING 0x1
+#define CL_SUBMITTED 0x2
+#define CL_QUEUED 0x3
+
+/* cl_buffer_create_type */
+#define CL_BUFFER_CREATE_TYPE_REGION 0x1220
+
+/* cl_profiling_info */
+#define CL_PROFILING_COMMAND_QUEUED 0x1280
+#define CL_PROFILING_COMMAND_SUBMIT 0x1281
+#define CL_PROFILING_COMMAND_START 0x1282
+#define CL_PROFILING_COMMAND_END 0x1283
+
+/********************************************************************************************************/
+
+/* Platform API */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetPlatformIDs(cl_uint /* num_entries */,
+ cl_platform_id * /* platforms */,
+ cl_uint * /* num_platforms */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetPlatformInfo(cl_platform_id /* platform */,
+ cl_platform_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Device APIs */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetDeviceIDs(cl_platform_id /* platform */,
+ cl_device_type /* device_type */,
+ cl_uint /* num_entries */,
+ cl_device_id * /* devices */,
+ cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetDeviceInfo(cl_device_id /* device */,
+ cl_device_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clCreateSubDevices(cl_device_id /* in_device */,
+ const cl_device_partition_property * /* properties */,
+ cl_uint /* num_devices */,
+ cl_device_id * /* out_devices */,
+ cl_uint * /* num_devices_ret */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2;
+
+/* Context APIs */
+extern CL_API_ENTRY cl_context CL_API_CALL
+clCreateContext(const cl_context_properties * /* properties */,
+ cl_uint /* num_devices */,
+ const cl_device_id * /* devices */,
+ void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *),
+ void * /* user_data */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_context CL_API_CALL
+clCreateContextFromType(const cl_context_properties * /* properties */,
+ cl_device_type /* device_type */,
+ void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *),
+ void * /* user_data */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetContextInfo(cl_context /* context */,
+ cl_context_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Command Queue APIs */
+extern CL_API_ENTRY cl_command_queue CL_API_CALL
+clCreateCommandQueue(cl_context /* context */,
+ cl_device_id /* device */,
+ cl_command_queue_properties /* properties */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetCommandQueueInfo(cl_command_queue /* command_queue */,
+ cl_command_queue_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Memory Object APIs */
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateBuffer(cl_context /* context */,
+ cl_mem_flags /* flags */,
+ size_t /* size */,
+ void * /* host_ptr */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateSubBuffer(cl_mem /* buffer */,
+ cl_mem_flags /* flags */,
+ cl_buffer_create_type /* buffer_create_type */,
+ const void * /* buffer_create_info */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateImage(cl_context /* context */,
+ cl_mem_flags /* flags */,
+ const cl_image_format * /* image_format */,
+ const cl_image_desc * /* image_desc */,
+ void * /* host_ptr */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetSupportedImageFormats(cl_context /* context */,
+ cl_mem_flags /* flags */,
+ cl_mem_object_type /* image_type */,
+ cl_uint /* num_entries */,
+ cl_image_format * /* image_formats */,
+ cl_uint * /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetMemObjectInfo(cl_mem /* memobj */,
+ cl_mem_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetImageInfo(cl_mem /* image */,
+ cl_image_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetMemObjectDestructorCallback( cl_mem /* memobj */,
+ void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
+ void * /*user_data */ ) CL_API_SUFFIX__VERSION_1_1;
+
+/* Sampler APIs */
+extern CL_API_ENTRY cl_sampler CL_API_CALL
+clCreateSampler(cl_context /* context */,
+ cl_bool /* normalized_coords */,
+ cl_addressing_mode /* addressing_mode */,
+ cl_filter_mode /* filter_mode */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetSamplerInfo(cl_sampler /* sampler */,
+ cl_sampler_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Program Object APIs */
+extern CL_API_ENTRY cl_program CL_API_CALL
+clCreateProgramWithSource(cl_context /* context */,
+ cl_uint /* count */,
+ const char ** /* strings */,
+ const size_t * /* lengths */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_program CL_API_CALL
+clCreateProgramWithBinary(cl_context /* context */,
+ cl_uint /* num_devices */,
+ const cl_device_id * /* device_list */,
+ const size_t * /* lengths */,
+ const unsigned char ** /* binaries */,
+ cl_int * /* binary_status */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_program CL_API_CALL
+clCreateProgramWithBuiltInKernels(cl_context /* context */,
+ cl_uint /* num_devices */,
+ const cl_device_id * /* device_list */,
+ const char * /* kernel_names */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clBuildProgram(cl_program /* program */,
+ cl_uint /* num_devices */,
+ const cl_device_id * /* device_list */,
+ const char * /* options */,
+ void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
+ void * /* user_data */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clCompileProgram(cl_program /* program */,
+ cl_uint /* num_devices */,
+ const cl_device_id * /* device_list */,
+ const char * /* options */,
+ cl_uint /* num_input_headers */,
+ const cl_program * /* input_headers */,
+ const char ** /* header_include_names */,
+ void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
+ void * /* user_data */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_program CL_API_CALL
+clLinkProgram(cl_context /* context */,
+ cl_uint /* num_devices */,
+ const cl_device_id * /* device_list */,
+ const char * /* options */,
+ cl_uint /* num_input_programs */,
+ const cl_program * /* input_programs */,
+ void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
+ void * /* user_data */,
+ cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_2;
+
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clUnloadPlatformCompiler(cl_platform_id /* platform */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetProgramInfo(cl_program /* program */,
+ cl_program_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetProgramBuildInfo(cl_program /* program */,
+ cl_device_id /* device */,
+ cl_program_build_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Kernel Object APIs */
+extern CL_API_ENTRY cl_kernel CL_API_CALL
+clCreateKernel(cl_program /* program */,
+ const char * /* kernel_name */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clCreateKernelsInProgram(cl_program /* program */,
+ cl_uint /* num_kernels */,
+ cl_kernel * /* kernels */,
+ cl_uint * /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetKernelArg(cl_kernel /* kernel */,
+ cl_uint /* arg_index */,
+ size_t /* arg_size */,
+ const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetKernelInfo(cl_kernel /* kernel */,
+ cl_kernel_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetKernelArgInfo(cl_kernel /* kernel */,
+ cl_uint /* arg_indx */,
+ cl_kernel_arg_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetKernelWorkGroupInfo(cl_kernel /* kernel */,
+ cl_device_id /* device */,
+ cl_kernel_work_group_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Event Object APIs */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clWaitForEvents(cl_uint /* num_events */,
+ const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetEventInfo(cl_event /* event */,
+ cl_event_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_event CL_API_CALL
+clCreateUserEvent(cl_context /* context */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetUserEventStatus(cl_event /* event */,
+ cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetEventCallback( cl_event /* event */,
+ cl_int /* command_exec_callback_type */,
+ void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *),
+ void * /* user_data */) CL_API_SUFFIX__VERSION_1_1;
+
+/* Profiling APIs */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetEventProfilingInfo(cl_event /* event */,
+ cl_profiling_info /* param_name */,
+ size_t /* param_value_size */,
+ void * /* param_value */,
+ size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Flush and Finish APIs */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clFlush(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Enqueued Commands APIs */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReadBuffer(cl_command_queue /* command_queue */,
+ cl_mem /* buffer */,
+ cl_bool /* blocking_read */,
+ size_t /* offset */,
+ size_t /* size */,
+ void * /* ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReadBufferRect(cl_command_queue /* command_queue */,
+ cl_mem /* buffer */,
+ cl_bool /* blocking_read */,
+ const size_t * /* buffer_offset */,
+ const size_t * /* host_offset */,
+ const size_t * /* region */,
+ size_t /* buffer_row_pitch */,
+ size_t /* buffer_slice_pitch */,
+ size_t /* host_row_pitch */,
+ size_t /* host_slice_pitch */,
+ void * /* ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueWriteBuffer(cl_command_queue /* command_queue */,
+ cl_mem /* buffer */,
+ cl_bool /* blocking_write */,
+ size_t /* offset */,
+ size_t /* size */,
+ const void * /* ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueWriteBufferRect(cl_command_queue /* command_queue */,
+ cl_mem /* buffer */,
+ cl_bool /* blocking_write */,
+ const size_t * /* buffer_offset */,
+ const size_t * /* host_offset */,
+ const size_t * /* region */,
+ size_t /* buffer_row_pitch */,
+ size_t /* buffer_slice_pitch */,
+ size_t /* host_row_pitch */,
+ size_t /* host_slice_pitch */,
+ const void * /* ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueFillBuffer(cl_command_queue /* command_queue */,
+ cl_mem /* buffer */,
+ const void * /* pattern */,
+ size_t /* pattern_size */,
+ size_t /* offset */,
+ size_t /* size */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyBuffer(cl_command_queue /* command_queue */,
+ cl_mem /* src_buffer */,
+ cl_mem /* dst_buffer */,
+ size_t /* src_offset */,
+ size_t /* dst_offset */,
+ size_t /* size */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyBufferRect(cl_command_queue /* command_queue */,
+ cl_mem /* src_buffer */,
+ cl_mem /* dst_buffer */,
+ const size_t * /* src_origin */,
+ const size_t * /* dst_origin */,
+ const size_t * /* region */,
+ size_t /* src_row_pitch */,
+ size_t /* src_slice_pitch */,
+ size_t /* dst_row_pitch */,
+ size_t /* dst_slice_pitch */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReadImage(cl_command_queue /* command_queue */,
+ cl_mem /* image */,
+ cl_bool /* blocking_read */,
+ const size_t * /* origin[3] */,
+ const size_t * /* region[3] */,
+ size_t /* row_pitch */,
+ size_t /* slice_pitch */,
+ void * /* ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueWriteImage(cl_command_queue /* command_queue */,
+ cl_mem /* image */,
+ cl_bool /* blocking_write */,
+ const size_t * /* origin[3] */,
+ const size_t * /* region[3] */,
+ size_t /* input_row_pitch */,
+ size_t /* input_slice_pitch */,
+ const void * /* ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueFillImage(cl_command_queue /* command_queue */,
+ cl_mem /* image */,
+ const void * /* fill_color */,
+ const size_t * /* origin[3] */,
+ const size_t * /* region[3] */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyImage(cl_command_queue /* command_queue */,
+ cl_mem /* src_image */,
+ cl_mem /* dst_image */,
+ const size_t * /* src_origin[3] */,
+ const size_t * /* dst_origin[3] */,
+ const size_t * /* region[3] */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyImageToBuffer(cl_command_queue /* command_queue */,
+ cl_mem /* src_image */,
+ cl_mem /* dst_buffer */,
+ const size_t * /* src_origin[3] */,
+ const size_t * /* region[3] */,
+ size_t /* dst_offset */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyBufferToImage(cl_command_queue /* command_queue */,
+ cl_mem /* src_buffer */,
+ cl_mem /* dst_image */,
+ size_t /* src_offset */,
+ const size_t * /* dst_origin[3] */,
+ const size_t * /* region[3] */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY void * CL_API_CALL
+clEnqueueMapBuffer(cl_command_queue /* command_queue */,
+ cl_mem /* buffer */,
+ cl_bool /* blocking_map */,
+ cl_map_flags /* map_flags */,
+ size_t /* offset */,
+ size_t /* size */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY void * CL_API_CALL
+clEnqueueMapImage(cl_command_queue /* command_queue */,
+ cl_mem /* image */,
+ cl_bool /* blocking_map */,
+ cl_map_flags /* map_flags */,
+ const size_t * /* origin[3] */,
+ const size_t * /* region[3] */,
+ size_t * /* image_row_pitch */,
+ size_t * /* image_slice_pitch */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */,
+ cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueUnmapMemObject(cl_command_queue /* command_queue */,
+ cl_mem /* memobj */,
+ void * /* mapped_ptr */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueMigrateMemObjects(cl_command_queue /* command_queue */,
+ cl_uint /* num_mem_objects */,
+ const cl_mem * /* mem_objects */,
+ cl_mem_migration_flags /* flags */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
+ cl_kernel /* kernel */,
+ cl_uint /* work_dim */,
+ const size_t * /* global_work_offset */,
+ const size_t * /* global_work_size */,
+ const size_t * /* local_work_size */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueTask(cl_command_queue /* command_queue */,
+ cl_kernel /* kernel */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueNativeKernel(cl_command_queue /* command_queue */,
+ void (CL_CALLBACK * /*user_func*/)(void *),
+ void * /* args */,
+ size_t /* cb_args */,
+ cl_uint /* num_mem_objects */,
+ const cl_mem * /* mem_list */,
+ const void ** /* args_mem_loc */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueMarkerWithWaitList(cl_command_queue /* command_queue */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueBarrierWithWaitList(cl_command_queue /* command_queue */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
+
+
+/* Extension function access
+ *
+ * Returns the extension function address for the given function name,
+ * or NULL if a valid function can not be found. The client must
+ * check to make sure the address is not NULL, before using or
+ * calling the returned function address.
+ */
+extern CL_API_ENTRY void * CL_API_CALL
+clGetExtensionFunctionAddressForPlatform(cl_platform_id /* platform */,
+ const char * /* func_name */) CL_API_SUFFIX__VERSION_1_2;
+
+
+// Deprecated OpenCL 1.1 APIs
+extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
+clCreateImage2D(cl_context /* context */,
+ cl_mem_flags /* flags */,
+ const cl_image_format * /* image_format */,
+ size_t /* image_width */,
+ size_t /* image_height */,
+ size_t /* image_row_pitch */,
+ void * /* host_ptr */,
+ cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
+clCreateImage3D(cl_context /* context */,
+ cl_mem_flags /* flags */,
+ const cl_image_format * /* image_format */,
+ size_t /* image_width */,
+ size_t /* image_height */,
+ size_t /* image_depth */,
+ size_t /* image_row_pitch */,
+ size_t /* image_slice_pitch */,
+ void * /* host_ptr */,
+ cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
+clEnqueueMarker(cl_command_queue /* command_queue */,
+ cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
+clEnqueueWaitForEvents(cl_command_queue /* command_queue */,
+ cl_uint /* num_events */,
+ const cl_event * /* event_list */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
+clEnqueueBarrier(cl_command_queue /* command_queue */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
+clUnloadCompiler(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL
+clGetExtensionFunctionAddress(const char * /* func_name */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __OPENCL_CL_H */
--- /dev/null
+/**********************************************************************************
+ * Copyright (c) 2008-2012 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ **********************************************************************************/
+
+/* $Revision: 11803 $ on $Date: 2010-06-25 10:02:12 -0700 (Fri, 25 Jun 2010) $ */
+
+#ifndef __CL_PLATFORM_H
+#define __CL_PLATFORM_H
+
+#ifdef __APPLE__
+ /* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */
+ #include <AvailabilityMacros.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(_WIN32)
+ #define CL_API_ENTRY
+ #define CL_API_CALL __stdcall
+ #define CL_CALLBACK __stdcall
+#else
+ #define CL_API_ENTRY
+ #define CL_API_CALL
+ #define CL_CALLBACK
+#endif
+
+#ifdef __APPLE__
+ #define CL_EXTENSION_WEAK_LINK __attribute__((weak_import))
+ #define CL_API_SUFFIX__VERSION_1_0 AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+ #define CL_EXT_SUFFIX__VERSION_1_0 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+ #define CL_API_SUFFIX__VERSION_1_1 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+ #define GCL_API_SUFFIX__VERSION_1_1 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+ #define CL_EXT_SUFFIX__VERSION_1_1 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+ #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7
+
+ #ifdef AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
+ #define CL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
+ #define GCL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
+ #define CL_EXT_SUFFIX__VERSION_1_2 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
+ #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
+ #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8
+ #else
+ #warning This path should never happen outside of internal operating system development. AvailabilityMacros do not function correctly here!
+ #define CL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+ #define GCL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+ #define CL_EXT_SUFFIX__VERSION_1_2 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+ #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+ #endif
+#else
+ #define CL_EXTENSION_WEAK_LINK
+ #define CL_API_SUFFIX__VERSION_1_0
+ #define CL_EXT_SUFFIX__VERSION_1_0
+ #define CL_API_SUFFIX__VERSION_1_1
+ #define CL_EXT_SUFFIX__VERSION_1_1
+ #define CL_API_SUFFIX__VERSION_1_2
+ #define CL_EXT_SUFFIX__VERSION_1_2
+
+ #ifdef __GNUC__
+ #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
+ #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
+ #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
+ #else
+ #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED __attribute__((deprecated))
+ #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
+ #endif
+
+ #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
+ #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+ #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
+ #else
+ #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED __attribute__((deprecated))
+ #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
+ #endif
+ #elif _WIN32
+ #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
+ #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
+ #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
+ #else
+ #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
+ #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED __declspec(deprecated)
+ #endif
+
+ #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
+ #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+ #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
+ #else
+ #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+ #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED __declspec(deprecated)
+ #endif
+ #else
+ #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
+ #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
+
+ #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+ #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
+ #endif
+#endif
+
+#if (defined (_WIN32) && defined(_MSC_VER))
+
+/* scalar types */
+typedef signed __int8 cl_char;
+typedef unsigned __int8 cl_uchar;
+typedef signed __int16 cl_short;
+typedef unsigned __int16 cl_ushort;
+typedef signed __int32 cl_int;
+typedef unsigned __int32 cl_uint;
+typedef signed __int64 cl_long;
+typedef unsigned __int64 cl_ulong;
+
+typedef unsigned __int16 cl_half;
+typedef float cl_float;
+typedef double cl_double;
+
+/* Macro names and corresponding values defined by OpenCL */
+#define CL_CHAR_BIT 8
+#define CL_SCHAR_MAX 127
+#define CL_SCHAR_MIN (-127-1)
+#define CL_CHAR_MAX CL_SCHAR_MAX
+#define CL_CHAR_MIN CL_SCHAR_MIN
+#define CL_UCHAR_MAX 255
+#define CL_SHRT_MAX 32767
+#define CL_SHRT_MIN (-32767-1)
+#define CL_USHRT_MAX 65535
+#define CL_INT_MAX 2147483647
+#define CL_INT_MIN (-2147483647-1)
+#define CL_UINT_MAX 0xffffffffU
+#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
+#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
+#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
+
+#define CL_FLT_DIG 6
+#define CL_FLT_MANT_DIG 24
+#define CL_FLT_MAX_10_EXP +38
+#define CL_FLT_MAX_EXP +128
+#define CL_FLT_MIN_10_EXP -37
+#define CL_FLT_MIN_EXP -125
+#define CL_FLT_RADIX 2
+#define CL_FLT_MAX 340282346638528859811704183484516925440.0f
+#define CL_FLT_MIN 1.175494350822287507969e-38f
+#define CL_FLT_EPSILON 0x1.0p-23f
+
+#define CL_DBL_DIG 15
+#define CL_DBL_MANT_DIG 53
+#define CL_DBL_MAX_10_EXP +308
+#define CL_DBL_MAX_EXP +1024
+#define CL_DBL_MIN_10_EXP -307
+#define CL_DBL_MIN_EXP -1021
+#define CL_DBL_RADIX 2
+#define CL_DBL_MAX 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0
+#define CL_DBL_MIN 2.225073858507201383090e-308
+#define CL_DBL_EPSILON 2.220446049250313080847e-16
+
+#define CL_M_E 2.718281828459045090796
+#define CL_M_LOG2E 1.442695040888963387005
+#define CL_M_LOG10E 0.434294481903251816668
+#define CL_M_LN2 0.693147180559945286227
+#define CL_M_LN10 2.302585092994045901094
+#define CL_M_PI 3.141592653589793115998
+#define CL_M_PI_2 1.570796326794896557999
+#define CL_M_PI_4 0.785398163397448278999
+#define CL_M_1_PI 0.318309886183790691216
+#define CL_M_2_PI 0.636619772367581382433
+#define CL_M_2_SQRTPI 1.128379167095512558561
+#define CL_M_SQRT2 1.414213562373095145475
+#define CL_M_SQRT1_2 0.707106781186547572737
+
+#define CL_M_E_F 2.71828174591064f
+#define CL_M_LOG2E_F 1.44269502162933f
+#define CL_M_LOG10E_F 0.43429449200630f
+#define CL_M_LN2_F 0.69314718246460f
+#define CL_M_LN10_F 2.30258512496948f
+#define CL_M_PI_F 3.14159274101257f
+#define CL_M_PI_2_F 1.57079637050629f
+#define CL_M_PI_4_F 0.78539818525314f
+#define CL_M_1_PI_F 0.31830987334251f
+#define CL_M_2_PI_F 0.63661974668503f
+#define CL_M_2_SQRTPI_F 1.12837922573090f
+#define CL_M_SQRT2_F 1.41421353816986f
+#define CL_M_SQRT1_2_F 0.70710676908493f
+
+#define CL_NAN (CL_INFINITY - CL_INFINITY)
+#define CL_HUGE_VALF ((cl_float) 1e50)
+#define CL_HUGE_VAL ((cl_double) 1e500)
+#define CL_MAXFLOAT CL_FLT_MAX
+#define CL_INFINITY CL_HUGE_VALF
+
+#else
+
+#include <stdint.h>
+
+/* scalar types */
+typedef int8_t cl_char;
+typedef uint8_t cl_uchar;
+typedef int16_t cl_short __attribute__((aligned(2)));
+typedef uint16_t cl_ushort __attribute__((aligned(2)));
+typedef int32_t cl_int __attribute__((aligned(4)));
+typedef uint32_t cl_uint __attribute__((aligned(4)));
+typedef int64_t cl_long __attribute__((aligned(8)));
+typedef uint64_t cl_ulong __attribute__((aligned(8)));
+
+typedef uint16_t cl_half __attribute__((aligned(2)));
+typedef float cl_float __attribute__((aligned(4)));
+typedef double cl_double __attribute__((aligned(8)));
+
+/* Macro names and corresponding values defined by OpenCL */
+#define CL_CHAR_BIT 8
+#define CL_SCHAR_MAX 127
+#define CL_SCHAR_MIN (-127-1)
+#define CL_CHAR_MAX CL_SCHAR_MAX
+#define CL_CHAR_MIN CL_SCHAR_MIN
+#define CL_UCHAR_MAX 255
+#define CL_SHRT_MAX 32767
+#define CL_SHRT_MIN (-32767-1)
+#define CL_USHRT_MAX 65535
+#define CL_INT_MAX 2147483647
+#define CL_INT_MIN (-2147483647-1)
+#define CL_UINT_MAX 0xffffffffU
+#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
+#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
+#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
+
+#define CL_FLT_DIG 6
+#define CL_FLT_MANT_DIG 24
+#define CL_FLT_MAX_10_EXP +38
+#define CL_FLT_MAX_EXP +128
+#define CL_FLT_MIN_10_EXP -37
+#define CL_FLT_MIN_EXP -125
+#define CL_FLT_RADIX 2
+#define CL_FLT_MAX 0x1.fffffep127f
+#define CL_FLT_MIN 0x1.0p-126f
+#define CL_FLT_EPSILON 0x1.0p-23f
+
+#define CL_DBL_DIG 15
+#define CL_DBL_MANT_DIG 53
+#define CL_DBL_MAX_10_EXP +308
+#define CL_DBL_MAX_EXP +1024
+#define CL_DBL_MIN_10_EXP -307
+#define CL_DBL_MIN_EXP -1021
+#define CL_DBL_RADIX 2
+#define CL_DBL_MAX 0x1.fffffffffffffp1023
+#define CL_DBL_MIN 0x1.0p-1022
+#define CL_DBL_EPSILON 0x1.0p-52
+
+#define CL_M_E 2.718281828459045090796
+#define CL_M_LOG2E 1.442695040888963387005
+#define CL_M_LOG10E 0.434294481903251816668
+#define CL_M_LN2 0.693147180559945286227
+#define CL_M_LN10 2.302585092994045901094
+#define CL_M_PI 3.141592653589793115998
+#define CL_M_PI_2 1.570796326794896557999
+#define CL_M_PI_4 0.785398163397448278999
+#define CL_M_1_PI 0.318309886183790691216
+#define CL_M_2_PI 0.636619772367581382433
+#define CL_M_2_SQRTPI 1.128379167095512558561
+#define CL_M_SQRT2 1.414213562373095145475
+#define CL_M_SQRT1_2 0.707106781186547572737
+
+#define CL_M_E_F 2.71828174591064f
+#define CL_M_LOG2E_F 1.44269502162933f
+#define CL_M_LOG10E_F 0.43429449200630f
+#define CL_M_LN2_F 0.69314718246460f
+#define CL_M_LN10_F 2.30258512496948f
+#define CL_M_PI_F 3.14159274101257f
+#define CL_M_PI_2_F 1.57079637050629f
+#define CL_M_PI_4_F 0.78539818525314f
+#define CL_M_1_PI_F 0.31830987334251f
+#define CL_M_2_PI_F 0.63661974668503f
+#define CL_M_2_SQRTPI_F 1.12837922573090f
+#define CL_M_SQRT2_F 1.41421353816986f
+#define CL_M_SQRT1_2_F 0.70710676908493f
+
+#if defined( __GNUC__ )
+ #define CL_HUGE_VALF __builtin_huge_valf()
+ #define CL_HUGE_VAL __builtin_huge_val()
+ #define CL_NAN __builtin_nanf( "" )
+#else
+ #define CL_HUGE_VALF ((cl_float) 1e50)
+ #define CL_HUGE_VAL ((cl_double) 1e500)
+ float nanf( const char * );
+ #define CL_NAN nanf( "" )
+#endif
+#define CL_MAXFLOAT CL_FLT_MAX
+#define CL_INFINITY CL_HUGE_VALF
+
+#endif
+
+#include <stddef.h>
+
+/* Mirror types to GL types. Mirror types allow us to avoid deciding which 87s to load based on whether we are using GL or GLES here. */
+typedef unsigned int cl_GLuint;
+typedef int cl_GLint;
+typedef unsigned int cl_GLenum;
+
+/*
+ * Vector types
+ *
+ * Note: OpenCL requires that all types be naturally aligned.
+ * This means that vector types must be naturally aligned.
+ * For example, a vector of four floats must be aligned to
+ * a 16 byte boundary (calculated as 4 * the natural 4-byte
+ * alignment of the float). The alignment qualifiers here
+ * will only function properly if your compiler supports them
+ * and if you don't actively work to defeat them. For example,
+ * in order for a cl_float4 to be 16 byte aligned in a struct,
+ * the start of the struct must itself be 16-byte aligned.
+ *
+ * Maintaining proper alignment is the user's responsibility.
+ */
+
+/* Define basic vector types */
+#if defined( __VEC__ )
+ #include <altivec.h> /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. */
+ typedef vector unsigned char __cl_uchar16;
+ typedef vector signed char __cl_char16;
+ typedef vector unsigned short __cl_ushort8;
+ typedef vector signed short __cl_short8;
+ typedef vector unsigned int __cl_uint4;
+ typedef vector signed int __cl_int4;
+ typedef vector float __cl_float4;
+ #define __CL_UCHAR16__ 1
+ #define __CL_CHAR16__ 1
+ #define __CL_USHORT8__ 1
+ #define __CL_SHORT8__ 1
+ #define __CL_UINT4__ 1
+ #define __CL_INT4__ 1
+ #define __CL_FLOAT4__ 1
+#endif
+
+#if defined( __SSE__ )
+ #if defined( __MINGW64__ )
+ #include <intrin.h>
+ #else
+ #include <xmmintrin.h>
+ #endif
+ #if defined( __GNUC__ )
+ typedef float __cl_float4 __attribute__((vector_size(16)));
+ #else
+ typedef __m128 __cl_float4;
+ #endif
+ #define __CL_FLOAT4__ 1
+#endif
+
+#if defined( __SSE2__ )
+ #if defined( __MINGW64__ )
+ #include <intrin.h>
+ #else
+ #include <emmintrin.h>
+ #endif
+ #if defined( __GNUC__ )
+ typedef cl_uchar __cl_uchar16 __attribute__((vector_size(16)));
+ typedef cl_char __cl_char16 __attribute__((vector_size(16)));
+ typedef cl_ushort __cl_ushort8 __attribute__((vector_size(16)));
+ typedef cl_short __cl_short8 __attribute__((vector_size(16)));
+ typedef cl_uint __cl_uint4 __attribute__((vector_size(16)));
+ typedef cl_int __cl_int4 __attribute__((vector_size(16)));
+ typedef cl_ulong __cl_ulong2 __attribute__((vector_size(16)));
+ typedef cl_long __cl_long2 __attribute__((vector_size(16)));
+ typedef cl_double __cl_double2 __attribute__((vector_size(16)));
+ #else
+ typedef __m128i __cl_uchar16;
+ typedef __m128i __cl_char16;
+ typedef __m128i __cl_ushort8;
+ typedef __m128i __cl_short8;
+ typedef __m128i __cl_uint4;
+ typedef __m128i __cl_int4;
+ typedef __m128i __cl_ulong2;
+ typedef __m128i __cl_long2;
+ typedef __m128d __cl_double2;
+ #endif
+ #define __CL_UCHAR16__ 1
+ #define __CL_CHAR16__ 1
+ #define __CL_USHORT8__ 1
+ #define __CL_SHORT8__ 1
+ #define __CL_INT4__ 1
+ #define __CL_UINT4__ 1
+ #define __CL_ULONG2__ 1
+ #define __CL_LONG2__ 1
+ #define __CL_DOUBLE2__ 1
+#endif
+
+#if defined( __MMX__ )
+ #include <mmintrin.h>
+ #if defined( __GNUC__ )
+ typedef cl_uchar __cl_uchar8 __attribute__((vector_size(8)));
+ typedef cl_char __cl_char8 __attribute__((vector_size(8)));
+ typedef cl_ushort __cl_ushort4 __attribute__((vector_size(8)));
+ typedef cl_short __cl_short4 __attribute__((vector_size(8)));
+ typedef cl_uint __cl_uint2 __attribute__((vector_size(8)));
+ typedef cl_int __cl_int2 __attribute__((vector_size(8)));
+ typedef cl_ulong __cl_ulong1 __attribute__((vector_size(8)));
+ typedef cl_long __cl_long1 __attribute__((vector_size(8)));
+ typedef cl_float __cl_float2 __attribute__((vector_size(8)));
+ #else
+ typedef __m64 __cl_uchar8;
+ typedef __m64 __cl_char8;
+ typedef __m64 __cl_ushort4;
+ typedef __m64 __cl_short4;
+ typedef __m64 __cl_uint2;
+ typedef __m64 __cl_int2;
+ typedef __m64 __cl_ulong1;
+ typedef __m64 __cl_long1;
+ typedef __m64 __cl_float2;
+ #endif
+ #define __CL_UCHAR8__ 1
+ #define __CL_CHAR8__ 1
+ #define __CL_USHORT4__ 1
+ #define __CL_SHORT4__ 1
+ #define __CL_INT2__ 1
+ #define __CL_UINT2__ 1
+ #define __CL_ULONG1__ 1
+ #define __CL_LONG1__ 1
+ #define __CL_FLOAT2__ 1
+#endif
+
+#if defined( __AVX__ )
+ #if defined( __MINGW64__ )
+ #include <intrin.h>
+ #else
+ #include <immintrin.h>
+ #endif
+ #if defined( __GNUC__ )
+ typedef cl_float __cl_float8 __attribute__((vector_size(32)));
+ typedef cl_double __cl_double4 __attribute__((vector_size(32)));
+ #else
+ typedef __m256 __cl_float8;
+ typedef __m256d __cl_double4;
+ #endif
+ #define __CL_FLOAT8__ 1
+ #define __CL_DOUBLE4__ 1
+#endif
+
+/* Define alignment keys */
+#if defined( __GNUC__ )
+ #define CL_ALIGNED(_x) __attribute__ ((aligned(_x)))
+#elif defined( _WIN32) && (_MSC_VER)
+ /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements */
+ /* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx */
+ /* #include <crtdefs.h> */
+ /* #define CL_ALIGNED(_x) _CRT_ALIGN(_x) */
+ #define CL_ALIGNED(_x)
+#else
+ #warning Need to implement some method to align data here
+ #define CL_ALIGNED(_x)
+#endif
+
+/* Indicate whether .xyzw, .s0123 and .hi.lo are supported */
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ /* .xyzw and .s0123...{f|F} are supported */
+ #define CL_HAS_NAMED_VECTOR_FIELDS 1
+ /* .hi and .lo are supported */
+ #define CL_HAS_HI_LO_VECTOR_FIELDS 1
+#endif
+
+/* Define cl_vector types */
+
+/* ---- cl_charn ---- */
+typedef union
+{
+ cl_char CL_ALIGNED(2) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_char x, y; };
+ __extension__ struct{ cl_char s0, s1; };
+ __extension__ struct{ cl_char lo, hi; };
+#endif
+#if defined( __CL_CHAR2__)
+ __cl_char2 v2;
+#endif
+}cl_char2;
+
+typedef union
+{
+ cl_char CL_ALIGNED(4) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_char x, y, z, w; };
+ __extension__ struct{ cl_char s0, s1, s2, s3; };
+ __extension__ struct{ cl_char2 lo, hi; };
+#endif
+#if defined( __CL_CHAR2__)
+ __cl_char2 v2[2];
+#endif
+#if defined( __CL_CHAR4__)
+ __cl_char4 v4;
+#endif
+}cl_char4;
+
+/* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5. */
+typedef cl_char4 cl_char3;
+
+typedef union
+{
+ cl_char CL_ALIGNED(8) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_char x, y, z, w; };
+ __extension__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7; };
+ __extension__ struct{ cl_char4 lo, hi; };
+#endif
+#if defined( __CL_CHAR2__)
+ __cl_char2 v2[4];
+#endif
+#if defined( __CL_CHAR4__)
+ __cl_char4 v4[2];
+#endif
+#if defined( __CL_CHAR8__ )
+ __cl_char8 v8;
+#endif
+}cl_char8;
+
+typedef union
+{
+ cl_char CL_ALIGNED(16) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_char x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __extension__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __extension__ struct{ cl_char8 lo, hi; };
+#endif
+#if defined( __CL_CHAR2__)
+ __cl_char2 v2[8];
+#endif
+#if defined( __CL_CHAR4__)
+ __cl_char4 v4[4];
+#endif
+#if defined( __CL_CHAR8__ )
+ __cl_char8 v8[2];
+#endif
+#if defined( __CL_CHAR16__ )
+ __cl_char16 v16;
+#endif
+}cl_char16;
+
+
+/* ---- cl_ucharn ---- */
+typedef union
+{
+ cl_uchar CL_ALIGNED(2) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_uchar x, y; };
+ __extension__ struct{ cl_uchar s0, s1; };
+ __extension__ struct{ cl_uchar lo, hi; };
+#endif
+#if defined( __cl_uchar2__)
+ __cl_uchar2 v2;
+#endif
+}cl_uchar2;
+
+typedef union
+{
+ cl_uchar CL_ALIGNED(4) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_uchar x, y, z, w; };
+ __extension__ struct{ cl_uchar s0, s1, s2, s3; };
+ __extension__ struct{ cl_uchar2 lo, hi; };
+#endif
+#if defined( __CL_UCHAR2__)
+ __cl_uchar2 v2[2];
+#endif
+#if defined( __CL_UCHAR4__)
+ __cl_uchar4 v4;
+#endif
+}cl_uchar4;
+
+/* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. */
+typedef cl_uchar4 cl_uchar3;
+
+typedef union
+{
+ cl_uchar CL_ALIGNED(8) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_uchar x, y, z, w; };
+ __extension__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7; };
+ __extension__ struct{ cl_uchar4 lo, hi; };
+#endif
+#if defined( __CL_UCHAR2__)
+ __cl_uchar2 v2[4];
+#endif
+#if defined( __CL_UCHAR4__)
+ __cl_uchar4 v4[2];
+#endif
+#if defined( __CL_UCHAR8__ )
+ __cl_uchar8 v8;
+#endif
+}cl_uchar8;
+
+typedef union
+{
+ cl_uchar CL_ALIGNED(16) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_uchar x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __extension__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __extension__ struct{ cl_uchar8 lo, hi; };
+#endif
+#if defined( __CL_UCHAR2__)
+ __cl_uchar2 v2[8];
+#endif
+#if defined( __CL_UCHAR4__)
+ __cl_uchar4 v4[4];
+#endif
+#if defined( __CL_UCHAR8__ )
+ __cl_uchar8 v8[2];
+#endif
+#if defined( __CL_UCHAR16__ )
+ __cl_uchar16 v16;
+#endif
+}cl_uchar16;
+
+
+/* ---- cl_shortn ---- */
+typedef union
+{
+ cl_short CL_ALIGNED(4) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_short x, y; };
+ __extension__ struct{ cl_short s0, s1; };
+ __extension__ struct{ cl_short lo, hi; };
+#endif
+#if defined( __CL_SHORT2__)
+ __cl_short2 v2;
+#endif
+}cl_short2;
+
+typedef union
+{
+ cl_short CL_ALIGNED(8) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_short x, y, z, w; };
+ __extension__ struct{ cl_short s0, s1, s2, s3; };
+ __extension__ struct{ cl_short2 lo, hi; };
+#endif
+#if defined( __CL_SHORT2__)
+ __cl_short2 v2[2];
+#endif
+#if defined( __CL_SHORT4__)
+ __cl_short4 v4;
+#endif
+}cl_short4;
+
+/* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5. */
+typedef cl_short4 cl_short3;
+
+typedef union
+{
+ cl_short CL_ALIGNED(16) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_short x, y, z, w; };
+ __extension__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7; };
+ __extension__ struct{ cl_short4 lo, hi; };
+#endif
+#if defined( __CL_SHORT2__)
+ __cl_short2 v2[4];
+#endif
+#if defined( __CL_SHORT4__)
+ __cl_short4 v4[2];
+#endif
+#if defined( __CL_SHORT8__ )
+ __cl_short8 v8;
+#endif
+}cl_short8;
+
+typedef union
+{
+ cl_short CL_ALIGNED(32) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_short x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __extension__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __extension__ struct{ cl_short8 lo, hi; };
+#endif
+#if defined( __CL_SHORT2__)
+ __cl_short2 v2[8];
+#endif
+#if defined( __CL_SHORT4__)
+ __cl_short4 v4[4];
+#endif
+#if defined( __CL_SHORT8__ )
+ __cl_short8 v8[2];
+#endif
+#if defined( __CL_SHORT16__ )
+ __cl_short16 v16;
+#endif
+}cl_short16;
+
+
+/* ---- cl_ushortn ---- */
+typedef union
+{
+ cl_ushort CL_ALIGNED(4) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_ushort x, y; };
+ __extension__ struct{ cl_ushort s0, s1; };
+ __extension__ struct{ cl_ushort lo, hi; };
+#endif
+#if defined( __CL_USHORT2__)
+ __cl_ushort2 v2;
+#endif
+}cl_ushort2;
+
+typedef union
+{
+ cl_ushort CL_ALIGNED(8) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_ushort x, y, z, w; };
+ __extension__ struct{ cl_ushort s0, s1, s2, s3; };
+ __extension__ struct{ cl_ushort2 lo, hi; };
+#endif
+#if defined( __CL_USHORT2__)
+ __cl_ushort2 v2[2];
+#endif
+#if defined( __CL_USHORT4__)
+ __cl_ushort4 v4;
+#endif
+}cl_ushort4;
+
+/* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5. */
+typedef cl_ushort4 cl_ushort3;
+
+typedef union
+{
+ cl_ushort CL_ALIGNED(16) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_ushort x, y, z, w; };
+ __extension__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7; };
+ __extension__ struct{ cl_ushort4 lo, hi; };
+#endif
+#if defined( __CL_USHORT2__)
+ __cl_ushort2 v2[4];
+#endif
+#if defined( __CL_USHORT4__)
+ __cl_ushort4 v4[2];
+#endif
+#if defined( __CL_USHORT8__ )
+ __cl_ushort8 v8;
+#endif
+}cl_ushort8;
+
+typedef union
+{
+ cl_ushort CL_ALIGNED(32) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_ushort x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __extension__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __extension__ struct{ cl_ushort8 lo, hi; };
+#endif
+#if defined( __CL_USHORT2__)
+ __cl_ushort2 v2[8];
+#endif
+#if defined( __CL_USHORT4__)
+ __cl_ushort4 v4[4];
+#endif
+#if defined( __CL_USHORT8__ )
+ __cl_ushort8 v8[2];
+#endif
+#if defined( __CL_USHORT16__ )
+ __cl_ushort16 v16;
+#endif
+}cl_ushort16;
+
+/* ---- cl_intn ---- */
+typedef union
+{
+ cl_int CL_ALIGNED(8) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_int x, y; };
+ __extension__ struct{ cl_int s0, s1; };
+ __extension__ struct{ cl_int lo, hi; };
+#endif
+#if defined( __CL_INT2__)
+ __cl_int2 v2;
+#endif
+}cl_int2;
+
+typedef union
+{
+ cl_int CL_ALIGNED(16) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_int x, y, z, w; };
+ __extension__ struct{ cl_int s0, s1, s2, s3; };
+ __extension__ struct{ cl_int2 lo, hi; };
+#endif
+#if defined( __CL_INT2__)
+ __cl_int2 v2[2];
+#endif
+#if defined( __CL_INT4__)
+ __cl_int4 v4;
+#endif
+}cl_int4;
+
+/* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5. */
+typedef cl_int4 cl_int3;
+
+typedef union
+{
+ cl_int CL_ALIGNED(32) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_int x, y, z, w; };
+ __extension__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7; };
+ __extension__ struct{ cl_int4 lo, hi; };
+#endif
+#if defined( __CL_INT2__)
+ __cl_int2 v2[4];
+#endif
+#if defined( __CL_INT4__)
+ __cl_int4 v4[2];
+#endif
+#if defined( __CL_INT8__ )
+ __cl_int8 v8;
+#endif
+}cl_int8;
+
+typedef union
+{
+ cl_int CL_ALIGNED(64) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_int x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __extension__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __extension__ struct{ cl_int8 lo, hi; };
+#endif
+#if defined( __CL_INT2__)
+ __cl_int2 v2[8];
+#endif
+#if defined( __CL_INT4__)
+ __cl_int4 v4[4];
+#endif
+#if defined( __CL_INT8__ )
+ __cl_int8 v8[2];
+#endif
+#if defined( __CL_INT16__ )
+ __cl_int16 v16;
+#endif
+}cl_int16;
+
+
+/* ---- cl_uintn ---- */
+typedef union
+{
+ cl_uint CL_ALIGNED(8) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_uint x, y; };
+ __extension__ struct{ cl_uint s0, s1; };
+ __extension__ struct{ cl_uint lo, hi; };
+#endif
+#if defined( __CL_UINT2__)
+ __cl_uint2 v2;
+#endif
+}cl_uint2;
+
+typedef union
+{
+ cl_uint CL_ALIGNED(16) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_uint x, y, z, w; };
+ __extension__ struct{ cl_uint s0, s1, s2, s3; };
+ __extension__ struct{ cl_uint2 lo, hi; };
+#endif
+#if defined( __CL_UINT2__)
+ __cl_uint2 v2[2];
+#endif
+#if defined( __CL_UINT4__)
+ __cl_uint4 v4;
+#endif
+}cl_uint4;
+
+/* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5. */
+typedef cl_uint4 cl_uint3;
+
+typedef union
+{
+ cl_uint CL_ALIGNED(32) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_uint x, y, z, w; };
+ __extension__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7; };
+ __extension__ struct{ cl_uint4 lo, hi; };
+#endif
+#if defined( __CL_UINT2__)
+ __cl_uint2 v2[4];
+#endif
+#if defined( __CL_UINT4__)
+ __cl_uint4 v4[2];
+#endif
+#if defined( __CL_UINT8__ )
+ __cl_uint8 v8;
+#endif
+}cl_uint8;
+
+typedef union
+{
+ cl_uint CL_ALIGNED(64) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_uint x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __extension__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __extension__ struct{ cl_uint8 lo, hi; };
+#endif
+#if defined( __CL_UINT2__)
+ __cl_uint2 v2[8];
+#endif
+#if defined( __CL_UINT4__)
+ __cl_uint4 v4[4];
+#endif
+#if defined( __CL_UINT8__ )
+ __cl_uint8 v8[2];
+#endif
+#if defined( __CL_UINT16__ )
+ __cl_uint16 v16;
+#endif
+}cl_uint16;
+
+/* ---- cl_longn ---- */
+typedef union
+{
+ cl_long CL_ALIGNED(16) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_long x, y; };
+ __extension__ struct{ cl_long s0, s1; };
+ __extension__ struct{ cl_long lo, hi; };
+#endif
+#if defined( __CL_LONG2__)
+ __cl_long2 v2;
+#endif
+}cl_long2;
+
+typedef union
+{
+ cl_long CL_ALIGNED(32) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_long x, y, z, w; };
+ __extension__ struct{ cl_long s0, s1, s2, s3; };
+ __extension__ struct{ cl_long2 lo, hi; };
+#endif
+#if defined( __CL_LONG2__)
+ __cl_long2 v2[2];
+#endif
+#if defined( __CL_LONG4__)
+ __cl_long4 v4;
+#endif
+}cl_long4;
+
+/* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5. */
+typedef cl_long4 cl_long3;
+
+typedef union
+{
+ cl_long CL_ALIGNED(64) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_long x, y, z, w; };
+ __extension__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7; };
+ __extension__ struct{ cl_long4 lo, hi; };
+#endif
+#if defined( __CL_LONG2__)
+ __cl_long2 v2[4];
+#endif
+#if defined( __CL_LONG4__)
+ __cl_long4 v4[2];
+#endif
+#if defined( __CL_LONG8__ )
+ __cl_long8 v8;
+#endif
+}cl_long8;
+
+typedef union
+{
+ cl_long CL_ALIGNED(128) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_long x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __extension__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __extension__ struct{ cl_long8 lo, hi; };
+#endif
+#if defined( __CL_LONG2__)
+ __cl_long2 v2[8];
+#endif
+#if defined( __CL_LONG4__)
+ __cl_long4 v4[4];
+#endif
+#if defined( __CL_LONG8__ )
+ __cl_long8 v8[2];
+#endif
+#if defined( __CL_LONG16__ )
+ __cl_long16 v16;
+#endif
+}cl_long16;
+
+
+/* ---- cl_ulongn ---- */
+typedef union
+{
+ cl_ulong CL_ALIGNED(16) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_ulong x, y; };
+ __extension__ struct{ cl_ulong s0, s1; };
+ __extension__ struct{ cl_ulong lo, hi; };
+#endif
+#if defined( __CL_ULONG2__)
+ __cl_ulong2 v2;
+#endif
+}cl_ulong2;
+
+typedef union
+{
+ cl_ulong CL_ALIGNED(32) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_ulong x, y, z, w; };
+ __extension__ struct{ cl_ulong s0, s1, s2, s3; };
+ __extension__ struct{ cl_ulong2 lo, hi; };
+#endif
+#if defined( __CL_ULONG2__)
+ __cl_ulong2 v2[2];
+#endif
+#if defined( __CL_ULONG4__)
+ __cl_ulong4 v4;
+#endif
+}cl_ulong4;
+
+/* cl_ulong3 is identical in size, alignment and behavior to cl_ulong4. See section 6.1.5. */
+typedef cl_ulong4 cl_ulong3;
+
+typedef union
+{
+ cl_ulong CL_ALIGNED(64) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_ulong x, y, z, w; };
+ __extension__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7; };
+ __extension__ struct{ cl_ulong4 lo, hi; };
+#endif
+#if defined( __CL_ULONG2__)
+ __cl_ulong2 v2[4];
+#endif
+#if defined( __CL_ULONG4__)
+ __cl_ulong4 v4[2];
+#endif
+#if defined( __CL_ULONG8__ )
+ __cl_ulong8 v8;
+#endif
+}cl_ulong8;
+
+typedef union
+{
+ cl_ulong CL_ALIGNED(128) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_ulong x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __extension__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __extension__ struct{ cl_ulong8 lo, hi; };
+#endif
+#if defined( __CL_ULONG2__)
+ __cl_ulong2 v2[8];
+#endif
+#if defined( __CL_ULONG4__)
+ __cl_ulong4 v4[4];
+#endif
+#if defined( __CL_ULONG8__ )
+ __cl_ulong8 v8[2];
+#endif
+#if defined( __CL_ULONG16__ )
+ __cl_ulong16 v16;
+#endif
+}cl_ulong16;
+
+
+/* --- cl_floatn ---- */
+
+typedef union
+{
+ cl_float CL_ALIGNED(8) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_float x, y; };
+ __extension__ struct{ cl_float s0, s1; };
+ __extension__ struct{ cl_float lo, hi; };
+#endif
+#if defined( __CL_FLOAT2__)
+ __cl_float2 v2;
+#endif
+}cl_float2;
+
+typedef union
+{
+ cl_float CL_ALIGNED(16) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_float x, y, z, w; };
+ __extension__ struct{ cl_float s0, s1, s2, s3; };
+ __extension__ struct{ cl_float2 lo, hi; };
+#endif
+#if defined( __CL_FLOAT2__)
+ __cl_float2 v2[2];
+#endif
+#if defined( __CL_FLOAT4__)
+ __cl_float4 v4;
+#endif
+}cl_float4;
+
+/* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5. */
+typedef cl_float4 cl_float3;
+
+typedef union
+{
+ cl_float CL_ALIGNED(32) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_float x, y, z, w; };
+ __extension__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7; };
+ __extension__ struct{ cl_float4 lo, hi; };
+#endif
+#if defined( __CL_FLOAT2__)
+ __cl_float2 v2[4];
+#endif
+#if defined( __CL_FLOAT4__)
+ __cl_float4 v4[2];
+#endif
+#if defined( __CL_FLOAT8__ )
+ __cl_float8 v8;
+#endif
+}cl_float8;
+
+typedef union
+{
+ cl_float CL_ALIGNED(64) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_float x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __extension__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __extension__ struct{ cl_float8 lo, hi; };
+#endif
+#if defined( __CL_FLOAT2__)
+ __cl_float2 v2[8];
+#endif
+#if defined( __CL_FLOAT4__)
+ __cl_float4 v4[4];
+#endif
+#if defined( __CL_FLOAT8__ )
+ __cl_float8 v8[2];
+#endif
+#if defined( __CL_FLOAT16__ )
+ __cl_float16 v16;
+#endif
+}cl_float16;
+
+/* --- cl_doublen ---- */
+
+typedef union
+{
+ cl_double CL_ALIGNED(16) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_double x, y; };
+ __extension__ struct{ cl_double s0, s1; };
+ __extension__ struct{ cl_double lo, hi; };
+#endif
+#if defined( __CL_DOUBLE2__)
+ __cl_double2 v2;
+#endif
+}cl_double2;
+
+typedef union
+{
+ cl_double CL_ALIGNED(32) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_double x, y, z, w; };
+ __extension__ struct{ cl_double s0, s1, s2, s3; };
+ __extension__ struct{ cl_double2 lo, hi; };
+#endif
+#if defined( __CL_DOUBLE2__)
+ __cl_double2 v2[2];
+#endif
+#if defined( __CL_DOUBLE4__)
+ __cl_double4 v4;
+#endif
+}cl_double4;
+
+/* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. */
+typedef cl_double4 cl_double3;
+
+typedef union
+{
+ cl_double CL_ALIGNED(64) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_double x, y, z, w; };
+ __extension__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7; };
+ __extension__ struct{ cl_double4 lo, hi; };
+#endif
+#if defined( __CL_DOUBLE2__)
+ __cl_double2 v2[4];
+#endif
+#if defined( __CL_DOUBLE4__)
+ __cl_double4 v4[2];
+#endif
+#if defined( __CL_DOUBLE8__ )
+ __cl_double8 v8;
+#endif
+}cl_double8;
+
+typedef union
+{
+ cl_double CL_ALIGNED(128) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+ __extension__ struct{ cl_double x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+ __extension__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+ __extension__ struct{ cl_double8 lo, hi; };
+#endif
+#if defined( __CL_DOUBLE2__)
+ __cl_double2 v2[8];
+#endif
+#if defined( __CL_DOUBLE4__)
+ __cl_double4 v4[4];
+#endif
+#if defined( __CL_DOUBLE8__ )
+ __cl_double8 v8[2];
+#endif
+#if defined( __CL_DOUBLE16__ )
+ __cl_double16 v16;
+#endif
+}cl_double16;
+
+/* Macro to facilitate debugging
+ * Usage:
+ * Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source.
+ * The first line ends with: CL_PROGRAM_STRING_DEBUG_INFO \"
+ * Each line thereafter of OpenCL C source must end with: \n\
+ * The last line ends in ";
+ *
+ * Example:
+ *
+ * const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\
+ * kernel void foo( int a, float * b ) \n\
+ * { \n\
+ * // my comment \n\
+ * *b[ get_global_id(0)] = a; \n\
+ * } \n\
+ * ";
+ *
+ * This should correctly set up the line, (column) and file information for your source
+ * string so you can do source level debugging.
+ */
+#define __CL_STRINGIFY( _x ) # _x
+#define _CL_STRINGIFY( _x ) __CL_STRINGIFY( _x )
+#define CL_PROGRAM_STRING_DEBUG_INFO "#line " _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __CL_PLATFORM_H */
#include "x264_config.h"
-#define X264_BUILD 132
+#define X264_BUILD 133
/* Application developers planning to link against a shared library version of
* libx264 from a Microsoft Visual Studio or similar development environment