]> git.sesse.net Git - x264/commitdiff
OpenCL support improvement/refactoring
authorAnton Mitrofanov <BugMaster@narod.ru>
Mon, 6 May 2013 18:51:11 +0000 (22:51 +0400)
committerFiona Glaser <fiona@x264.com>
Mon, 20 May 2013 19:25:24 +0000 (12:25 -0700)
Autoload the OpenCL library so that it's not required to run an openCL-enabled
build of x264.

Update X264_BUILD, which should have been changed with the first patch.

12 files changed:
Makefile
common/common.c
common/common.h
common/frame.c
common/opencl.c
common/opencl.h
configure
encoder/encoder.c
encoder/slicetype-cl.c
extras/cl.h [new file with mode: 0644]
extras/cl_platform.h [new file with mode: 0644]
x264.h

index da1b45544515841125955ce2c1bfbfaf4a4034ca..a1f122b4afe34ab6ba0dbb570f28c779cd00e23f 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -147,29 +147,7 @@ OBJSO  += $(if $(RC), x264res.dll.o)
 endif
 endif
 
-QUOTED_CFLAGS := $(CFLAGS)
-
 ifeq ($(HAVE_OPENCL),yes)
-empty:=
-space:=$(empty) $(empty)
-escaped:=\ $(empty)
-open:=(
-escopen:=\(
-close:=)
-escclose:=\)
-SAFE_INC_DIR := $(subst $(space),$(escaped),$(OPENCL_INC_DIR))
-SAFE_INC_DIR := $(subst $(open),$(escopen),$(SAFE_INC_DIR))
-SAFE_INC_DIR := $(subst $(close),$(escclose),$(SAFE_INC_DIR))
-SAFE_LIB_DIR := $(subst $(space),$(escaped),$(OPENCL_LIB_DIR))
-SAFE_LIB_DIR := $(subst $(open),$(escopen),$(SAFE_LIB_DIR))
-SAFE_LIB_DIR := $(subst $(close),$(escclose),$(SAFE_LIB_DIR))
-# For normal CFLAGS and LDFLAGS, we must escape spaces with a backslash to
-# make gcc happy
-CFLAGS += -I$(SAFE_INC_DIR) -DCL_USE_DEPRECATED_OPENCL_1_1_APIS
-LDFLAGS += -l$(OPENCL_LIB) -L$(SAFE_LIB_DIR)
-# For the CFLAGS used by the .depend rule, we must add quotes because
-# the rule does an extra level of shell expansions
-QUOTED_CFLAGS += -I"$(OPENCL_INC_DIR)" -DCL_USE_DEPRECATED_OPENCL_1_1_APIS
 common/oclobj.h: common/opencl/x264-cl.h $(wildcard $(SRCPATH)/common/opencl/*.cl)
        cat $^ | perl $(SRCPATH)/tools/cltostr.pl x264_opencl_source > $@
 GENERATED += common/oclobj.h
@@ -224,7 +202,7 @@ $(OBJS) $(OBJASM) $(OBJSO) $(OBJCLI) $(OBJCHK): .depend
 
 .depend: config.mak
        @rm -f .depend
-       @$(foreach SRC, $(addprefix $(SRCPATH)/, $(SRCS) $(SRCCLI) $(SRCSO)), $(CC) $(QUOTED_CFLAGS) $(SRC) $(DEPMT) $(SRC:$(SRCPATH)/%.c=%.o) $(DEPMM) 1>> .depend;)
+       @$(foreach SRC, $(addprefix $(SRCPATH)/, $(SRCS) $(SRCCLI) $(SRCSO)), $(CC) $(CFLAGS) $(SRC) $(DEPMT) $(SRC:$(SRCPATH)/%.c=%.o) $(DEPMM) 1>> .depend;)
 
 config.mak:
        ./configure
index 503d985342afe71d5ff03e993b7408928c71a3dd..d83956c92b9b718f2b67d2ccd1c3b919340a76b8 100644 (file)
@@ -1293,8 +1293,8 @@ char *x264_param2string( x264_param_t *p, int b_res )
     }
 
     if( p->b_opencl )
-        s += sprintf( s, "opencl=%d", p->b_opencl );
-    s += sprintf( s, " cabac=%d", p->b_cabac );
+        s += sprintf( s, "opencl=%d ", p->b_opencl );
+    s += sprintf( s, "cabac=%d", p->b_cabac );
     s += sprintf( s, " ref=%d", p->i_frame_reference );
     s += sprintf( s, " deblock=%d:%d:%d", p->b_deblocking_filter,
                   p->i_deblocking_filter_alphac0, p->i_deblocking_filter_beta );
index 1732d59b698a90509706412f9b86dd8f2d69ac16..b7cd22f697ecdf70991f26691048b65a02b631d3 100644 (file)
@@ -96,10 +96,6 @@ do {\
 #include <assert.h>
 #include <limits.h>
 
-#if HAVE_OPENCL
-#include "opencl.h"
-#endif
-
 #if HAVE_INTERLACED
 #   define MB_INTERLACED h->mb.b_interlaced
 #   define SLICE_MBAFF h->sh.b_mbaff
@@ -209,6 +205,9 @@ static const uint8_t x264_scan8[16*3 + 3] =
 };
 
 #include "x264.h"
+#if HAVE_OPENCL
+#include "opencl.h"
+#endif
 #include "cabac.h"
 #include "bitstream.h"
 #include "set.h"
index c1ddb569ce85800fc5a8aceb5f0b33b504c279e4..e56da8efda48e9967cc401b88303aa1b0479bfad 100644 (file)
@@ -261,6 +261,10 @@ static x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
     if( x264_pthread_cond_init( &frame->cv, NULL ) )
         goto fail;
 
+#if HAVE_OPENCL
+    frame->opencl.ocl = h->opencl.ocl;
+#endif
+
     return frame;
 
 fail:
index aa2bd045492d519ead3fe59709a639c4d1f80698..5b1bb69d499bfeb21c345ebb612d0fb8dfe82a3d 100644 (file)
@@ -4,6 +4,7 @@
  * Copyright (C) 2012-2013 x264 project
  *
  * Authors: Steve Borho <sborho@multicorewareinc.com>
+ *          Anton Mitrofanov <BugMaster@narod.ru>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  *****************************************************************************/
 
 #include "common.h"
-#if _WIN32
+
+#ifdef _WIN32
 #include <windows.h>
+#define ocl_open LoadLibrary( "OpenCL" )
+#define ocl_close FreeLibrary
+#define ocl_address GetProcAddress
 #else
 #include <dlfcn.h> //dlopen, dlsym, dlclose
+#if SYS_MACOSX
+#define ocl_open dlopen( "libOpenCL.dylib", RTLD_NOW )
+#else
+#define ocl_open dlopen( "libOpenCL.so", RTLD_NOW )
+#endif
+#define ocl_close dlclose
+#define ocl_address dlsym
 #endif
 
+#define LOAD_OCL_FUNC(name, continue_on_fail)\
+{\
+    ocl->name = (void*)ocl_address( ocl->library, #name );\
+    if( !continue_on_fail && !ocl->name )\
+        goto fail;\
+}
+
+/* load the library and functions we require from it */
+x264_opencl_function_t *x264_opencl_load_library( void )
+{
+    x264_opencl_function_t *ocl;
+#undef fail
+#define fail fail0
+    CHECKED_MALLOCZERO( ocl, sizeof(x264_opencl_function_t) );
+#undef fail
+#define fail fail1
+    ocl->library = ocl_open;
+    if( !ocl->library )
+        goto fail;
+#undef fail
+#define fail fail2
+    LOAD_OCL_FUNC( clBuildProgram, 0 );
+    LOAD_OCL_FUNC( clCreateBuffer, 0 );
+    LOAD_OCL_FUNC( clCreateCommandQueue, 0 );
+    LOAD_OCL_FUNC( clCreateContext, 0 );
+    LOAD_OCL_FUNC( clCreateImage2D, 0 );
+    LOAD_OCL_FUNC( clCreateKernel, 0 );
+    LOAD_OCL_FUNC( clCreateProgramWithBinary, 0 );
+    LOAD_OCL_FUNC( clCreateProgramWithSource, 0 );
+    LOAD_OCL_FUNC( clEnqueueCopyBuffer, 0 );
+    LOAD_OCL_FUNC( clEnqueueMapBuffer, 0 );
+    LOAD_OCL_FUNC( clEnqueueNDRangeKernel, 0 );
+    LOAD_OCL_FUNC( clEnqueueReadBuffer, 0 );
+    LOAD_OCL_FUNC( clEnqueueWriteBuffer, 0 );
+    LOAD_OCL_FUNC( clFinish, 0 );
+    LOAD_OCL_FUNC( clGetCommandQueueInfo, 0 );
+    LOAD_OCL_FUNC( clGetDeviceIDs, 0 );
+    LOAD_OCL_FUNC( clGetDeviceInfo, 0 );
+    LOAD_OCL_FUNC( clGetKernelWorkGroupInfo, 0 );
+    LOAD_OCL_FUNC( clGetPlatformIDs, 0 );
+    LOAD_OCL_FUNC( clGetProgramBuildInfo, 0 );
+    LOAD_OCL_FUNC( clGetProgramInfo, 0 );
+    LOAD_OCL_FUNC( clGetSupportedImageFormats, 0 );
+    LOAD_OCL_FUNC( clReleaseCommandQueue, 0 );
+    LOAD_OCL_FUNC( clReleaseContext, 0 );
+    LOAD_OCL_FUNC( clReleaseKernel, 0 );
+    LOAD_OCL_FUNC( clReleaseMemObject, 0 );
+    LOAD_OCL_FUNC( clReleaseProgram, 0 );
+    LOAD_OCL_FUNC( clSetKernelArg, 0 );
+    return ocl;
+#undef fail
+fail2:
+    ocl_close( ocl->library );
+fail1:
+    x264_free( ocl );
+fail0:
+    return NULL;
+}
+
+void x264_opencl_close_library( x264_opencl_function_t *ocl )
+{
+    if( !ocl )
+        return;
+    ocl_close( ocl->library );
+    x264_free( ocl );
+}
+
 /* define from recent cl_ext.h, copied here in case headers are old */
 #define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD        0x4042
 
 /* Requires full include path in case of out-of-tree builds */
 #include "common/oclobj.h"
 
-static int x264_detect_switchable_graphics();
+static int x264_detect_switchable_graphics( void );
 
 /* Try to load the cached compiled program binary, verify the device context is
  * still valid before reuse */
 static cl_program x264_opencl_cache_load( x264_t *h, char *devname, char *devvendor, char *driverversion )
 {
-    cl_program program = NULL;
-    cl_int status;
-
     /* try to load cached program binary */
     FILE *fp = fopen( h->param.psz_clbin_file, "rb" );
     if( !fp )
         return NULL;
 
-    fseek( fp, 0L, SEEK_END );
+    x264_opencl_function_t *ocl = h->opencl.ocl;
+    cl_program program = NULL;
+    cl_int status;
+
+    fseek( fp, 0, SEEK_END );
     size_t size = ftell( fp );
     rewind( fp );
     uint8_t *binary;
@@ -75,7 +155,7 @@ static cl_program x264_opencl_cache_load( x264_t *h, char *devname, char *devven
     CHECK_STRING( x264_opencl_source_hash );
 #undef CHECK_STRING
 
-    program = clCreateProgramWithBinary( h->opencl.context, 1, &h->opencl.device, &size, &ptr, NULL, &status );
+    program = ocl->clCreateProgramWithBinary( h->opencl.context, 1, &h->opencl.device, &size, &ptr, NULL, &status );
     if( status != CL_SUCCESS )
         program = NULL;
 
@@ -96,13 +176,14 @@ static void x264_opencl_cache_save( x264_t *h, cl_program program, char *devname
         return;
     }
 
+    x264_opencl_function_t *ocl = h->opencl.ocl;
     size_t size;
-    cl_int status = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL );
+    cl_int status = ocl->clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL );
     if( status == CL_SUCCESS )
     {
         uint8_t *binary;
         CHECKED_MALLOC( binary, size );
-        status = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof(uint8_t *), &binary, NULL );
+        status = ocl->clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof(uint8_t *), &binary, NULL );
         if( status == CL_SUCCESS )
         {
             fputs( devname, fp );
@@ -133,15 +214,16 @@ fail:
  * compiled binary, stored in the current working folder. */
 static cl_program x264_opencl_compile( x264_t *h )
 {
+    x264_opencl_function_t *ocl = h->opencl.ocl;
     cl_program program;
     cl_int status;
 
     char devname[64];
     char devvendor[64];
     char driverversion[64];
-    status  = clGetDeviceInfo( h->opencl.device, CL_DEVICE_NAME,    sizeof(devname), devname, NULL );
-    status |= clGetDeviceInfo( h->opencl.device, CL_DEVICE_VENDOR,  sizeof(devvendor), devvendor, NULL );
-    status |= clGetDeviceInfo( h->opencl.device, CL_DRIVER_VERSION, sizeof(driverversion), driverversion, NULL );
+    status  = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_NAME,    sizeof(devname), devname, NULL );
+    status |= ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_VENDOR,  sizeof(devvendor), devvendor, NULL );
+    status |= ocl->clGetDeviceInfo( h->opencl.device, CL_DRIVER_VERSION, sizeof(driverversion), driverversion, NULL );
     if( status != CL_SUCCESS )
         return NULL;
 
@@ -160,7 +242,7 @@ static cl_program x264_opencl_compile( x264_t *h )
 
         /* Detect AMD SouthernIsland or newer device (single-width registers) */
         cl_uint simdwidth = 4;
-        status = clGetDeviceInfo( h->opencl.device, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, sizeof(cl_uint), &simdwidth, NULL );
+        status = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, sizeof(cl_uint), &simdwidth, NULL );
         if( status == CL_SUCCESS && simdwidth == 1 )
         {
             vectorize = 0;
@@ -177,7 +259,7 @@ static cl_program x264_opencl_compile( x264_t *h )
         x264_log( h, X264_LOG_INFO, "Compiling OpenCL kernels...\n" );
         const char *strptr = (const char*)x264_opencl_source;
         size_t size = sizeof(x264_opencl_source);
-        program = clCreateProgramWithSource( h->opencl.context, 1, &strptr, &size, &status );
+        program = ocl->clCreateProgramWithSource( h->opencl.context, 1, &strptr, &size, &status );
         if( status != CL_SUCCESS || !program )
         {
             x264_log( h, X264_LOG_WARNING, "OpenCL: unable to create program\n" );
@@ -187,7 +269,7 @@ static cl_program x264_opencl_compile( x264_t *h )
 
     /* Build the program binary for the OpenCL device */
     const char *buildopts = vectorize ? "-DVECTORIZE=1" : "";
-    status = clBuildProgram( program, 1, &h->opencl.device, buildopts, NULL, NULL );
+    status = ocl->clBuildProgram( program, 1, &h->opencl.device, buildopts, NULL, NULL );
     if( status == CL_SUCCESS )
     {
         x264_opencl_cache_save( h, program, devname, devvendor, driverversion );
@@ -198,7 +280,7 @@ static cl_program x264_opencl_compile( x264_t *h )
 
     size_t build_log_len = 0;
 
-    status = clGetProgramBuildInfo( program, h->opencl.device, CL_PROGRAM_BUILD_LOG, build_log_len, NULL, &build_log_len );
+    status = ocl->clGetProgramBuildInfo( program, h->opencl.device, CL_PROGRAM_BUILD_LOG, build_log_len, NULL, &build_log_len );
     if( status != CL_SUCCESS )
     {
         x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to query build log\n" );
@@ -213,7 +295,7 @@ static cl_program x264_opencl_compile( x264_t *h )
         return NULL;
     }
 
-    status = clGetProgramBuildInfo( program, h->opencl.device, CL_PROGRAM_BUILD_LOG, build_log_len, build_log, NULL );
+    status = ocl->clGetProgramBuildInfo( program, h->opencl.device, CL_PROGRAM_BUILD_LOG, build_log_len, build_log, NULL );
     if( status != CL_SUCCESS )
     {
         x264_log( h, X264_LOG_WARNING, "OpenCL: Compilation failed, unable to get build log\n" );
@@ -234,42 +316,7 @@ fail:
     return NULL;
 }
 
-static void x264_opencl_free_lookahead( x264_t *h )
-{
-#define RELEASE( a, f ) if( a ) f( a );
-    RELEASE( h->opencl.intra_kernel, clReleaseKernel )
-    RELEASE( h->opencl.rowsum_intra_kernel, clReleaseKernel )
-    RELEASE( h->opencl.downscale_kernel1, clReleaseKernel )
-    RELEASE( h->opencl.downscale_kernel2, clReleaseKernel )
-    RELEASE( h->opencl.downscale_hpel_kernel, clReleaseKernel )
-    RELEASE( h->opencl.weightp_hpel_kernel, clReleaseKernel )
-    RELEASE( h->opencl.weightp_scaled_images_kernel, clReleaseKernel )
-    RELEASE( h->opencl.memset_kernel, clReleaseKernel )
-    RELEASE( h->opencl.hme_kernel, clReleaseKernel )
-    RELEASE( h->opencl.subpel_refine_kernel, clReleaseKernel )
-    RELEASE( h->opencl.mode_select_kernel, clReleaseKernel )
-    RELEASE( h->opencl.rowsum_inter_kernel, clReleaseKernel )
-    RELEASE( h->opencl.lookahead_program, clReleaseProgram )
-    RELEASE( h->opencl.row_satds[0], clReleaseMemObject )
-    RELEASE( h->opencl.row_satds[1], clReleaseMemObject )
-    RELEASE( h->opencl.frame_stats[0], clReleaseMemObject )
-    RELEASE( h->opencl.frame_stats[1], clReleaseMemObject )
-    RELEASE( h->opencl.mv_buffers[0], clReleaseMemObject )
-    RELEASE( h->opencl.mv_buffers[1], clReleaseMemObject )
-    RELEASE( h->opencl.mvp_buffer, clReleaseMemObject )
-    RELEASE( h->opencl.luma_16x16_image[0], clReleaseMemObject )
-    RELEASE( h->opencl.luma_16x16_image[1], clReleaseMemObject )
-    RELEASE( h->opencl.lowres_mv_costs, clReleaseMemObject )
-    RELEASE( h->opencl.lowres_costs[0], clReleaseMemObject )
-    RELEASE( h->opencl.lowres_costs[1], clReleaseMemObject )
-    RELEASE( h->opencl.page_locked_buffer, clReleaseMemObject )
-    RELEASE( h->opencl.weighted_luma_hpel, clReleaseMemObject )
-    for( int i = 0; i < NUM_IMAGE_SCALES; i++ )
-        RELEASE( h->opencl.weighted_scaled_images[i], clReleaseMemObject )
-#undef RELEASE
-}
-
-int x264_opencl_init_lookahead( x264_t *h )
+static int x264_opencl_lookahead_alloc( x264_t *h )
 {
     if( !h->param.rc.i_lookahead )
         return -1;
@@ -288,6 +335,7 @@ int x264_opencl_init_lookahead( x264_t *h )
         "mode_selection",
         "sum_inter_cost"
     };
+
     cl_kernel *kernels[] = {
         &h->opencl.intra_kernel,
         &h->opencl.rowsum_intra_kernel,
@@ -302,43 +350,42 @@ int x264_opencl_init_lookahead( x264_t *h )
         &h->opencl.mode_select_kernel,
         &h->opencl.rowsum_inter_kernel
     };
+
+    x264_opencl_function_t *ocl = h->opencl.ocl;
     cl_int status;
 
     h->opencl.lookahead_program = x264_opencl_compile( h );
     if( !h->opencl.lookahead_program )
-    {
-        x264_opencl_free_lookahead( h );
-        return -1;
-    }
+        goto fail;
 
     for( int i = 0; i < ARRAY_SIZE(kernelnames); i++ )
     {
-        *kernels[i] = clCreateKernel( h->opencl.lookahead_program, kernelnames[i], &status );
+        *kernels[i] = ocl->clCreateKernel( h->opencl.lookahead_program, kernelnames[i], &status );
         if( status != CL_SUCCESS )
         {
             x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to compile kernel '%s' (%d)\n", kernelnames[i], status );
-            x264_opencl_free_lookahead( h );
-            return -1;
+            goto fail;
         }
     }
 
-    h->opencl.page_locked_buffer = clCreateBuffer( h->opencl.context, CL_MEM_WRITE_ONLY|CL_MEM_ALLOC_HOST_PTR, PAGE_LOCKED_BUF_SIZE, NULL, &status );
+    h->opencl.page_locked_buffer = ocl->clCreateBuffer( h->opencl.context, CL_MEM_WRITE_ONLY|CL_MEM_ALLOC_HOST_PTR, PAGE_LOCKED_BUF_SIZE, NULL, &status );
     if( status != CL_SUCCESS )
     {
         x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to allocate page-locked buffer, error '%d'\n", status );
-        x264_opencl_free_lookahead( h );
-        return -1;
+        goto fail;
     }
-    h->opencl.page_locked_ptr = clEnqueueMapBuffer( h->opencl.queue, h->opencl.page_locked_buffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
-                                                    0, PAGE_LOCKED_BUF_SIZE, 0, NULL, NULL, &status );
+    h->opencl.page_locked_ptr = ocl->clEnqueueMapBuffer( h->opencl.queue, h->opencl.page_locked_buffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
+                                                         0, PAGE_LOCKED_BUF_SIZE, 0, NULL, NULL, &status );
     if( status != CL_SUCCESS )
     {
         x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to map page-locked buffer, error '%d'\n", status );
-        x264_opencl_free_lookahead( h );
-        return -1;
+        goto fail;
     }
 
     return 0;
+fail:
+    x264_opencl_lookahead_delete( h );
+    return -1;
 }
 
 static void x264_opencl_error_notify( const char *errinfo, const void *private_info, size_t cb, void *user_data )
@@ -352,13 +399,14 @@ static void x264_opencl_error_notify( const char *errinfo, const void *private_i
     x264_log( h, X264_LOG_ERROR, "OpenCL: fatal error, aborting encode\n" );
 }
 
-int x264_opencl_init( x264_t *h )
+int x264_opencl_lookahead_init( x264_t *h )
 {
+    x264_opencl_function_t *ocl = h->opencl.ocl;
     cl_int status;
     cl_uint numPlatforms;
     int ret = -1;
 
-    status = clGetPlatformIDs( 0, NULL, &numPlatforms );
+    status = ocl->clGetPlatformIDs( 0, NULL, &numPlatforms );
     if( status != CL_SUCCESS || numPlatforms == 0 )
     {
         x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to query installed platforms\n");
@@ -366,7 +414,7 @@ int x264_opencl_init( x264_t *h )
     }
 
     cl_platform_id *platforms = (cl_platform_id*)x264_malloc( numPlatforms * sizeof(cl_platform_id) );
-    status = clGetPlatformIDs( numPlatforms, platforms, NULL );
+    status = ocl->clGetPlatformIDs( numPlatforms, platforms, NULL );
     if( status != CL_SUCCESS )
     {
         x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to query installed platforms\n");
@@ -379,7 +427,7 @@ int x264_opencl_init( x264_t *h )
     for( cl_uint i = 0; i < numPlatforms; ++i )
     {
         cl_uint gpu_count = 0;
-        status = clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &gpu_count );
+        status = ocl->clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &gpu_count );
         if( status != CL_SUCCESS || !gpu_count )
             continue;
 
@@ -387,7 +435,7 @@ int x264_opencl_init( x264_t *h )
         if( !devices )
             continue;
 
-        status = clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, gpu_count, devices, NULL );
+        status = ocl->clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, gpu_count, devices, NULL );
         if( status != CL_SUCCESS )
         {
             x264_free( devices );
@@ -406,30 +454,30 @@ int x264_opencl_init( x264_t *h )
                 continue;
 
             cl_bool image_support;
-            clGetDeviceInfo( h->opencl.device, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool), &image_support, NULL );
+            ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool), &image_support, NULL );
             if( !image_support )
                 continue;
 
-            cl_context context = clCreateContext( NULL, 1, &h->opencl.device, (void*)x264_opencl_error_notify, (void*)h, &status );
+            cl_context context = ocl->clCreateContext( NULL, 1, &h->opencl.device, (void*)x264_opencl_error_notify, (void*)h, &status );
             if( status != CL_SUCCESS )
                 continue;
 
             cl_uint imagecount = 0;
-            clGetSupportedImageFormats( context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &imagecount );
+            ocl->clGetSupportedImageFormats( context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &imagecount );
             if( !imagecount )
             {
-                clReleaseContext( context );
+                ocl->clReleaseContext( context );
                 continue;
             }
 
             cl_image_format *imageType = x264_malloc( sizeof(cl_image_format) * imagecount );
             if( !imageType )
             {
-                clReleaseContext( context );
+                ocl->clReleaseContext( context );
                 continue;
             }
 
-            clGetSupportedImageFormats( context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, imagecount, imageType, NULL );
+            ocl->clGetSupportedImageFormats( context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, imagecount, imageType, NULL );
 
             int b_has_r = 0;
             int b_has_rgba = 0;
@@ -446,14 +494,14 @@ int x264_opencl_init( x264_t *h )
             if( !b_has_r || !b_has_rgba )
             {
                 char devname[64];
-                status = clGetDeviceInfo( h->opencl.device, CL_DEVICE_NAME, sizeof(devname), devname, NULL );
+                status = ocl->clGetDeviceInfo( h->opencl.device, CL_DEVICE_NAME, sizeof(devname), devname, NULL );
                 if( status == CL_SUCCESS )
                 {
                     /* emit warning if we are discarding the user's explicit choice */
                     int level = h->param.opencl_device_id ? X264_LOG_WARNING : X264_LOG_DEBUG;
                     x264_log( h, level, "OpenCL: %s does not support required image formats\n", devname);
                 }
-                clReleaseContext( context );
+                ocl->clReleaseContext( context );
                 continue;
             }
 
@@ -461,14 +509,14 @@ int x264_opencl_init( x264_t *h )
             if( h->param.i_opencl_device )
             {
                 h->param.i_opencl_device--;
-                clReleaseContext( context );
+                ocl->clReleaseContext( context );
                 continue;
             }
 
-            h->opencl.queue = clCreateCommandQueue( context, h->opencl.device, 0, &status );
+            h->opencl.queue = ocl->clCreateCommandQueue( context, h->opencl.device, 0, &status );
             if( status != CL_SUCCESS )
             {
-                clReleaseContext( context );
+                ocl->clReleaseContext( context );
                 continue;
             }
 
@@ -492,14 +540,82 @@ int x264_opencl_init( x264_t *h )
     if( ret )
         x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to find a compatible device\n");
     else
-        ret = x264_opencl_init_lookahead( h );
+        ret = x264_opencl_lookahead_alloc( h );
 
     return ret;
 }
 
+static void x264_opencl_lookahead_free( x264_t *h )
+{
+    x264_opencl_function_t *ocl = h->opencl.ocl;
+
+#define RELEASE( a, f ) do { if( a ) { ocl->f( a ); a = NULL; } } while( 0 )
+    RELEASE( h->opencl.downscale_hpel_kernel, clReleaseKernel );
+    RELEASE( h->opencl.downscale_kernel1, clReleaseKernel );
+    RELEASE( h->opencl.downscale_kernel2, clReleaseKernel );
+    RELEASE( h->opencl.weightp_hpel_kernel, clReleaseKernel );
+    RELEASE( h->opencl.weightp_scaled_images_kernel, clReleaseKernel );
+    RELEASE( h->opencl.memset_kernel, clReleaseKernel );
+    RELEASE( h->opencl.intra_kernel, clReleaseKernel );
+    RELEASE( h->opencl.rowsum_intra_kernel, clReleaseKernel );
+    RELEASE( h->opencl.hme_kernel, clReleaseKernel );
+    RELEASE( h->opencl.subpel_refine_kernel, clReleaseKernel );
+    RELEASE( h->opencl.mode_select_kernel, clReleaseKernel );
+    RELEASE( h->opencl.rowsum_inter_kernel, clReleaseKernel );
+
+    RELEASE( h->opencl.lookahead_program, clReleaseProgram );
+
+    RELEASE( h->opencl.page_locked_buffer, clReleaseMemObject );
+    RELEASE( h->opencl.luma_16x16_image[0], clReleaseMemObject );
+    RELEASE( h->opencl.luma_16x16_image[1], clReleaseMemObject );
+    for( int i = 0; i < NUM_IMAGE_SCALES; i++ )
+        RELEASE( h->opencl.weighted_scaled_images[i], clReleaseMemObject );
+    RELEASE( h->opencl.weighted_luma_hpel, clReleaseMemObject );
+    RELEASE( h->opencl.row_satds[0], clReleaseMemObject );
+    RELEASE( h->opencl.row_satds[1], clReleaseMemObject );
+    RELEASE( h->opencl.mv_buffers[0], clReleaseMemObject );
+    RELEASE( h->opencl.mv_buffers[1], clReleaseMemObject );
+    RELEASE( h->opencl.lowres_mv_costs, clReleaseMemObject );
+    RELEASE( h->opencl.mvp_buffer, clReleaseMemObject );
+    RELEASE( h->opencl.lowres_costs[0], clReleaseMemObject );
+    RELEASE( h->opencl.lowres_costs[1], clReleaseMemObject );
+    RELEASE( h->opencl.frame_stats[0], clReleaseMemObject );
+    RELEASE( h->opencl.frame_stats[1], clReleaseMemObject );
+#undef RELEASE
+}
+
+void x264_opencl_lookahead_delete( x264_t *h )
+{
+    x264_opencl_function_t *ocl = h->opencl.ocl;
+
+    if( !ocl )
+        return;
+
+    if( h->opencl.queue )
+        ocl->clFinish( h->opencl.queue );
+
+    x264_opencl_lookahead_free( h );
+
+    if( h->opencl.queue )
+    {
+        ocl->clReleaseCommandQueue( h->opencl.queue );
+        h->opencl.queue = NULL;
+    }
+    if( h->opencl.context )
+    {
+        ocl->clReleaseContext( h->opencl.context );
+        h->opencl.context = NULL;
+    }
+}
+
 void x264_opencl_frame_delete( x264_frame_t *frame )
 {
-#define RELEASEBUF(mem) if( mem ) clReleaseMemObject( mem );
+    x264_opencl_function_t *ocl = frame->opencl.ocl;
+
+    if( !ocl )
+        return;
+
+#define RELEASEBUF(mem) do { if( mem ) { ocl->clReleaseMemObject( mem ); mem = NULL; } } while( 0 )
     for( int j = 0; j < NUM_IMAGE_SCALES; j++ )
         RELEASEBUF( frame->opencl.scaled_image2Ds[j] );
     RELEASEBUF( frame->opencl.luma_hpel );
@@ -512,73 +628,68 @@ void x264_opencl_frame_delete( x264_frame_t *frame )
 #undef RELEASEBUF
 }
 
-void x264_opencl_free( x264_t *h )
-{
-    if( h->opencl.queue )
-        clFinish(h->opencl.queue );
-
-    x264_opencl_free_lookahead( h );
-
-    if( h->opencl.queue )
-        clReleaseCommandQueue( h->opencl.queue );
-    if( h->opencl.context )
-        clReleaseContext( h->opencl.context );
-}
-
 /* OpenCL misbehaves on hybrid laptops with Intel iGPU and AMD dGPU, so
  * we consult AMD's ADL interface to detect this situation and disable
  * OpenCL on these machines (Linux and Windows) */
-#ifndef _WIN32
-#define __stdcall
-#define HINSTANCE void *
+#ifdef _WIN32
+#define ADL_API_CALL
+#define ADL_CALLBACK __stdcall
+#define adl_close FreeLibrary
+#define adl_address GetProcAddress
+#else
+#define ADL_API_CALL
+#define ADL_CALLBACK
+#define adl_close dlclose
+#define adl_address dlsym
 #endif
-typedef void* ( __stdcall *ADL_MAIN_MALLOC_CALLBACK )( int );
-typedef int ( *ADL_MAIN_CONTROL_CREATE )(ADL_MAIN_MALLOC_CALLBACK, int );
-typedef int ( *ADL_ADAPTER_NUMBEROFADAPTERS_GET ) ( int* );
-typedef int ( *ADL_POWERXPRESS_SCHEME_GET ) ( int, int *, int *, int * );
-typedef int ( *ADL_MAIN_CONTROL_DESTROY )();
+
+typedef void* ( ADL_CALLBACK *ADL_MAIN_MALLOC_CALLBACK )( int );
+typedef int   ( ADL_API_CALL *ADL_MAIN_CONTROL_CREATE )( ADL_MAIN_MALLOC_CALLBACK, int );
+typedef int   ( ADL_API_CALL *ADL_ADAPTER_NUMBEROFADAPTERS_GET )( int * );
+typedef int   ( ADL_API_CALL *ADL_POWERXPRESS_SCHEME_GET )( int, int *, int *, int * );
+typedef int   ( ADL_API_CALL *ADL_MAIN_CONTROL_DESTROY )( void );
+
 #define ADL_OK 0
 #define ADL_PX_SCHEME_DYNAMIC 2
 
-void* __stdcall adl_malloc_wrapper( int iSize ) { return x264_malloc( iSize ); }
+static void* ADL_CALLBACK adl_malloc_wrapper( int iSize )
+{
+    return x264_malloc( iSize );
+}
 
-static int x264_detect_switchable_graphics()
+static int x264_detect_switchable_graphics( void )
 {
+    void *hDLL;
     ADL_MAIN_CONTROL_CREATE          ADL_Main_Control_Create;
     ADL_ADAPTER_NUMBEROFADAPTERS_GET ADL_Adapter_NumberOfAdapters_Get;
     ADL_POWERXPRESS_SCHEME_GET       ADL_PowerXpress_Scheme_Get;
     ADL_MAIN_CONTROL_DESTROY         ADL_Main_Control_Destroy;
-    HINSTANCE hDLL;
     int ret = 0;
 
-#if _WIN32
+#ifdef _WIN32
     hDLL = LoadLibrary( "atiadlxx.dll" );
     if( !hDLL )
         hDLL = LoadLibrary( "atiadlxy.dll" );
 #else
     hDLL = dlopen( "libatiadlxx.so", RTLD_LAZY|RTLD_GLOBAL );
-#define GetProcAddress dlsym
 #endif
     if( !hDLL )
-        return ret;
+        goto fail0;
 
-    ADL_Main_Control_Create = (ADL_MAIN_CONTROL_CREATE) GetProcAddress(hDLL, "ADL_Main_Control_Create");
-    ADL_Main_Control_Destroy = (ADL_MAIN_CONTROL_DESTROY) GetProcAddress(hDLL, "ADL_Main_Control_Destroy");
-    ADL_Adapter_NumberOfAdapters_Get = (ADL_ADAPTER_NUMBEROFADAPTERS_GET) GetProcAddress(hDLL, "ADL_Adapter_NumberOfAdapters_Get");
-    ADL_PowerXpress_Scheme_Get = (ADL_POWERXPRESS_SCHEME_GET) GetProcAddress(hDLL, "ADL_PowerXpress_Scheme_Get");
+    ADL_Main_Control_Create          = (ADL_MAIN_CONTROL_CREATE)adl_address(hDLL, "ADL_Main_Control_Create");
+    ADL_Main_Control_Destroy         = (ADL_MAIN_CONTROL_DESTROY)adl_address(hDLL, "ADL_Main_Control_Destroy");
+    ADL_Adapter_NumberOfAdapters_Get = (ADL_ADAPTER_NUMBEROFADAPTERS_GET)adl_address(hDLL, "ADL_Adapter_NumberOfAdapters_Get");
+    ADL_PowerXpress_Scheme_Get       = (ADL_POWERXPRESS_SCHEME_GET)adl_address(hDLL, "ADL_PowerXpress_Scheme_Get");
     if( !ADL_Main_Control_Destroy || !ADL_Main_Control_Destroy || !ADL_Adapter_NumberOfAdapters_Get ||
         !ADL_PowerXpress_Scheme_Get )
-        goto bail;
+        goto fail1;
 
-    if( ADL_OK != ADL_Main_Control_Create( adl_malloc_wrapper, 1) )
-        goto bail;
+    if( ADL_OK != ADL_Main_Control_Create( adl_malloc_wrapper, 1 ) )
+        goto fail1;
 
     int numAdapters = 0;
     if( ADL_OK != ADL_Adapter_NumberOfAdapters_Get( &numAdapters ) )
-    {
-        ADL_Main_Control_Destroy();
-        goto bail;
-    }
+        goto fail2;
 
     for( int i = 0; i < numAdapters; i++ )
     {
@@ -593,14 +704,10 @@ static int x264_detect_switchable_graphics()
         }
     }
 
+fail2:
     ADL_Main_Control_Destroy();
-
-bail:
-#if _WIN32
-    FreeLibrary( hDLL );
-#else
-    dlclose( hDLL );
-#endif
-
+fail1:
+    adl_close( hDLL );
+fail0:
     return ret;
 }
index 86a30590bd1e3707f8d65ac82c43243f9226daef..426bf140ea12a18465b9780da0b60841ed77bbe2 100644 (file)
@@ -4,6 +4,7 @@
  * Copyright (C) 2012-2013 x264 project
  *
  * Authors: Steve Borho <sborho@multicorewareinc.com>
+ *          Anton Mitrofanov <BugMaster@narod.ru>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
 #ifndef X264_OPENCL_H
 #define X264_OPENCL_H
 
-#include "x264.h"
-#include "common/common.h"
+#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
+#include "extras/cl.h"
 
-#include <CL/cl.h>
+#define OCL_API(ret, attr, name) typedef ret (attr *name##_func)
+
+/* Platform API */
+OCL_API(cl_int, CL_API_CALL, clGetPlatformIDs)
+(   cl_uint          /* num_entries */,
+    cl_platform_id * /* platforms */,
+    cl_uint *        /* num_platforms */);
+
+OCL_API(cl_int, CL_API_CALL, clGetPlatformInfo)
+(   cl_platform_id   /* platform */,
+    cl_platform_info /* param_name */,
+    size_t           /* param_value_size */,
+    void *           /* param_value */,
+    size_t *         /* param_value_size_ret */);
+
+/* Device APIs */
+OCL_API(cl_int, CL_API_CALL, clGetDeviceIDs)
+(   cl_platform_id   /* platform */,
+    cl_device_type   /* device_type */,
+    cl_uint          /* num_entries */,
+    cl_device_id *   /* devices */,
+    cl_uint *        /* num_devices */);
+
+OCL_API(cl_int, CL_API_CALL, clGetDeviceInfo)
+(   cl_device_id    /* device */,
+    cl_device_info  /* param_name */,
+    size_t          /* param_value_size */,
+    void *          /* param_value */,
+    size_t *        /* param_value_size_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clCreateSubDevices)
+(   cl_device_id                         /* in_device */,
+    const cl_device_partition_property * /* properties */,
+    cl_uint                              /* num_devices */,
+    cl_device_id *                       /* out_devices */,
+    cl_uint *                            /* num_devices_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clRetainDevice)
+(   cl_device_id /* device */);
+
+OCL_API(cl_int, CL_API_CALL, clReleaseDevice)
+(   cl_device_id /* device */);
+
+/* Context APIs  */
+OCL_API(cl_context, CL_API_CALL, clCreateContext)
+(   const cl_context_properties * /* properties */,
+    cl_uint                 /* num_devices */,
+    const cl_device_id *    /* devices */,
+    void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *),
+    void *                  /* user_data */,
+    cl_int *                /* errcode_ret */);
+
+OCL_API(cl_context, CL_API_CALL, clCreateContextFromType)
+(   const cl_context_properties * /* properties */,
+    cl_device_type          /* device_type */,
+    void (CL_CALLBACK *     /* pfn_notify*/ )(const char *, const void *, size_t, void *),
+    void *                  /* user_data */,
+    cl_int *                /* errcode_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clRetainContext)
+(   cl_context /* context */);
+
+OCL_API(cl_int, CL_API_CALL, clReleaseContext)
+(   cl_context /* context */);
+
+OCL_API(cl_int, CL_API_CALL, clGetContextInfo)
+(   cl_context         /* context */,
+    cl_context_info    /* param_name */,
+    size_t             /* param_value_size */,
+    void *             /* param_value */,
+    size_t *           /* param_value_size_ret */);
+
+/* Command Queue APIs */
+OCL_API(cl_command_queue, CL_API_CALL, clCreateCommandQueue)
+(   cl_context                     /* context */,
+    cl_device_id                   /* device */,
+    cl_command_queue_properties    /* properties */,
+    cl_int *                       /* errcode_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clRetainCommandQueue)
+(   cl_command_queue /* command_queue */);
+
+OCL_API(cl_int, CL_API_CALL, clReleaseCommandQueue)
+(   cl_command_queue /* command_queue */);
+
+OCL_API(cl_int, CL_API_CALL, clGetCommandQueueInfo)
+(   cl_command_queue      /* command_queue */,
+    cl_command_queue_info /* param_name */,
+    size_t                /* param_value_size */,
+    void *                /* param_value */,
+    size_t *              /* param_value_size_ret */);
+
+/* Memory Object APIs */
+OCL_API(cl_mem, CL_API_CALL, clCreateBuffer)
+(   cl_context   /* context */,
+    cl_mem_flags /* flags */,
+    size_t       /* size */,
+    void *       /* host_ptr */,
+    cl_int *     /* errcode_ret */);
+
+OCL_API(cl_mem, CL_API_CALL, clCreateSubBuffer)
+(   cl_mem                   /* buffer */,
+    cl_mem_flags             /* flags */,
+    cl_buffer_create_type    /* buffer_create_type */,
+    const void *             /* buffer_create_info */,
+    cl_int *                 /* errcode_ret */);
+
+OCL_API(cl_mem, CL_API_CALL, clCreateImage)
+(   cl_context              /* context */,
+    cl_mem_flags            /* flags */,
+    const cl_image_format * /* image_format */,
+    const cl_image_desc *   /* image_desc */,
+    void *                  /* host_ptr */,
+    cl_int *                /* errcode_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clRetainMemObject)
+(   cl_mem /* memobj */);
+
+OCL_API(cl_int, CL_API_CALL, clReleaseMemObject)
+(   cl_mem /* memobj */);
+
+OCL_API(cl_int, CL_API_CALL, clGetSupportedImageFormats)
+(   cl_context           /* context */,
+    cl_mem_flags         /* flags */,
+    cl_mem_object_type   /* image_type */,
+    cl_uint              /* num_entries */,
+    cl_image_format *    /* image_formats */,
+    cl_uint *            /* num_image_formats */);
+
+OCL_API(cl_int, CL_API_CALL, clGetMemObjectInfo)
+(   cl_mem           /* memobj */,
+    cl_mem_info      /* param_name */,
+    size_t           /* param_value_size */,
+    void *           /* param_value */,
+    size_t *         /* param_value_size_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clGetImageInfo)
+(   cl_mem           /* image */,
+    cl_image_info    /* param_name */,
+    size_t           /* param_value_size */,
+    void *           /* param_value */,
+    size_t *         /* param_value_size_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clSetMemObjectDestructorCallback)
+(   cl_mem /* memobj */,
+    void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
+    void * /*user_data */ );
+
+/* Sampler APIs */
+OCL_API(cl_sampler, CL_API_CALL, clCreateSampler)
+(   cl_context          /* context */,
+    cl_bool             /* normalized_coords */,
+    cl_addressing_mode  /* addressing_mode */,
+    cl_filter_mode      /* filter_mode */,
+    cl_int *            /* errcode_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clRetainSampler)
+(   cl_sampler /* sampler */);
+
+OCL_API(cl_int, CL_API_CALL, clReleaseSampler)
+(   cl_sampler /* sampler */);
+
+OCL_API(cl_int, CL_API_CALL, clGetSamplerInfo)
+(   cl_sampler         /* sampler */,
+    cl_sampler_info    /* param_name */,
+    size_t             /* param_value_size */,
+    void *             /* param_value */,
+    size_t *           /* param_value_size_ret */);
+
+/* Program Object APIs  */
+OCL_API(cl_program, CL_API_CALL, clCreateProgramWithSource)
+(   cl_context        /* context */,
+    cl_uint           /* count */,
+    const char **     /* strings */,
+    const size_t *    /* lengths */,
+    cl_int *          /* errcode_ret */);
+
+OCL_API(cl_program, CL_API_CALL, clCreateProgramWithBinary)
+(   cl_context                     /* context */,
+    cl_uint                        /* num_devices */,
+    const cl_device_id *           /* device_list */,
+    const size_t *                 /* lengths */,
+    const unsigned char **         /* binaries */,
+    cl_int *                       /* binary_status */,
+    cl_int *                       /* errcode_ret */);
+
+OCL_API(cl_program, CL_API_CALL, clCreateProgramWithBuiltInKernels)
+(   cl_context            /* context */,
+    cl_uint               /* num_devices */,
+    const cl_device_id *  /* device_list */,
+    const char *          /* kernel_names */,
+    cl_int *              /* errcode_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clRetainProgram)
+(   cl_program /* program */);
+
+OCL_API(cl_int, CL_API_CALL, clReleaseProgram)
+(   cl_program /* program */);
+
+OCL_API(cl_int, CL_API_CALL, clBuildProgram)
+(   cl_program           /* program */,
+    cl_uint              /* num_devices */,
+    const cl_device_id * /* device_list */,
+    const char *         /* options */,
+    void (CL_CALLBACK *  /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
+    void *               /* user_data */);
+
+OCL_API(cl_int, CL_API_CALL, clCompileProgram)
+(   cl_program           /* program */,
+    cl_uint              /* num_devices */,
+    const cl_device_id * /* device_list */,
+    const char *         /* options */,
+    cl_uint              /* num_input_headers */,
+    const cl_program *   /* input_headers */,
+    const char **        /* header_include_names */,
+    void (CL_CALLBACK *  /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
+    void *               /* user_data */);
+
+OCL_API(cl_program, CL_API_CALL, clLinkProgram)
+(   cl_context           /* context */,
+    cl_uint              /* num_devices */,
+    const cl_device_id * /* device_list */,
+    const char *         /* options */,
+    cl_uint              /* num_input_programs */,
+    const cl_program *   /* input_programs */,
+    void (CL_CALLBACK *  /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
+    void *               /* user_data */,
+    cl_int *             /* errcode_ret */ );
+
+
+OCL_API(cl_int, CL_API_CALL, clUnloadPlatformCompiler)
+(   cl_platform_id /* platform */);
+
+OCL_API(cl_int, CL_API_CALL, clGetProgramInfo)
+(   cl_program         /* program */,
+    cl_program_info    /* param_name */,
+    size_t             /* param_value_size */,
+    void *             /* param_value */,
+    size_t *           /* param_value_size_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clGetProgramBuildInfo)
+(   cl_program            /* program */,
+    cl_device_id          /* device */,
+    cl_program_build_info /* param_name */,
+    size_t                /* param_value_size */,
+    void *                /* param_value */,
+    size_t *              /* param_value_size_ret */);
+
+/* Kernel Object APIs */
+OCL_API(cl_kernel, CL_API_CALL, clCreateKernel)
+(   cl_program      /* program */,
+    const char *    /* kernel_name */,
+    cl_int *        /* errcode_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clCreateKernelsInProgram)
+(   cl_program     /* program */,
+    cl_uint        /* num_kernels */,
+    cl_kernel *    /* kernels */,
+    cl_uint *      /* num_kernels_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clRetainKernel)
+(   cl_kernel    /* kernel */);
+
+OCL_API(cl_int, CL_API_CALL, clReleaseKernel)
+(   cl_kernel   /* kernel */);
+
+OCL_API(cl_int, CL_API_CALL, clSetKernelArg)
+(   cl_kernel    /* kernel */,
+    cl_uint      /* arg_index */,
+    size_t       /* arg_size */,
+    const void * /* arg_value */);
+
+OCL_API(cl_int, CL_API_CALL, clGetKernelInfo)
+(   cl_kernel       /* kernel */,
+    cl_kernel_info  /* param_name */,
+    size_t          /* param_value_size */,
+    void *          /* param_value */,
+    size_t *        /* param_value_size_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clGetKernelArgInfo)
+(   cl_kernel       /* kernel */,
+    cl_uint         /* arg_indx */,
+    cl_kernel_arg_info  /* param_name */,
+    size_t          /* param_value_size */,
+    void *          /* param_value */,
+    size_t *        /* param_value_size_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clGetKernelWorkGroupInfo)
+(   cl_kernel                  /* kernel */,
+    cl_device_id               /* device */,
+    cl_kernel_work_group_info  /* param_name */,
+    size_t                     /* param_value_size */,
+    void *                     /* param_value */,
+    size_t *                   /* param_value_size_ret */);
+
+/* Event Object APIs */
+OCL_API(cl_int, CL_API_CALL, clWaitForEvents)
+(   cl_uint             /* num_events */,
+    const cl_event *    /* event_list */);
+
+OCL_API(cl_int, CL_API_CALL, clGetEventInfo)
+(   cl_event         /* event */,
+    cl_event_info    /* param_name */,
+    size_t           /* param_value_size */,
+    void *           /* param_value */,
+    size_t *         /* param_value_size_ret */);
+
+OCL_API(cl_event, CL_API_CALL, clCreateUserEvent)
+(   cl_context    /* context */,
+    cl_int *      /* errcode_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clRetainEvent)
+(   cl_event /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clReleaseEvent)
+(   cl_event /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clSetUserEventStatus)
+(   cl_event   /* event */,
+    cl_int     /* execution_status */);
+
+OCL_API(cl_int, CL_API_CALL, clSetEventCallback)
+(   cl_event    /* event */,
+    cl_int      /* command_exec_callback_type */,
+    void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *),
+    void *      /* user_data */);
+
+/* Profiling APIs */
+OCL_API(cl_int, CL_API_CALL, clGetEventProfilingInfo)
+(   cl_event            /* event */,
+    cl_profiling_info   /* param_name */,
+    size_t              /* param_value_size */,
+    void *              /* param_value */,
+    size_t *            /* param_value_size_ret */);
+
+/* Flush and Finish APIs */
+OCL_API(cl_int, CL_API_CALL, clFlush)
+(   cl_command_queue /* command_queue */);
+
+OCL_API(cl_int, CL_API_CALL, clFinish)
+(   cl_command_queue /* command_queue */);
+
+/* Enqueued Commands APIs */
+OCL_API(cl_int, CL_API_CALL, clEnqueueReadBuffer)
+(   cl_command_queue    /* command_queue */,
+    cl_mem              /* buffer */,
+    cl_bool             /* blocking_read */,
+    size_t              /* offset */,
+    size_t              /* size */,
+    void *              /* ptr */,
+    cl_uint             /* num_events_in_wait_list */,
+    const cl_event *    /* event_wait_list */,
+    cl_event *          /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueReadBufferRect)
+(   cl_command_queue    /* command_queue */,
+    cl_mem              /* buffer */,
+    cl_bool             /* blocking_read */,
+    const size_t *      /* buffer_offset */,
+    const size_t *      /* host_offset */,
+    const size_t *      /* region */,
+    size_t              /* buffer_row_pitch */,
+    size_t              /* buffer_slice_pitch */,
+    size_t              /* host_row_pitch */,
+    size_t              /* host_slice_pitch */,
+    void *              /* ptr */,
+    cl_uint             /* num_events_in_wait_list */,
+    const cl_event *    /* event_wait_list */,
+    cl_event *          /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueWriteBuffer)
+(   cl_command_queue   /* command_queue */,
+    cl_mem             /* buffer */,
+    cl_bool            /* blocking_write */,
+    size_t             /* offset */,
+    size_t             /* size */,
+    const void *       /* ptr */,
+    cl_uint            /* num_events_in_wait_list */,
+    const cl_event *   /* event_wait_list */,
+    cl_event *         /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueWriteBufferRect)
+(   cl_command_queue    /* command_queue */,
+    cl_mem              /* buffer */,
+    cl_bool             /* blocking_write */,
+    const size_t *      /* buffer_offset */,
+    const size_t *      /* host_offset */,
+    const size_t *      /* region */,
+    size_t              /* buffer_row_pitch */,
+    size_t              /* buffer_slice_pitch */,
+    size_t              /* host_row_pitch */,
+    size_t              /* host_slice_pitch */,
+    const void *        /* ptr */,
+    cl_uint             /* num_events_in_wait_list */,
+    const cl_event *    /* event_wait_list */,
+    cl_event *          /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueFillBuffer)
+(   cl_command_queue   /* command_queue */,
+    cl_mem             /* buffer */,
+    const void *       /* pattern */,
+    size_t             /* pattern_size */,
+    size_t             /* offset */,
+    size_t             /* size */,
+    cl_uint            /* num_events_in_wait_list */,
+    const cl_event *   /* event_wait_list */,
+    cl_event *         /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueCopyBuffer)
+(   cl_command_queue    /* command_queue */,
+    cl_mem              /* src_buffer */,
+    cl_mem              /* dst_buffer */,
+    size_t              /* src_offset */,
+    size_t              /* dst_offset */,
+    size_t              /* size */,
+    cl_uint             /* num_events_in_wait_list */,
+    const cl_event *    /* event_wait_list */,
+    cl_event *          /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueCopyBufferRect)
+(   cl_command_queue    /* command_queue */,
+    cl_mem              /* src_buffer */,
+    cl_mem              /* dst_buffer */,
+    const size_t *      /* src_origin */,
+    const size_t *      /* dst_origin */,
+    const size_t *      /* region */,
+    size_t              /* src_row_pitch */,
+    size_t              /* src_slice_pitch */,
+    size_t              /* dst_row_pitch */,
+    size_t              /* dst_slice_pitch */,
+    cl_uint             /* num_events_in_wait_list */,
+    const cl_event *    /* event_wait_list */,
+    cl_event *          /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueReadImage)
+(   cl_command_queue     /* command_queue */,
+    cl_mem               /* image */,
+    cl_bool              /* blocking_read */,
+    const size_t *       /* origin[3] */,
+    const size_t *       /* region[3] */,
+    size_t               /* row_pitch */,
+    size_t               /* slice_pitch */,
+    void *               /* ptr */,
+    cl_uint              /* num_events_in_wait_list */,
+    const cl_event *     /* event_wait_list */,
+    cl_event *           /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueWriteImage)
+(   cl_command_queue    /* command_queue */,
+    cl_mem              /* image */,
+    cl_bool             /* blocking_write */,
+    const size_t *      /* origin[3] */,
+    const size_t *      /* region[3] */,
+    size_t              /* input_row_pitch */,
+    size_t              /* input_slice_pitch */,
+    const void *        /* ptr */,
+    cl_uint             /* num_events_in_wait_list */,
+    const cl_event *    /* event_wait_list */,
+    cl_event *          /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueFillImage)
+(   cl_command_queue   /* command_queue */,
+    cl_mem             /* image */,
+    const void *       /* fill_color */,
+    const size_t *     /* origin[3] */,
+    const size_t *     /* region[3] */,
+    cl_uint            /* num_events_in_wait_list */,
+    const cl_event *   /* event_wait_list */,
+    cl_event *         /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueCopyImage)
+(   cl_command_queue     /* command_queue */,
+    cl_mem               /* src_image */,
+    cl_mem               /* dst_image */,
+    const size_t *       /* src_origin[3] */,
+    const size_t *       /* dst_origin[3] */,
+    const size_t *       /* region[3] */,
+    cl_uint              /* num_events_in_wait_list */,
+    const cl_event *     /* event_wait_list */,
+    cl_event *           /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueCopyImageToBuffer)
+(   cl_command_queue /* command_queue */,
+    cl_mem           /* src_image */,
+    cl_mem           /* dst_buffer */,
+    const size_t *   /* src_origin[3] */,
+    const size_t *   /* region[3] */,
+    size_t           /* dst_offset */,
+    cl_uint          /* num_events_in_wait_list */,
+    const cl_event * /* event_wait_list */,
+    cl_event *       /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueCopyBufferToImage)
+(   cl_command_queue /* command_queue */,
+    cl_mem           /* src_buffer */,
+    cl_mem           /* dst_image */,
+    size_t           /* src_offset */,
+    const size_t *   /* dst_origin[3] */,
+    const size_t *   /* region[3] */,
+    cl_uint          /* num_events_in_wait_list */,
+    const cl_event * /* event_wait_list */,
+    cl_event *       /* event */);
+
+OCL_API(void *, CL_API_CALL, clEnqueueMapBuffer)
+(   cl_command_queue /* command_queue */,
+    cl_mem           /* buffer */,
+    cl_bool          /* blocking_map */,
+    cl_map_flags     /* map_flags */,
+    size_t           /* offset */,
+    size_t           /* size */,
+    cl_uint          /* num_events_in_wait_list */,
+    const cl_event * /* event_wait_list */,
+    cl_event *       /* event */,
+    cl_int *         /* errcode_ret */);
+
+OCL_API(void *, CL_API_CALL, clEnqueueMapImage)
+(   cl_command_queue  /* command_queue */,
+    cl_mem            /* image */,
+    cl_bool           /* blocking_map */,
+    cl_map_flags      /* map_flags */,
+    const size_t *    /* origin[3] */,
+    const size_t *    /* region[3] */,
+    size_t *          /* image_row_pitch */,
+    size_t *          /* image_slice_pitch */,
+    cl_uint           /* num_events_in_wait_list */,
+    const cl_event *  /* event_wait_list */,
+    cl_event *        /* event */,
+    cl_int *          /* errcode_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueUnmapMemObject)
+(   cl_command_queue /* command_queue */,
+    cl_mem           /* memobj */,
+    void *           /* mapped_ptr */,
+    cl_uint          /* num_events_in_wait_list */,
+    const cl_event *  /* event_wait_list */,
+    cl_event *        /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueMigrateMemObjects)
+(   cl_command_queue       /* command_queue */,
+    cl_uint                /* num_mem_objects */,
+    const cl_mem *         /* mem_objects */,
+    cl_mem_migration_flags /* flags */,
+    cl_uint                /* num_events_in_wait_list */,
+    const cl_event *       /* event_wait_list */,
+    cl_event *             /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueNDRangeKernel)
+(   cl_command_queue /* command_queue */,
+    cl_kernel        /* kernel */,
+    cl_uint          /* work_dim */,
+    const size_t *   /* global_work_offset */,
+    const size_t *   /* global_work_size */,
+    const size_t *   /* local_work_size */,
+    cl_uint          /* num_events_in_wait_list */,
+    const cl_event * /* event_wait_list */,
+    cl_event *       /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueTask)
+(   cl_command_queue  /* command_queue */,
+    cl_kernel         /* kernel */,
+    cl_uint           /* num_events_in_wait_list */,
+    const cl_event *  /* event_wait_list */,
+    cl_event *        /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueNativeKernel)
+(   cl_command_queue  /* command_queue */,
+    void (CL_CALLBACK * /*user_func*/)(void *),
+    void *            /* args */,
+    size_t            /* cb_args */,
+    cl_uint           /* num_mem_objects */,
+    const cl_mem *    /* mem_list */,
+    const void **     /* args_mem_loc */,
+    cl_uint           /* num_events_in_wait_list */,
+    const cl_event *  /* event_wait_list */,
+    cl_event *        /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueMarkerWithWaitList)
+(   cl_command_queue /* command_queue */,
+    cl_uint           /* num_events_in_wait_list */,
+    const cl_event *  /* event_wait_list */,
+    cl_event *        /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueBarrierWithWaitList)
+(   cl_command_queue /* command_queue */,
+    cl_uint           /* num_events_in_wait_list */,
+    const cl_event *  /* event_wait_list */,
+    cl_event *        /* event */);
+
+
+/* Extension function access
+*
+* Returns the extension function address for the given function name,
+* or NULL if a valid function can not be found.  The client must
+* check to make sure the address is not NULL, before using or
+* calling the returned function address.
+*/
+OCL_API(void *, CL_API_CALL, clGetExtensionFunctionAddressForPlatform)
+(   cl_platform_id /* platform */,
+    const char *   /* func_name */);
+
+
+// Deprecated OpenCL 1.1 APIs
+OCL_API(cl_mem, CL_API_CALL, clCreateImage2D)
+(   cl_context              /* context */,
+    cl_mem_flags            /* flags */,
+    const cl_image_format * /* image_format */,
+    size_t                  /* image_width */,
+    size_t                  /* image_height */,
+    size_t                  /* image_row_pitch */,
+    void *                  /* host_ptr */,
+    cl_int *                /* errcode_ret */);
+
+OCL_API(cl_mem, CL_API_CALL, clCreateImage3D)
+(   cl_context              /* context */,
+    cl_mem_flags            /* flags */,
+    const cl_image_format * /* image_format */,
+    size_t                  /* image_width */,
+    size_t                  /* image_height */,
+    size_t                  /* image_depth */,
+    size_t                  /* image_row_pitch */,
+    size_t                  /* image_slice_pitch */,
+    void *                  /* host_ptr */,
+    cl_int *                /* errcode_ret */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueMarker)
+(   cl_command_queue    /* command_queue */,
+    cl_event *          /* event */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueWaitForEvents)
+(   cl_command_queue /* command_queue */,
+    cl_uint          /* num_events */,
+    const cl_event * /* event_list */);
+
+OCL_API(cl_int, CL_API_CALL, clEnqueueBarrier)
+(   cl_command_queue /* command_queue */);
+
+OCL_API(cl_int, CL_API_CALL, clUnloadCompiler)
+(   void);
+
+OCL_API(void *, CL_API_CALL, clGetExtensionFunctionAddress)
+(   const char * /* func_name */);
+
+#define OCL_DECLARE_FUNC(name) name##_func name
+
+typedef struct
+{
+    void *library;
+
+    OCL_DECLARE_FUNC( clBuildProgram );
+    OCL_DECLARE_FUNC( clCreateBuffer );
+    OCL_DECLARE_FUNC( clCreateCommandQueue );
+    OCL_DECLARE_FUNC( clCreateContext );
+    OCL_DECLARE_FUNC( clCreateImage2D );
+    OCL_DECLARE_FUNC( clCreateKernel );
+    OCL_DECLARE_FUNC( clCreateProgramWithBinary );
+    OCL_DECLARE_FUNC( clCreateProgramWithSource );
+    OCL_DECLARE_FUNC( clEnqueueCopyBuffer );
+    OCL_DECLARE_FUNC( clEnqueueMapBuffer );
+    OCL_DECLARE_FUNC( clEnqueueNDRangeKernel );
+    OCL_DECLARE_FUNC( clEnqueueReadBuffer );
+    OCL_DECLARE_FUNC( clEnqueueWriteBuffer );
+    OCL_DECLARE_FUNC( clFinish );
+    OCL_DECLARE_FUNC( clGetCommandQueueInfo );
+    OCL_DECLARE_FUNC( clGetDeviceIDs );
+    OCL_DECLARE_FUNC( clGetDeviceInfo );
+    OCL_DECLARE_FUNC( clGetKernelWorkGroupInfo );
+    OCL_DECLARE_FUNC( clGetPlatformIDs );
+    OCL_DECLARE_FUNC( clGetProgramBuildInfo );
+    OCL_DECLARE_FUNC( clGetProgramInfo );
+    OCL_DECLARE_FUNC( clGetSupportedImageFormats );
+    OCL_DECLARE_FUNC( clReleaseCommandQueue );
+    OCL_DECLARE_FUNC( clReleaseContext );
+    OCL_DECLARE_FUNC( clReleaseKernel );
+    OCL_DECLARE_FUNC( clReleaseMemObject );
+    OCL_DECLARE_FUNC( clReleaseProgram );
+    OCL_DECLARE_FUNC( clSetKernelArg );
+} x264_opencl_function_t;
 
 /* Number of downscale resolutions to use for motion search */
 #define NUM_IMAGE_SCALES 4
 
 typedef struct
 {
+    x264_opencl_function_t *ocl;
+
     cl_context       context;
     cl_device_id     device;
     cl_command_queue queue;
@@ -100,6 +779,8 @@ typedef struct
 
 typedef struct
 {
+    x264_opencl_function_t *ocl;
+
     cl_mem scaled_image2Ds[NUM_IMAGE_SCALES];
     cl_mem luma_hpel;
     cl_mem inv_qscale_factor;
@@ -112,9 +793,12 @@ typedef struct
 
 typedef struct x264_frame x264_frame;
 
-int x264_opencl_init( x264_t *h );
-int x264_opencl_init_lookahead( x264_t *h );
-void x264_opencl_free( x264_t *h );
+x264_opencl_function_t *x264_opencl_load_library( void );
+void x264_opencl_close_library( x264_opencl_function_t *ocl );
+
+int x264_opencl_lookahead_init( x264_t *h );
+void x264_opencl_lookahead_delete( x264_t *h );
+
 void x264_opencl_frame_delete( x264_frame *frame );
 
 #endif
index 3d14850909956cc16edebaabf206eeef41344b41..f2004143e504297d188441e27e682ae7fb91db8c 100755 (executable)
--- a/configure
+++ b/configure
@@ -287,7 +287,7 @@ cross_prefix=""
 EXE=""
 
 # list of all preprocessor HAVE values we can define
-CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F VISUALIZE SWSCALE LAVF FFMS GPAC GF_MALLOC AVS GPL VECTOREXT INTERLACED CPU_COUNT"
+CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F VISUALIZE SWSCALE LAVF FFMS GPAC GF_MALLOC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL"
 
 # parse options
 
@@ -1022,6 +1022,30 @@ ASFLAGS="$ASFLAGS -DBIT_DEPTH=$bit_depth"
 
 [ $interlaced = yes ] && define HAVE_INTERLACED && x264_interlaced=1 || x264_interlaced=0
 
+libdl=""
+if [ "$opencl" = "yes" ]; then
+    opencl="no"
+    log_check "for perl"
+    output=$(perl -v)
+    if [ "$output" = "" ]; then
+        log_fail
+        echo 'OpenCL support requires perl to compile.'
+        echo 'use --disable-opencl to compile without OpenCL.'
+        exit 1
+    fi
+    log_ok
+    # cygwin can use opencl if it can use LoadLibrary
+    if [ $SYS = WINDOWS ] || ([ $SYS = CYGWIN ] && cc_check windows.h "" "LoadLibrary(0);") ; then
+        opencl="yes"
+        define HAVE_OPENCL
+    elif [ "$SYS" = "LINUX" -o "$SYS" = "MACOSX" ] ; then
+        opencl="yes"
+        define HAVE_OPENCL
+        libdl="-ldl"
+    fi
+    LDFLAGS="$LDFLAGS $libdl"
+fi
+
 #define undefined vars as 0
 for var in $CONFIG_HAVE; do
     grep -q "HAVE_$var 1" config.h || define HAVE_$var 0
@@ -1113,62 +1137,9 @@ PROF_GEN_CC=$PROF_GEN_CC
 PROF_GEN_LD=$PROF_GEN_LD
 PROF_USE_CC=$PROF_USE_CC
 PROF_USE_LD=$PROF_USE_LD
+HAVE_OPENCL=$opencl
 EOF
 
-if [[ $host_os != mingw* ]]; then
-    # OpenCL support is only well tested on Windows/MinGW.  If you
-    # wish to try it on an unsupported platform, swap the lines
-    # below.  If OpenCL breaks, you get to keep both halves
-    #opencl="yes"
-    opencl="no"
-fi
-if [ "$opencl" = "yes" ]; then
-    log_check "looking for perl"
-    output=$(perl -v)
-    if [ "$output" = "" ]; then
-        echo 'OpenCL support requires perl to compile.'
-        echo 'use --disable-opencl to compile without OpenCL.'
-        exit 1
-    elif [[ $cross_prefix != ""  &&  $host_os == mingw* ]] ; then
-        if cc_check "CL/cl.h" "-lOpenCL"; then
-            echo 'HAVE_OPENCL=yes' >> config.mak
-            echo 'OPENCL_LIB=OpenCL' >> config.mak
-            echo "OPENCL_INC_DIR=." >> config.mak
-            echo "OPENCL_LIB_DIR=." >> config.mak
-            define HAVE_OPENCL
-        else
-            opencl="no"
-        fi
-    elif [ "$CUDA_PATH" != "" ]; then
-        echo 'HAVE_OPENCL=yes' >> config.mak
-        echo 'OPENCL_LIB=OpenCL' >> config.mak
-        echo 'OPENCL_INC_DIR=$(CUDA_PATH)include' >> config.mak
-        if [ "$ARCH" = "X86" ]; then
-            echo 'OPENCL_LIB_DIR=$(CUDA_PATH)lib/Win32' >> config.mak
-        else
-            echo 'OPENCL_LIB_DIR=$(CUDA_PATH)lib/x64' >> config.mak
-        fi
-        define HAVE_OPENCL
-    elif [ -e "$AMDAPPSDKROOT/include/CL/cl.h" ]; then
-        if [[ $host_os = mingw* ]]; then
-            app_path=`echo "/$AMDAPPSDKROOT" | sed 's/\\\/\//g' | sed 's/://'`
-        else
-            app_path='$(AMDAPPSDKROOT)'
-       fi
-        echo 'HAVE_OPENCL=yes' >> config.mak
-        echo 'OPENCL_LIB=OpenCL' >> config.mak
-        echo OPENCL_INC_DIR=$app_path/include >> config.mak
-        if [ "$ARCH" = "X86" ]; then
-            echo OPENCL_LIB_DIR=$app_path/lib/x86 >> config.mak
-        else
-            echo OPENCL_LIB_DIR=$app_path/lib/x86_64 >> config.mak
-        fi
-        define HAVE_OPENCL
-    else
-        opencl="no"
-    fi
-fi
-
 if [ $compiler = ICL ]; then
     echo '%.o: %.c' >> config.mak
     echo '     $(CC) $(CFLAGS) -c -Fo$@ $<' >> config.mak
@@ -1246,7 +1217,7 @@ Name: x264
 Description: H.264 (MPEG4 AVC) encoder library
 Version: $(grep POINTVER < x264_config.h | sed -e 's/.* "//; s/".*//')
 Libs: -L$libdir -lx264
-Libs.private: $libpthread $libm
+Libs.private: $libpthread $libm $libdl
 Cflags: -I$includedir
 EOF
 
@@ -1270,6 +1241,7 @@ ffms:          $ffms
 gpac:          $gpac
 gpl:           $gpl
 thread:        $thread
+opencl:        $opencl
 filters:       $filters
 debug:         $debug
 gprof:         $gprof
@@ -1278,7 +1250,6 @@ PIC:           $pic
 visualize:     $vis
 bit depth:     $bit_depth
 chroma format: $chroma_format
-opencl:        $opencl
 EOF
 
 echo >> config.log
index f9afb90fc4d2f1c6dc40977ce99651e73375bf63..ffef9c1ecc80e8df8bd488696d28097a5bac5317 100644 (file)
 #include "common/visualize.h"
 #endif
 
-#if HAVE_OPENCL
-#include "common/opencl.h"
-#endif
-
 //#define DEBUG_MB_TYPE
 
 #define bs_write_ue bs_write_ue_big
@@ -1391,6 +1387,18 @@ x264_t *x264_encoder_open( x264_param_t *param )
         x264_threadpool_init( &h->lookaheadpool, h->param.i_lookahead_threads, (void*)x264_lookahead_thread_init, h ) )
         goto fail;
 
+#if HAVE_OPENCL
+    if( h->param.b_opencl )
+    {
+        h->opencl.ocl = x264_opencl_load_library();
+        if( !h->opencl.ocl )
+        {
+            x264_log( h, X264_LOG_WARNING, "failed to load OpenCL\n" );
+            h->param.b_opencl = 0;
+        }
+    }
+#endif
+
     h->thread[0] = h;
     for( int i = 1; i < h->param.i_threads + !!h->param.i_sync_lookahead; i++ )
         CHECKED_MALLOC( h->thread[i], sizeof(x264_t) );
@@ -1432,7 +1440,7 @@ x264_t *x264_encoder_open( x264_param_t *param )
     }
 
 #if HAVE_OPENCL
-    if( h->param.b_opencl && x264_opencl_init( h ) < 0 )
+    if( h->param.b_opencl && x264_opencl_lookahead_init( h ) < 0 )
         h->param.b_opencl = 0;
 #endif
 
@@ -3649,12 +3657,13 @@ void    x264_encoder_close  ( x264_t *h )
                    || h->stat.i_mb_count[SLICE_TYPE_P][I_PCM]
                    || h->stat.i_mb_count[SLICE_TYPE_B][I_PCM];
 
+    x264_lookahead_delete( h );
+
 #if HAVE_OPENCL
-    x264_opencl_free( h );
+    x264_opencl_lookahead_delete( h );
+    x264_opencl_function_t *ocl = h->opencl.ocl;
 #endif
 
-    x264_lookahead_delete( h );
-
     if( h->param.b_sliced_threads )
         x264_threadpool_wait_all( h );
     if( h->param.i_threads > 1 )
@@ -4004,6 +4013,9 @@ void    x264_encoder_close  ( x264_t *h )
         x264_pthread_cond_destroy( &h->thread[i]->cv );
         x264_free( h->thread[i] );
     }
+#if HAVE_OPENCL
+    x264_opencl_close_library( ocl );
+#endif
 }
 
 int x264_encoder_delayed_frames( x264_t *h )
index 1991060445d89009970e7593213e366911171af4..17650c5f8ec82f36653e17a5bff99267721d5510 100644 (file)
@@ -28,7 +28,7 @@
 #include "me.h"
 
 #if HAVE_OPENCL
-#if _WIN32
+#ifdef _WIN32
 #include <windows.h>
 #endif
 
@@ -40,7 +40,7 @@ void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int
 #define CL_QUEUE_THREAD_HANDLE_AMD 0x403E
 
 #define OCLCHECK( method, ... )\
-    status = method( __VA_ARGS__ );\
+    status = ocl->method( __VA_ARGS__ );\
     if( status != CL_SUCCESS ) {\
         h->param.b_opencl = 0;\
         h->opencl.b_fatal_error = 1;\
@@ -50,7 +50,9 @@ void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int
 
 void x264_opencl_flush( x264_t *h )
 {
-    clFinish( h->opencl.queue );
+    x264_opencl_function_t *ocl = h->opencl.ocl;
+
+    ocl->clFinish( h->opencl.queue );
 
     /* Finish copies from the GPU by copying from the page-locked buffer to
      * their final destination */
@@ -76,13 +78,14 @@ int x264_opencl_lowres_init( x264_t *h, x264_frame_t *fenc, int lambda )
         return 0;
     fenc->b_intra_calculated = 1;
 
+    x264_opencl_function_t *ocl = h->opencl.ocl;
     int luma_length = fenc->i_stride[0] * fenc->i_lines[0];
 
 #define CREATEBUF( out, flags, size )\
-    out = clCreateBuffer( h->opencl.context, (flags), (size), NULL, &status );\
+    out = ocl->clCreateBuffer( h->opencl.context, (flags), (size), NULL, &status );\
     if( status != CL_SUCCESS ) { h->param.b_opencl = 0; x264_log( h, X264_LOG_ERROR, "clCreateBuffer error '%d'\n", status ); return -1; }
 #define CREATEIMAGE( out, flags, pf, width, height )\
-    out = clCreateImage2D( h->opencl.context, (flags), &pf, width, height, 0, NULL, &status );\
+    out = ocl->clCreateImage2D( h->opencl.context, (flags), &pf, width, height, 0, NULL, &status );\
     if( status != CL_SUCCESS ) { h->param.b_opencl = 0; x264_log( h, X264_LOG_ERROR, "clCreateImage2D error '%d'\n", status ); return -1; }
 
     int mb_count = h->mb.i_mb_count;
@@ -277,15 +280,16 @@ int x264_opencl_lowres_init( x264_t *h, x264_frame_t *fenc, int lambda )
  * applications will have self-tuning code to try many possible variables and
  * measure the runtime.  Here we simply make an educated guess based on what we
  * know GPUs typically prefer.  */
-static void x264_optimal_launch_dims( size_t *gdims, size_t *ldims, const cl_kernel kernel, const cl_device_id device )
+static void x264_optimal_launch_dims( x264_t *h, size_t *gdims, size_t *ldims, const cl_kernel kernel, const cl_device_id device )
 {
+    x264_opencl_function_t *ocl = h->opencl.ocl;
     size_t max_work_group = 256;    /* reasonable defaults for OpenCL 1.0 devices, below APIs may fail */
     size_t preferred_multiple = 64;
     cl_uint num_cus = 6;
 
-    clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_work_group, NULL );
-    clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &preferred_multiple, NULL );
-    clGetDeviceInfo( device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &num_cus, NULL );
+    ocl->clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_work_group, NULL );
+    ocl->clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &preferred_multiple, NULL );
+    ocl->clGetDeviceInfo( device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &num_cus, NULL );
 
     ldims[0] = preferred_multiple;
     ldims[1] = 8;
@@ -336,6 +340,7 @@ static void x264_optimal_launch_dims( size_t *gdims, size_t *ldims, const cl_ker
 
 int x264_opencl_motionsearch( x264_t *h, x264_frame_t **frames, int b, int ref, int b_islist1, int lambda, const x264_weight_t *w )
 {
+    x264_opencl_function_t *ocl = h->opencl.ocl;
     x264_frame_t *fenc = frames[b];
     x264_frame_t *fref = frames[ref];
 
@@ -414,7 +419,7 @@ int x264_opencl_motionsearch( x264_t *h, x264_frame_t **frames, int b, int ref,
         if( gdims[0] < 2 || gdims[1] < 2 )
             continue;
         gdims[0] <<= 2;
-        x264_optimal_launch_dims( gdims, ldims, h->opencl.hme_kernel, h->opencl.device );
+        x264_optimal_launch_dims( h, gdims, ldims, h->opencl.hme_kernel, h->opencl.device );
 
         mb_per_group = (ldims[0] >> 2) * ldims[1];
         cost_local_size = 4 * mb_per_group * sizeof(int16_t);
@@ -526,6 +531,7 @@ int x264_opencl_motionsearch( x264_t *h, x264_frame_t **frames, int b, int ref,
 
 int x264_opencl_finalize_cost( x264_t *h, int lambda, x264_frame_t **frames, int p0, int p1, int b, int dist_scale_factor )
 {
+    x264_opencl_function_t *ocl = h->opencl.ocl;
     cl_int status;
     x264_frame_t *fenc = frames[b];
     x264_frame_t *fref0 = frames[p0];
@@ -548,7 +554,7 @@ int x264_opencl_finalize_cost( x264_t *h, int lambda, x264_frame_t **frames, int
         /* For B frames, use 4 threads per MB for BIDIR checks */
         ldims = ldim_bidir;
         gdims[0] <<= 2;
-        x264_optimal_launch_dims( gdims, ldims, h->opencl.mode_select_kernel, h->opencl.device );
+        x264_optimal_launch_dims( h, gdims, ldims, h->opencl.mode_select_kernel, h->opencl.device );
         int mb_per_group = (ldims[0] >> 2) * ldims[1];
         cost_local_size = 4 * mb_per_group * sizeof(int16_t);
         satd_local_size = 16 * mb_per_group * sizeof(uint32_t);
@@ -640,7 +646,7 @@ void x264_opencl_slicetype_prep( x264_t *h, x264_frame_t **frames, int num_frame
 {
     if( h->param.b_opencl )
     {
-#if _WIN32
+#ifdef _WIN32
         /* Temporarily boost priority of this lookahead thread and the OpenCL
          * driver's thread until the end of this function.  On AMD GPUs this
          * greatly reduces the latency of enqueuing kernels and getting results
@@ -648,7 +654,8 @@ void x264_opencl_slicetype_prep( x264_t *h, x264_frame_t **frames, int num_frame
         HANDLE id = GetCurrentThread();
         h->opencl.lookahead_thread_pri = GetThreadPriority( id );
         SetThreadPriority( id, THREAD_PRIORITY_ABOVE_NORMAL );
-        cl_int status = clGetCommandQueueInfo( h->opencl.queue, CL_QUEUE_THREAD_HANDLE_AMD, sizeof(HANDLE), &id, NULL );
+        x264_opencl_function_t *ocl = h->opencl.ocl;
+        cl_int status = ocl->clGetCommandQueueInfo( h->opencl.queue, CL_QUEUE_THREAD_HANDLE_AMD, sizeof(HANDLE), &id, NULL );
         if( status == CL_SUCCESS )
         {
             h->opencl.opencl_thread_pri = GetThreadPriority( id );
@@ -699,12 +706,13 @@ void x264_opencl_slicetype_prep( x264_t *h, x264_frame_t **frames, int num_frame
 
 void x264_opencl_slicetype_end( x264_t *h )
 {
-#if _WIN32
+#ifdef _WIN32
     if( h->param.b_opencl )
     {
         HANDLE id = GetCurrentThread();
         SetThreadPriority( id, h->opencl.lookahead_thread_pri );
-        cl_int status = clGetCommandQueueInfo( h->opencl.queue, CL_QUEUE_THREAD_HANDLE_AMD, sizeof(HANDLE), &id, NULL );
+        x264_opencl_function_t *ocl = h->opencl.ocl;
+        cl_int status = ocl->clGetCommandQueueInfo( h->opencl.queue, CL_QUEUE_THREAD_HANDLE_AMD, sizeof(HANDLE), &id, NULL );
         if( status == CL_SUCCESS )
             SetThreadPriority( id, h->opencl.opencl_thread_pri );
     }
diff --git a/extras/cl.h b/extras/cl.h
new file mode 100644 (file)
index 0000000..f543257
--- /dev/null
@@ -0,0 +1,1209 @@
+/*******************************************************************************
+ * Copyright (c) 2008 - 2012 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ ******************************************************************************/
+
+#ifndef __OPENCL_CL_H
+#define __OPENCL_CL_H
+
+#include "cl_platform.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/******************************************************************************/
+
+typedef struct _cl_platform_id *    cl_platform_id;
+typedef struct _cl_device_id *      cl_device_id;
+typedef struct _cl_context *        cl_context;
+typedef struct _cl_command_queue *  cl_command_queue;
+typedef struct _cl_mem *            cl_mem;
+typedef struct _cl_program *        cl_program;
+typedef struct _cl_kernel *         cl_kernel;
+typedef struct _cl_event *          cl_event;
+typedef struct _cl_sampler *        cl_sampler;
+
+typedef cl_uint             cl_bool;                     /* WARNING!  Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */
+typedef cl_ulong            cl_bitfield;
+typedef cl_bitfield         cl_device_type;
+typedef cl_uint             cl_platform_info;
+typedef cl_uint             cl_device_info;
+typedef cl_bitfield         cl_device_fp_config;
+typedef cl_uint             cl_device_mem_cache_type;
+typedef cl_uint             cl_device_local_mem_type;
+typedef cl_bitfield         cl_device_exec_capabilities;
+typedef cl_bitfield         cl_command_queue_properties;
+typedef intptr_t            cl_device_partition_property;
+typedef cl_bitfield         cl_device_affinity_domain;
+
+typedef intptr_t            cl_context_properties;
+typedef cl_uint             cl_context_info;
+typedef cl_uint             cl_command_queue_info;
+typedef cl_uint             cl_channel_order;
+typedef cl_uint             cl_channel_type;
+typedef cl_bitfield         cl_mem_flags;
+typedef cl_uint             cl_mem_object_type;
+typedef cl_uint             cl_mem_info;
+typedef cl_bitfield         cl_mem_migration_flags;
+typedef cl_uint             cl_image_info;
+typedef cl_uint             cl_buffer_create_type;
+typedef cl_uint             cl_addressing_mode;
+typedef cl_uint             cl_filter_mode;
+typedef cl_uint             cl_sampler_info;
+typedef cl_bitfield         cl_map_flags;
+typedef cl_uint             cl_program_info;
+typedef cl_uint             cl_program_build_info;
+typedef cl_uint             cl_program_binary_type;
+typedef cl_int              cl_build_status;
+typedef cl_uint             cl_kernel_info;
+typedef cl_uint             cl_kernel_arg_info;
+typedef cl_uint             cl_kernel_arg_address_qualifier;
+typedef cl_uint             cl_kernel_arg_access_qualifier;
+typedef cl_bitfield         cl_kernel_arg_type_qualifier;
+typedef cl_uint             cl_kernel_work_group_info;
+typedef cl_uint             cl_event_info;
+typedef cl_uint             cl_command_type;
+typedef cl_uint             cl_profiling_info;
+
+
+typedef struct _cl_image_format {
+    cl_channel_order        image_channel_order;
+    cl_channel_type         image_channel_data_type;
+} cl_image_format;
+
+typedef struct _cl_image_desc {
+    cl_mem_object_type      image_type;
+    size_t                  image_width;
+    size_t                  image_height;
+    size_t                  image_depth;
+    size_t                  image_array_size;
+    size_t                  image_row_pitch;
+    size_t                  image_slice_pitch;
+    cl_uint                 num_mip_levels;
+    cl_uint                 num_samples;
+    cl_mem                  buffer;
+} cl_image_desc;
+
+typedef struct _cl_buffer_region {
+    size_t                  origin;
+    size_t                  size;
+} cl_buffer_region;
+
+
+/******************************************************************************/
+
+/* Error Codes */
+#define CL_SUCCESS                                  0
+#define CL_DEVICE_NOT_FOUND                         -1
+#define CL_DEVICE_NOT_AVAILABLE                     -2
+#define CL_COMPILER_NOT_AVAILABLE                   -3
+#define CL_MEM_OBJECT_ALLOCATION_FAILURE            -4
+#define CL_OUT_OF_RESOURCES                         -5
+#define CL_OUT_OF_HOST_MEMORY                       -6
+#define CL_PROFILING_INFO_NOT_AVAILABLE             -7
+#define CL_MEM_COPY_OVERLAP                         -8
+#define CL_IMAGE_FORMAT_MISMATCH                    -9
+#define CL_IMAGE_FORMAT_NOT_SUPPORTED               -10
+#define CL_BUILD_PROGRAM_FAILURE                    -11
+#define CL_MAP_FAILURE                              -12
+#define CL_MISALIGNED_SUB_BUFFER_OFFSET             -13
+#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14
+#define CL_COMPILE_PROGRAM_FAILURE                  -15
+#define CL_LINKER_NOT_AVAILABLE                     -16
+#define CL_LINK_PROGRAM_FAILURE                     -17
+#define CL_DEVICE_PARTITION_FAILED                  -18
+#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE            -19
+
+#define CL_INVALID_VALUE                            -30
+#define CL_INVALID_DEVICE_TYPE                      -31
+#define CL_INVALID_PLATFORM                         -32
+#define CL_INVALID_DEVICE                           -33
+#define CL_INVALID_CONTEXT                          -34
+#define CL_INVALID_QUEUE_PROPERTIES                 -35
+#define CL_INVALID_COMMAND_QUEUE                    -36
+#define CL_INVALID_HOST_PTR                         -37
+#define CL_INVALID_MEM_OBJECT                       -38
+#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR          -39
+#define CL_INVALID_IMAGE_SIZE                       -40
+#define CL_INVALID_SAMPLER                          -41
+#define CL_INVALID_BINARY                           -42
+#define CL_INVALID_BUILD_OPTIONS                    -43
+#define CL_INVALID_PROGRAM                          -44
+#define CL_INVALID_PROGRAM_EXECUTABLE               -45
+#define CL_INVALID_KERNEL_NAME                      -46
+#define CL_INVALID_KERNEL_DEFINITION                -47
+#define CL_INVALID_KERNEL                           -48
+#define CL_INVALID_ARG_INDEX                        -49
+#define CL_INVALID_ARG_VALUE                        -50
+#define CL_INVALID_ARG_SIZE                         -51
+#define CL_INVALID_KERNEL_ARGS                      -52
+#define CL_INVALID_WORK_DIMENSION                   -53
+#define CL_INVALID_WORK_GROUP_SIZE                  -54
+#define CL_INVALID_WORK_ITEM_SIZE                   -55
+#define CL_INVALID_GLOBAL_OFFSET                    -56
+#define CL_INVALID_EVENT_WAIT_LIST                  -57
+#define CL_INVALID_EVENT                            -58
+#define CL_INVALID_OPERATION                        -59
+#define CL_INVALID_GL_OBJECT                        -60
+#define CL_INVALID_BUFFER_SIZE                      -61
+#define CL_INVALID_MIP_LEVEL                        -62
+#define CL_INVALID_GLOBAL_WORK_SIZE                 -63
+#define CL_INVALID_PROPERTY                         -64
+#define CL_INVALID_IMAGE_DESCRIPTOR                 -65
+#define CL_INVALID_COMPILER_OPTIONS                 -66
+#define CL_INVALID_LINKER_OPTIONS                   -67
+#define CL_INVALID_DEVICE_PARTITION_COUNT           -68
+
+/* OpenCL Version */
+#define CL_VERSION_1_0                              1
+#define CL_VERSION_1_1                              1
+#define CL_VERSION_1_2                              1
+
+/* cl_bool */
+#define CL_FALSE                                    0
+#define CL_TRUE                                     1
+#define CL_BLOCKING                                 CL_TRUE
+#define CL_NON_BLOCKING                             CL_FALSE
+
+/* cl_platform_info */
+#define CL_PLATFORM_PROFILE                         0x0900
+#define CL_PLATFORM_VERSION                         0x0901
+#define CL_PLATFORM_NAME                            0x0902
+#define CL_PLATFORM_VENDOR                          0x0903
+#define CL_PLATFORM_EXTENSIONS                      0x0904
+
+/* cl_device_type - bitfield */
+#define CL_DEVICE_TYPE_DEFAULT                      (1 << 0)
+#define CL_DEVICE_TYPE_CPU                          (1 << 1)
+#define CL_DEVICE_TYPE_GPU                          (1 << 2)
+#define CL_DEVICE_TYPE_ACCELERATOR                  (1 << 3)
+#define CL_DEVICE_TYPE_CUSTOM                       (1 << 4)
+#define CL_DEVICE_TYPE_ALL                          0xFFFFFFFF
+
+/* cl_device_info */
+#define CL_DEVICE_TYPE                              0x1000
+#define CL_DEVICE_VENDOR_ID                         0x1001
+#define CL_DEVICE_MAX_COMPUTE_UNITS                 0x1002
+#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS          0x1003
+#define CL_DEVICE_MAX_WORK_GROUP_SIZE               0x1004
+#define CL_DEVICE_MAX_WORK_ITEM_SIZES               0x1005
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR       0x1006
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT      0x1007
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT        0x1008
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG       0x1009
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT      0x100A
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE     0x100B
+#define CL_DEVICE_MAX_CLOCK_FREQUENCY               0x100C
+#define CL_DEVICE_ADDRESS_BITS                      0x100D
+#define CL_DEVICE_MAX_READ_IMAGE_ARGS               0x100E
+#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS              0x100F
+#define CL_DEVICE_MAX_MEM_ALLOC_SIZE                0x1010
+#define CL_DEVICE_IMAGE2D_MAX_WIDTH                 0x1011
+#define CL_DEVICE_IMAGE2D_MAX_HEIGHT                0x1012
+#define CL_DEVICE_IMAGE3D_MAX_WIDTH                 0x1013
+#define CL_DEVICE_IMAGE3D_MAX_HEIGHT                0x1014
+#define CL_DEVICE_IMAGE3D_MAX_DEPTH                 0x1015
+#define CL_DEVICE_IMAGE_SUPPORT                     0x1016
+#define CL_DEVICE_MAX_PARAMETER_SIZE                0x1017
+#define CL_DEVICE_MAX_SAMPLERS                      0x1018
+#define CL_DEVICE_MEM_BASE_ADDR_ALIGN               0x1019
+#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE          0x101A
+#define CL_DEVICE_SINGLE_FP_CONFIG                  0x101B
+#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE             0x101C
+#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE         0x101D
+#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE             0x101E
+#define CL_DEVICE_GLOBAL_MEM_SIZE                   0x101F
+#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE          0x1020
+#define CL_DEVICE_MAX_CONSTANT_ARGS                 0x1021
+#define CL_DEVICE_LOCAL_MEM_TYPE                    0x1022
+#define CL_DEVICE_LOCAL_MEM_SIZE                    0x1023
+#define CL_DEVICE_ERROR_CORRECTION_SUPPORT          0x1024
+#define CL_DEVICE_PROFILING_TIMER_RESOLUTION        0x1025
+#define CL_DEVICE_ENDIAN_LITTLE                     0x1026
+#define CL_DEVICE_AVAILABLE                         0x1027
+#define CL_DEVICE_COMPILER_AVAILABLE                0x1028
+#define CL_DEVICE_EXECUTION_CAPABILITIES            0x1029
+#define CL_DEVICE_QUEUE_PROPERTIES                  0x102A
+#define CL_DEVICE_NAME                              0x102B
+#define CL_DEVICE_VENDOR                            0x102C
+#define CL_DRIVER_VERSION                           0x102D
+#define CL_DEVICE_PROFILE                           0x102E
+#define CL_DEVICE_VERSION                           0x102F
+#define CL_DEVICE_EXTENSIONS                        0x1030
+#define CL_DEVICE_PLATFORM                          0x1031
+#define CL_DEVICE_DOUBLE_FP_CONFIG                  0x1032
+/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG */
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF       0x1034
+#define CL_DEVICE_HOST_UNIFIED_MEMORY               0x1035
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR          0x1036
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT         0x1037
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT           0x1038
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG          0x1039
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT         0x103A
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE        0x103B
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF          0x103C
+#define CL_DEVICE_OPENCL_C_VERSION                  0x103D
+#define CL_DEVICE_LINKER_AVAILABLE                  0x103E
+#define CL_DEVICE_BUILT_IN_KERNELS                  0x103F
+#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE             0x1040
+#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE              0x1041
+#define CL_DEVICE_PARENT_DEVICE                     0x1042
+#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES         0x1043
+#define CL_DEVICE_PARTITION_PROPERTIES              0x1044
+#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN         0x1045
+#define CL_DEVICE_PARTITION_TYPE                    0x1046
+#define CL_DEVICE_REFERENCE_COUNT                   0x1047
+#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC       0x1048
+#define CL_DEVICE_PRINTF_BUFFER_SIZE                0x1049
+#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT             0x104A
+#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT      0x104B
+
+/* cl_device_fp_config - bitfield */
+#define CL_FP_DENORM                                (1 << 0)
+#define CL_FP_INF_NAN                               (1 << 1)
+#define CL_FP_ROUND_TO_NEAREST                      (1 << 2)
+#define CL_FP_ROUND_TO_ZERO                         (1 << 3)
+#define CL_FP_ROUND_TO_INF                          (1 << 4)
+#define CL_FP_FMA                                   (1 << 5)
+#define CL_FP_SOFT_FLOAT                            (1 << 6)
+#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT         (1 << 7)
+
+/* cl_device_mem_cache_type */
+#define CL_NONE                                     0x0
+#define CL_READ_ONLY_CACHE                          0x1
+#define CL_READ_WRITE_CACHE                         0x2
+
+/* cl_device_local_mem_type */
+#define CL_LOCAL                                    0x1
+#define CL_GLOBAL                                   0x2
+
+/* cl_device_exec_capabilities - bitfield */
+#define CL_EXEC_KERNEL                              (1 << 0)
+#define CL_EXEC_NATIVE_KERNEL                       (1 << 1)
+
+/* cl_command_queue_properties - bitfield */
+#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE      (1 << 0)
+#define CL_QUEUE_PROFILING_ENABLE                   (1 << 1)
+
+/* cl_context_info  */
+#define CL_CONTEXT_REFERENCE_COUNT                  0x1080
+#define CL_CONTEXT_DEVICES                          0x1081
+#define CL_CONTEXT_PROPERTIES                       0x1082
+#define CL_CONTEXT_NUM_DEVICES                      0x1083
+
+/* cl_context_properties */
+#define CL_CONTEXT_PLATFORM                         0x1084
+#define CL_CONTEXT_INTEROP_USER_SYNC                0x1085
+
+/* cl_device_partition_property */
+#define CL_DEVICE_PARTITION_EQUALLY                 0x1086
+#define CL_DEVICE_PARTITION_BY_COUNTS               0x1087
+#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END      0x0
+#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN      0x1088
+
+/* cl_device_affinity_domain */
+#define CL_DEVICE_AFFINITY_DOMAIN_NUMA                     (1 << 0)
+#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE                 (1 << 1)
+#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE                 (1 << 2)
+#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE                 (1 << 3)
+#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE                 (1 << 4)
+#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE       (1 << 5)
+
+/* cl_command_queue_info */
+#define CL_QUEUE_CONTEXT                            0x1090
+#define CL_QUEUE_DEVICE                             0x1091
+#define CL_QUEUE_REFERENCE_COUNT                    0x1092
+#define CL_QUEUE_PROPERTIES                         0x1093
+
+/* cl_mem_flags - bitfield */
+#define CL_MEM_READ_WRITE                           (1 << 0)
+#define CL_MEM_WRITE_ONLY                           (1 << 1)
+#define CL_MEM_READ_ONLY                            (1 << 2)
+#define CL_MEM_USE_HOST_PTR                         (1 << 3)
+#define CL_MEM_ALLOC_HOST_PTR                       (1 << 4)
+#define CL_MEM_COPY_HOST_PTR                        (1 << 5)
+// reserved                                         (1 << 6)
+#define CL_MEM_HOST_WRITE_ONLY                      (1 << 7)
+#define CL_MEM_HOST_READ_ONLY                       (1 << 8)
+#define CL_MEM_HOST_NO_ACCESS                       (1 << 9)
+
+/* cl_mem_migration_flags - bitfield */
+#define CL_MIGRATE_MEM_OBJECT_HOST                  (1 << 0)
+#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED     (1 << 1)
+
+/* cl_channel_order */
+#define CL_R                                        0x10B0
+#define CL_A                                        0x10B1
+#define CL_RG                                       0x10B2
+#define CL_RA                                       0x10B3
+#define CL_RGB                                      0x10B4
+#define CL_RGBA                                     0x10B5
+#define CL_BGRA                                     0x10B6
+#define CL_ARGB                                     0x10B7
+#define CL_INTENSITY                                0x10B8
+#define CL_LUMINANCE                                0x10B9
+#define CL_Rx                                       0x10BA
+#define CL_RGx                                      0x10BB
+#define CL_RGBx                                     0x10BC
+#define CL_DEPTH                                    0x10BD
+#define CL_DEPTH_STENCIL                            0x10BE
+
+/* cl_channel_type */
+#define CL_SNORM_INT8                               0x10D0
+#define CL_SNORM_INT16                              0x10D1
+#define CL_UNORM_INT8                               0x10D2
+#define CL_UNORM_INT16                              0x10D3
+#define CL_UNORM_SHORT_565                          0x10D4
+#define CL_UNORM_SHORT_555                          0x10D5
+#define CL_UNORM_INT_101010                         0x10D6
+#define CL_SIGNED_INT8                              0x10D7
+#define CL_SIGNED_INT16                             0x10D8
+#define CL_SIGNED_INT32                             0x10D9
+#define CL_UNSIGNED_INT8                            0x10DA
+#define CL_UNSIGNED_INT16                           0x10DB
+#define CL_UNSIGNED_INT32                           0x10DC
+#define CL_HALF_FLOAT                               0x10DD
+#define CL_FLOAT                                    0x10DE
+#define CL_UNORM_INT24                              0x10DF
+
+/* cl_mem_object_type */
+#define CL_MEM_OBJECT_BUFFER                        0x10F0
+#define CL_MEM_OBJECT_IMAGE2D                       0x10F1
+#define CL_MEM_OBJECT_IMAGE3D                       0x10F2
+#define CL_MEM_OBJECT_IMAGE2D_ARRAY                 0x10F3
+#define CL_MEM_OBJECT_IMAGE1D                       0x10F4
+#define CL_MEM_OBJECT_IMAGE1D_ARRAY                 0x10F5
+#define CL_MEM_OBJECT_IMAGE1D_BUFFER                0x10F6
+
+/* cl_mem_info */
+#define CL_MEM_TYPE                                 0x1100
+#define CL_MEM_FLAGS                                0x1101
+#define CL_MEM_SIZE                                 0x1102
+#define CL_MEM_HOST_PTR                             0x1103
+#define CL_MEM_MAP_COUNT                            0x1104
+#define CL_MEM_REFERENCE_COUNT                      0x1105
+#define CL_MEM_CONTEXT                              0x1106
+#define CL_MEM_ASSOCIATED_MEMOBJECT                 0x1107
+#define CL_MEM_OFFSET                               0x1108
+
+/* cl_image_info */
+#define CL_IMAGE_FORMAT                             0x1110
+#define CL_IMAGE_ELEMENT_SIZE                       0x1111
+#define CL_IMAGE_ROW_PITCH                          0x1112
+#define CL_IMAGE_SLICE_PITCH                        0x1113
+#define CL_IMAGE_WIDTH                              0x1114
+#define CL_IMAGE_HEIGHT                             0x1115
+#define CL_IMAGE_DEPTH                              0x1116
+#define CL_IMAGE_ARRAY_SIZE                         0x1117
+#define CL_IMAGE_BUFFER                             0x1118
+#define CL_IMAGE_NUM_MIP_LEVELS                     0x1119
+#define CL_IMAGE_NUM_SAMPLES                        0x111A
+
+/* cl_addressing_mode */
+#define CL_ADDRESS_NONE                             0x1130
+#define CL_ADDRESS_CLAMP_TO_EDGE                    0x1131
+#define CL_ADDRESS_CLAMP                            0x1132
+#define CL_ADDRESS_REPEAT                           0x1133
+#define CL_ADDRESS_MIRRORED_REPEAT                  0x1134
+
+/* cl_filter_mode */
+#define CL_FILTER_NEAREST                           0x1140
+#define CL_FILTER_LINEAR                            0x1141
+
+/* cl_sampler_info */
+#define CL_SAMPLER_REFERENCE_COUNT                  0x1150
+#define CL_SAMPLER_CONTEXT                          0x1151
+#define CL_SAMPLER_NORMALIZED_COORDS                0x1152
+#define CL_SAMPLER_ADDRESSING_MODE                  0x1153
+#define CL_SAMPLER_FILTER_MODE                      0x1154
+
+/* cl_map_flags - bitfield */
+#define CL_MAP_READ                                 (1 << 0)
+#define CL_MAP_WRITE                                (1 << 1)
+#define CL_MAP_WRITE_INVALIDATE_REGION              (1 << 2)
+
+/* cl_program_info */
+#define CL_PROGRAM_REFERENCE_COUNT                  0x1160
+#define CL_PROGRAM_CONTEXT                          0x1161
+#define CL_PROGRAM_NUM_DEVICES                      0x1162
+#define CL_PROGRAM_DEVICES                          0x1163
+#define CL_PROGRAM_SOURCE                           0x1164
+#define CL_PROGRAM_BINARY_SIZES                     0x1165
+#define CL_PROGRAM_BINARIES                         0x1166
+#define CL_PROGRAM_NUM_KERNELS                      0x1167
+#define CL_PROGRAM_KERNEL_NAMES                     0x1168
+
+/* cl_program_build_info */
+#define CL_PROGRAM_BUILD_STATUS                     0x1181
+#define CL_PROGRAM_BUILD_OPTIONS                    0x1182
+#define CL_PROGRAM_BUILD_LOG                        0x1183
+#define CL_PROGRAM_BINARY_TYPE                      0x1184
+
+/* cl_program_binary_type */
+#define CL_PROGRAM_BINARY_TYPE_NONE                 0x0
+#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT      0x1
+#define CL_PROGRAM_BINARY_TYPE_LIBRARY              0x2
+#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE           0x4
+
+/* cl_build_status */
+#define CL_BUILD_SUCCESS                            0
+#define CL_BUILD_NONE                               -1
+#define CL_BUILD_ERROR                              -2
+#define CL_BUILD_IN_PROGRESS                        -3
+
+/* cl_kernel_info */
+#define CL_KERNEL_FUNCTION_NAME                     0x1190
+#define CL_KERNEL_NUM_ARGS                          0x1191
+#define CL_KERNEL_REFERENCE_COUNT                   0x1192
+#define CL_KERNEL_CONTEXT                           0x1193
+#define CL_KERNEL_PROGRAM                           0x1194
+#define CL_KERNEL_ATTRIBUTES                        0x1195
+
+/* cl_kernel_arg_info */
+#define CL_KERNEL_ARG_ADDRESS_QUALIFIER             0x1196
+#define CL_KERNEL_ARG_ACCESS_QUALIFIER              0x1197
+#define CL_KERNEL_ARG_TYPE_NAME                     0x1198
+#define CL_KERNEL_ARG_TYPE_QUALIFIER                0x1199
+#define CL_KERNEL_ARG_NAME                          0x119A
+
+/* cl_kernel_arg_address_qualifier */
+#define CL_KERNEL_ARG_ADDRESS_GLOBAL                0x119B
+#define CL_KERNEL_ARG_ADDRESS_LOCAL                 0x119C
+#define CL_KERNEL_ARG_ADDRESS_CONSTANT              0x119D
+#define CL_KERNEL_ARG_ADDRESS_PRIVATE               0x119E
+
+/* cl_kernel_arg_access_qualifier */
+#define CL_KERNEL_ARG_ACCESS_READ_ONLY              0x11A0
+#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY             0x11A1
+#define CL_KERNEL_ARG_ACCESS_READ_WRITE             0x11A2
+#define CL_KERNEL_ARG_ACCESS_NONE                   0x11A3
+
+/* cl_kernel_arg_type_qualifer */
+#define CL_KERNEL_ARG_TYPE_NONE                     0
+#define CL_KERNEL_ARG_TYPE_CONST                    (1 << 0)
+#define CL_KERNEL_ARG_TYPE_RESTRICT                 (1 << 1)
+#define CL_KERNEL_ARG_TYPE_VOLATILE                 (1 << 2)
+
+/* cl_kernel_work_group_info */
+#define CL_KERNEL_WORK_GROUP_SIZE                   0x11B0
+#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE           0x11B1
+#define CL_KERNEL_LOCAL_MEM_SIZE                    0x11B2
+#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3
+#define CL_KERNEL_PRIVATE_MEM_SIZE                  0x11B4
+#define CL_KERNEL_GLOBAL_WORK_SIZE                  0x11B5
+
+/* cl_event_info  */
+#define CL_EVENT_COMMAND_QUEUE                      0x11D0
+#define CL_EVENT_COMMAND_TYPE                       0x11D1
+#define CL_EVENT_REFERENCE_COUNT                    0x11D2
+#define CL_EVENT_COMMAND_EXECUTION_STATUS           0x11D3
+#define CL_EVENT_CONTEXT                            0x11D4
+
+/* cl_command_type */
+#define CL_COMMAND_NDRANGE_KERNEL                   0x11F0
+#define CL_COMMAND_TASK                             0x11F1
+#define CL_COMMAND_NATIVE_KERNEL                    0x11F2
+#define CL_COMMAND_READ_BUFFER                      0x11F3
+#define CL_COMMAND_WRITE_BUFFER                     0x11F4
+#define CL_COMMAND_COPY_BUFFER                      0x11F5
+#define CL_COMMAND_READ_IMAGE                       0x11F6
+#define CL_COMMAND_WRITE_IMAGE                      0x11F7
+#define CL_COMMAND_COPY_IMAGE                       0x11F8
+#define CL_COMMAND_COPY_IMAGE_TO_BUFFER             0x11F9
+#define CL_COMMAND_COPY_BUFFER_TO_IMAGE             0x11FA
+#define CL_COMMAND_MAP_BUFFER                       0x11FB
+#define CL_COMMAND_MAP_IMAGE                        0x11FC
+#define CL_COMMAND_UNMAP_MEM_OBJECT                 0x11FD
+#define CL_COMMAND_MARKER                           0x11FE
+#define CL_COMMAND_ACQUIRE_GL_OBJECTS               0x11FF
+#define CL_COMMAND_RELEASE_GL_OBJECTS               0x1200
+#define CL_COMMAND_READ_BUFFER_RECT                 0x1201
+#define CL_COMMAND_WRITE_BUFFER_RECT                0x1202
+#define CL_COMMAND_COPY_BUFFER_RECT                 0x1203
+#define CL_COMMAND_USER                             0x1204
+#define CL_COMMAND_BARRIER                          0x1205
+#define CL_COMMAND_MIGRATE_MEM_OBJECTS              0x1206
+#define CL_COMMAND_FILL_BUFFER                      0x1207
+#define CL_COMMAND_FILL_IMAGE                       0x1208
+
+/* command execution status */
+#define CL_COMPLETE                                 0x0
+#define CL_RUNNING                                  0x1
+#define CL_SUBMITTED                                0x2
+#define CL_QUEUED                                   0x3
+
+/* cl_buffer_create_type  */
+#define CL_BUFFER_CREATE_TYPE_REGION                0x1220
+
+/* cl_profiling_info  */
+#define CL_PROFILING_COMMAND_QUEUED                 0x1280
+#define CL_PROFILING_COMMAND_SUBMIT                 0x1281
+#define CL_PROFILING_COMMAND_START                  0x1282
+#define CL_PROFILING_COMMAND_END                    0x1283
+
+/********************************************************************************************************/
+
+/* Platform API */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetPlatformIDs(cl_uint          /* num_entries */,
+                 cl_platform_id * /* platforms */,
+                 cl_uint *        /* num_platforms */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetPlatformInfo(cl_platform_id   /* platform */,
+                  cl_platform_info /* param_name */,
+                  size_t           /* param_value_size */,
+                  void *           /* param_value */,
+                  size_t *         /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Device APIs */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetDeviceIDs(cl_platform_id   /* platform */,
+               cl_device_type   /* device_type */,
+               cl_uint          /* num_entries */,
+               cl_device_id *   /* devices */,
+               cl_uint *        /* num_devices */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetDeviceInfo(cl_device_id    /* device */,
+                cl_device_info  /* param_name */,
+                size_t          /* param_value_size */,
+                void *          /* param_value */,
+                size_t *        /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clCreateSubDevices(cl_device_id                         /* in_device */,
+                   const cl_device_partition_property * /* properties */,
+                   cl_uint                              /* num_devices */,
+                   cl_device_id *                       /* out_devices */,
+                   cl_uint *                            /* num_devices_ret */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2;
+
+/* Context APIs  */
+extern CL_API_ENTRY cl_context CL_API_CALL
+clCreateContext(const cl_context_properties * /* properties */,
+                cl_uint                 /* num_devices */,
+                const cl_device_id *    /* devices */,
+                void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *),
+                void *                  /* user_data */,
+                cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_context CL_API_CALL
+clCreateContextFromType(const cl_context_properties * /* properties */,
+                        cl_device_type          /* device_type */,
+                        void (CL_CALLBACK *     /* pfn_notify*/ )(const char *, const void *, size_t, void *),
+                        void *                  /* user_data */,
+                        cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetContextInfo(cl_context         /* context */,
+                 cl_context_info    /* param_name */,
+                 size_t             /* param_value_size */,
+                 void *             /* param_value */,
+                 size_t *           /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Command Queue APIs */
+extern CL_API_ENTRY cl_command_queue CL_API_CALL
+clCreateCommandQueue(cl_context                     /* context */,
+                     cl_device_id                   /* device */,
+                     cl_command_queue_properties    /* properties */,
+                     cl_int *                       /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetCommandQueueInfo(cl_command_queue      /* command_queue */,
+                      cl_command_queue_info /* param_name */,
+                      size_t                /* param_value_size */,
+                      void *                /* param_value */,
+                      size_t *              /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Memory Object APIs */
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateBuffer(cl_context   /* context */,
+               cl_mem_flags /* flags */,
+               size_t       /* size */,
+               void *       /* host_ptr */,
+               cl_int *     /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateSubBuffer(cl_mem                   /* buffer */,
+                  cl_mem_flags             /* flags */,
+                  cl_buffer_create_type    /* buffer_create_type */,
+                  const void *             /* buffer_create_info */,
+                  cl_int *                 /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateImage(cl_context              /* context */,
+              cl_mem_flags            /* flags */,
+              const cl_image_format * /* image_format */,
+              const cl_image_desc *   /* image_desc */,
+              void *                  /* host_ptr */,
+              cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetSupportedImageFormats(cl_context           /* context */,
+                           cl_mem_flags         /* flags */,
+                           cl_mem_object_type   /* image_type */,
+                           cl_uint              /* num_entries */,
+                           cl_image_format *    /* image_formats */,
+                           cl_uint *            /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetMemObjectInfo(cl_mem           /* memobj */,
+                   cl_mem_info      /* param_name */,
+                   size_t           /* param_value_size */,
+                   void *           /* param_value */,
+                   size_t *         /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetImageInfo(cl_mem           /* image */,
+               cl_image_info    /* param_name */,
+               size_t           /* param_value_size */,
+               void *           /* param_value */,
+               size_t *         /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetMemObjectDestructorCallback(  cl_mem /* memobj */,
+                                    void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
+                                    void * /*user_data */ )             CL_API_SUFFIX__VERSION_1_1;
+
+/* Sampler APIs */
+extern CL_API_ENTRY cl_sampler CL_API_CALL
+clCreateSampler(cl_context          /* context */,
+                cl_bool             /* normalized_coords */,
+                cl_addressing_mode  /* addressing_mode */,
+                cl_filter_mode      /* filter_mode */,
+                cl_int *            /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetSamplerInfo(cl_sampler         /* sampler */,
+                 cl_sampler_info    /* param_name */,
+                 size_t             /* param_value_size */,
+                 void *             /* param_value */,
+                 size_t *           /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Program Object APIs  */
+extern CL_API_ENTRY cl_program CL_API_CALL
+clCreateProgramWithSource(cl_context        /* context */,
+                          cl_uint           /* count */,
+                          const char **     /* strings */,
+                          const size_t *    /* lengths */,
+                          cl_int *          /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_program CL_API_CALL
+clCreateProgramWithBinary(cl_context                     /* context */,
+                          cl_uint                        /* num_devices */,
+                          const cl_device_id *           /* device_list */,
+                          const size_t *                 /* lengths */,
+                          const unsigned char **         /* binaries */,
+                          cl_int *                       /* binary_status */,
+                          cl_int *                       /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_program CL_API_CALL
+clCreateProgramWithBuiltInKernels(cl_context            /* context */,
+                                  cl_uint               /* num_devices */,
+                                  const cl_device_id *  /* device_list */,
+                                  const char *          /* kernel_names */,
+                                  cl_int *              /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clBuildProgram(cl_program           /* program */,
+               cl_uint              /* num_devices */,
+               const cl_device_id * /* device_list */,
+               const char *         /* options */,
+               void (CL_CALLBACK *  /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
+               void *               /* user_data */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clCompileProgram(cl_program           /* program */,
+                 cl_uint              /* num_devices */,
+                 const cl_device_id * /* device_list */,
+                 const char *         /* options */,
+                 cl_uint              /* num_input_headers */,
+                 const cl_program *   /* input_headers */,
+                 const char **        /* header_include_names */,
+                 void (CL_CALLBACK *  /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
+                 void *               /* user_data */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_program CL_API_CALL
+clLinkProgram(cl_context           /* context */,
+              cl_uint              /* num_devices */,
+              const cl_device_id * /* device_list */,
+              const char *         /* options */,
+              cl_uint              /* num_input_programs */,
+              const cl_program *   /* input_programs */,
+              void (CL_CALLBACK *  /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
+              void *               /* user_data */,
+              cl_int *             /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_2;
+
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clUnloadPlatformCompiler(cl_platform_id /* platform */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetProgramInfo(cl_program         /* program */,
+                 cl_program_info    /* param_name */,
+                 size_t             /* param_value_size */,
+                 void *             /* param_value */,
+                 size_t *           /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetProgramBuildInfo(cl_program            /* program */,
+                      cl_device_id          /* device */,
+                      cl_program_build_info /* param_name */,
+                      size_t                /* param_value_size */,
+                      void *                /* param_value */,
+                      size_t *              /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Kernel Object APIs */
+extern CL_API_ENTRY cl_kernel CL_API_CALL
+clCreateKernel(cl_program      /* program */,
+               const char *    /* kernel_name */,
+               cl_int *        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clCreateKernelsInProgram(cl_program     /* program */,
+                         cl_uint        /* num_kernels */,
+                         cl_kernel *    /* kernels */,
+                         cl_uint *      /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainKernel(cl_kernel    /* kernel */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseKernel(cl_kernel   /* kernel */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetKernelArg(cl_kernel    /* kernel */,
+               cl_uint      /* arg_index */,
+               size_t       /* arg_size */,
+               const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetKernelInfo(cl_kernel       /* kernel */,
+                cl_kernel_info  /* param_name */,
+                size_t          /* param_value_size */,
+                void *          /* param_value */,
+                size_t *        /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetKernelArgInfo(cl_kernel       /* kernel */,
+                   cl_uint         /* arg_indx */,
+                   cl_kernel_arg_info  /* param_name */,
+                   size_t          /* param_value_size */,
+                   void *          /* param_value */,
+                   size_t *        /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetKernelWorkGroupInfo(cl_kernel                  /* kernel */,
+                         cl_device_id               /* device */,
+                         cl_kernel_work_group_info  /* param_name */,
+                         size_t                     /* param_value_size */,
+                         void *                     /* param_value */,
+                         size_t *                   /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Event Object APIs */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clWaitForEvents(cl_uint             /* num_events */,
+                const cl_event *    /* event_list */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetEventInfo(cl_event         /* event */,
+               cl_event_info    /* param_name */,
+               size_t           /* param_value_size */,
+               void *           /* param_value */,
+               size_t *         /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_event CL_API_CALL
+clCreateUserEvent(cl_context    /* context */,
+                  cl_int *      /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetUserEventStatus(cl_event   /* event */,
+                     cl_int     /* execution_status */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetEventCallback( cl_event    /* event */,
+                    cl_int      /* command_exec_callback_type */,
+                    void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *),
+                    void *      /* user_data */) CL_API_SUFFIX__VERSION_1_1;
+
+/* Profiling APIs */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetEventProfilingInfo(cl_event            /* event */,
+                        cl_profiling_info   /* param_name */,
+                        size_t              /* param_value_size */,
+                        void *              /* param_value */,
+                        size_t *            /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Flush and Finish APIs */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clFlush(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Enqueued Commands APIs */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReadBuffer(cl_command_queue    /* command_queue */,
+                    cl_mem              /* buffer */,
+                    cl_bool             /* blocking_read */,
+                    size_t              /* offset */,
+                    size_t              /* size */,
+                    void *              /* ptr */,
+                    cl_uint             /* num_events_in_wait_list */,
+                    const cl_event *    /* event_wait_list */,
+                    cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReadBufferRect(cl_command_queue    /* command_queue */,
+                        cl_mem              /* buffer */,
+                        cl_bool             /* blocking_read */,
+                        const size_t *      /* buffer_offset */,
+                        const size_t *      /* host_offset */,
+                        const size_t *      /* region */,
+                        size_t              /* buffer_row_pitch */,
+                        size_t              /* buffer_slice_pitch */,
+                        size_t              /* host_row_pitch */,
+                        size_t              /* host_slice_pitch */,
+                        void *              /* ptr */,
+                        cl_uint             /* num_events_in_wait_list */,
+                        const cl_event *    /* event_wait_list */,
+                        cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueWriteBuffer(cl_command_queue   /* command_queue */,
+                     cl_mem             /* buffer */,
+                     cl_bool            /* blocking_write */,
+                     size_t             /* offset */,
+                     size_t             /* size */,
+                     const void *       /* ptr */,
+                     cl_uint            /* num_events_in_wait_list */,
+                     const cl_event *   /* event_wait_list */,
+                     cl_event *         /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueWriteBufferRect(cl_command_queue    /* command_queue */,
+                         cl_mem              /* buffer */,
+                         cl_bool             /* blocking_write */,
+                         const size_t *      /* buffer_offset */,
+                         const size_t *      /* host_offset */,
+                         const size_t *      /* region */,
+                         size_t              /* buffer_row_pitch */,
+                         size_t              /* buffer_slice_pitch */,
+                         size_t              /* host_row_pitch */,
+                         size_t              /* host_slice_pitch */,
+                         const void *        /* ptr */,
+                         cl_uint             /* num_events_in_wait_list */,
+                         const cl_event *    /* event_wait_list */,
+                         cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueFillBuffer(cl_command_queue   /* command_queue */,
+                    cl_mem             /* buffer */,
+                    const void *       /* pattern */,
+                    size_t             /* pattern_size */,
+                    size_t             /* offset */,
+                    size_t             /* size */,
+                    cl_uint            /* num_events_in_wait_list */,
+                    const cl_event *   /* event_wait_list */,
+                    cl_event *         /* event */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyBuffer(cl_command_queue    /* command_queue */,
+                    cl_mem              /* src_buffer */,
+                    cl_mem              /* dst_buffer */,
+                    size_t              /* src_offset */,
+                    size_t              /* dst_offset */,
+                    size_t              /* size */,
+                    cl_uint             /* num_events_in_wait_list */,
+                    const cl_event *    /* event_wait_list */,
+                    cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyBufferRect(cl_command_queue    /* command_queue */,
+                        cl_mem              /* src_buffer */,
+                        cl_mem              /* dst_buffer */,
+                        const size_t *      /* src_origin */,
+                        const size_t *      /* dst_origin */,
+                        const size_t *      /* region */,
+                        size_t              /* src_row_pitch */,
+                        size_t              /* src_slice_pitch */,
+                        size_t              /* dst_row_pitch */,
+                        size_t              /* dst_slice_pitch */,
+                        cl_uint             /* num_events_in_wait_list */,
+                        const cl_event *    /* event_wait_list */,
+                        cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReadImage(cl_command_queue     /* command_queue */,
+                   cl_mem               /* image */,
+                   cl_bool              /* blocking_read */,
+                   const size_t *       /* origin[3] */,
+                   const size_t *       /* region[3] */,
+                   size_t               /* row_pitch */,
+                   size_t               /* slice_pitch */,
+                   void *               /* ptr */,
+                   cl_uint              /* num_events_in_wait_list */,
+                   const cl_event *     /* event_wait_list */,
+                   cl_event *           /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueWriteImage(cl_command_queue    /* command_queue */,
+                    cl_mem              /* image */,
+                    cl_bool             /* blocking_write */,
+                    const size_t *      /* origin[3] */,
+                    const size_t *      /* region[3] */,
+                    size_t              /* input_row_pitch */,
+                    size_t              /* input_slice_pitch */,
+                    const void *        /* ptr */,
+                    cl_uint             /* num_events_in_wait_list */,
+                    const cl_event *    /* event_wait_list */,
+                    cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueFillImage(cl_command_queue   /* command_queue */,
+                   cl_mem             /* image */,
+                   const void *       /* fill_color */,
+                   const size_t *     /* origin[3] */,
+                   const size_t *     /* region[3] */,
+                   cl_uint            /* num_events_in_wait_list */,
+                   const cl_event *   /* event_wait_list */,
+                   cl_event *         /* event */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyImage(cl_command_queue     /* command_queue */,
+                   cl_mem               /* src_image */,
+                   cl_mem               /* dst_image */,
+                   const size_t *       /* src_origin[3] */,
+                   const size_t *       /* dst_origin[3] */,
+                   const size_t *       /* region[3] */,
+                   cl_uint              /* num_events_in_wait_list */,
+                   const cl_event *     /* event_wait_list */,
+                   cl_event *           /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyImageToBuffer(cl_command_queue /* command_queue */,
+                           cl_mem           /* src_image */,
+                           cl_mem           /* dst_buffer */,
+                           const size_t *   /* src_origin[3] */,
+                           const size_t *   /* region[3] */,
+                           size_t           /* dst_offset */,
+                           cl_uint          /* num_events_in_wait_list */,
+                           const cl_event * /* event_wait_list */,
+                           cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyBufferToImage(cl_command_queue /* command_queue */,
+                           cl_mem           /* src_buffer */,
+                           cl_mem           /* dst_image */,
+                           size_t           /* src_offset */,
+                           const size_t *   /* dst_origin[3] */,
+                           const size_t *   /* region[3] */,
+                           cl_uint          /* num_events_in_wait_list */,
+                           const cl_event * /* event_wait_list */,
+                           cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY void * CL_API_CALL
+clEnqueueMapBuffer(cl_command_queue /* command_queue */,
+                   cl_mem           /* buffer */,
+                   cl_bool          /* blocking_map */,
+                   cl_map_flags     /* map_flags */,
+                   size_t           /* offset */,
+                   size_t           /* size */,
+                   cl_uint          /* num_events_in_wait_list */,
+                   const cl_event * /* event_wait_list */,
+                   cl_event *       /* event */,
+                   cl_int *         /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY void * CL_API_CALL
+clEnqueueMapImage(cl_command_queue  /* command_queue */,
+                  cl_mem            /* image */,
+                  cl_bool           /* blocking_map */,
+                  cl_map_flags      /* map_flags */,
+                  const size_t *    /* origin[3] */,
+                  const size_t *    /* region[3] */,
+                  size_t *          /* image_row_pitch */,
+                  size_t *          /* image_slice_pitch */,
+                  cl_uint           /* num_events_in_wait_list */,
+                  const cl_event *  /* event_wait_list */,
+                  cl_event *        /* event */,
+                  cl_int *          /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueUnmapMemObject(cl_command_queue /* command_queue */,
+                        cl_mem           /* memobj */,
+                        void *           /* mapped_ptr */,
+                        cl_uint          /* num_events_in_wait_list */,
+                        const cl_event *  /* event_wait_list */,
+                        cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueMigrateMemObjects(cl_command_queue       /* command_queue */,
+                           cl_uint                /* num_mem_objects */,
+                           const cl_mem *         /* mem_objects */,
+                           cl_mem_migration_flags /* flags */,
+                           cl_uint                /* num_events_in_wait_list */,
+                           const cl_event *       /* event_wait_list */,
+                           cl_event *             /* event */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
+                       cl_kernel        /* kernel */,
+                       cl_uint          /* work_dim */,
+                       const size_t *   /* global_work_offset */,
+                       const size_t *   /* global_work_size */,
+                       const size_t *   /* local_work_size */,
+                       cl_uint          /* num_events_in_wait_list */,
+                       const cl_event * /* event_wait_list */,
+                       cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueTask(cl_command_queue  /* command_queue */,
+              cl_kernel         /* kernel */,
+              cl_uint           /* num_events_in_wait_list */,
+              const cl_event *  /* event_wait_list */,
+              cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueNativeKernel(cl_command_queue  /* command_queue */,
+                      void (CL_CALLBACK * /*user_func*/)(void *),
+                      void *            /* args */,
+                      size_t            /* cb_args */,
+                      cl_uint           /* num_mem_objects */,
+                      const cl_mem *    /* mem_list */,
+                      const void **     /* args_mem_loc */,
+                      cl_uint           /* num_events_in_wait_list */,
+                      const cl_event *  /* event_wait_list */,
+                      cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueMarkerWithWaitList(cl_command_queue /* command_queue */,
+                            cl_uint           /* num_events_in_wait_list */,
+                            const cl_event *  /* event_wait_list */,
+                            cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueBarrierWithWaitList(cl_command_queue /* command_queue */,
+                             cl_uint           /* num_events_in_wait_list */,
+                             const cl_event *  /* event_wait_list */,
+                             cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_2;
+
+
+/* Extension function access
+ *
+ * Returns the extension function address for the given function name,
+ * or NULL if a valid function can not be found.  The client must
+ * check to make sure the address is not NULL, before using or
+ * calling the returned function address.
+ */
+extern CL_API_ENTRY void * CL_API_CALL
+clGetExtensionFunctionAddressForPlatform(cl_platform_id /* platform */,
+                                         const char *   /* func_name */) CL_API_SUFFIX__VERSION_1_2;
+
+
+// Deprecated OpenCL 1.1 APIs
+extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
+clCreateImage2D(cl_context              /* context */,
+                cl_mem_flags            /* flags */,
+                const cl_image_format * /* image_format */,
+                size_t                  /* image_width */,
+                size_t                  /* image_height */,
+                size_t                  /* image_row_pitch */,
+                void *                  /* host_ptr */,
+                cl_int *                /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
+clCreateImage3D(cl_context              /* context */,
+                cl_mem_flags            /* flags */,
+                const cl_image_format * /* image_format */,
+                size_t                  /* image_width */,
+                size_t                  /* image_height */,
+                size_t                  /* image_depth */,
+                size_t                  /* image_row_pitch */,
+                size_t                  /* image_slice_pitch */,
+                void *                  /* host_ptr */,
+                cl_int *                /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
+clEnqueueMarker(cl_command_queue    /* command_queue */,
+                cl_event *          /* event */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
+clEnqueueWaitForEvents(cl_command_queue /* command_queue */,
+                        cl_uint          /* num_events */,
+                        const cl_event * /* event_list */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
+clEnqueueBarrier(cl_command_queue /* command_queue */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
+clUnloadCompiler(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL
+clGetExtensionFunctionAddress(const char * /* func_name */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* __OPENCL_CL_H */
diff --git a/extras/cl_platform.h b/extras/cl_platform.h
new file mode 100644 (file)
index 0000000..edc7321
--- /dev/null
@@ -0,0 +1,1254 @@
+/**********************************************************************************
+ * Copyright (c) 2008-2012 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ **********************************************************************************/
+
+/* $Revision: 11803 $ on $Date: 2010-06-25 10:02:12 -0700 (Fri, 25 Jun 2010) $ */
+
+#ifndef __CL_PLATFORM_H
+#define __CL_PLATFORM_H
+
+#ifdef __APPLE__
+    /* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */
+    #include <AvailabilityMacros.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(_WIN32)
+    #define CL_API_ENTRY
+    #define CL_API_CALL     __stdcall
+    #define CL_CALLBACK     __stdcall
+#else
+    #define CL_API_ENTRY
+    #define CL_API_CALL
+    #define CL_CALLBACK
+#endif
+
+#ifdef __APPLE__
+    #define CL_EXTENSION_WEAK_LINK       __attribute__((weak_import))
+    #define CL_API_SUFFIX__VERSION_1_0                  AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+    #define CL_EXT_SUFFIX__VERSION_1_0                  CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+    #define CL_API_SUFFIX__VERSION_1_1                  AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+    #define GCL_API_SUFFIX__VERSION_1_1                 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+    #define CL_EXT_SUFFIX__VERSION_1_1                  CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+    #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED       CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7
+
+    #ifdef AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
+        #define CL_API_SUFFIX__VERSION_1_2              AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
+        #define GCL_API_SUFFIX__VERSION_1_2             AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
+        #define CL_EXT_SUFFIX__VERSION_1_2              CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
+        #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
+        #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED   CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8
+    #else
+        #warning  This path should never happen outside of internal operating system development.  AvailabilityMacros do not function correctly here!
+        #define CL_API_SUFFIX__VERSION_1_2              AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+        #define GCL_API_SUFFIX__VERSION_1_2             AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+        #define CL_EXT_SUFFIX__VERSION_1_2              CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+        #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED   CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+    #endif
+#else
+    #define CL_EXTENSION_WEAK_LINK
+    #define CL_API_SUFFIX__VERSION_1_0
+    #define CL_EXT_SUFFIX__VERSION_1_0
+    #define CL_API_SUFFIX__VERSION_1_1
+    #define CL_EXT_SUFFIX__VERSION_1_1
+    #define CL_API_SUFFIX__VERSION_1_2
+    #define CL_EXT_SUFFIX__VERSION_1_2
+
+    #ifdef __GNUC__
+        #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
+            #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
+            #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
+        #else
+            #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED __attribute__((deprecated))
+            #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
+        #endif
+
+        #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
+            #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+            #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
+        #else
+            #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED __attribute__((deprecated))
+            #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
+        #endif
+    #elif _WIN32
+        #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
+            #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
+            #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
+        #else
+            #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
+            #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED __declspec(deprecated)
+        #endif
+
+        #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
+            #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+            #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
+        #else
+            #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+            #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED __declspec(deprecated)
+        #endif
+    #else
+        #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
+        #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
+
+        #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+        #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
+    #endif
+#endif
+
+#if (defined (_WIN32) && defined(_MSC_VER))
+
+/* scalar types  */
+typedef signed   __int8         cl_char;
+typedef unsigned __int8         cl_uchar;
+typedef signed   __int16        cl_short;
+typedef unsigned __int16        cl_ushort;
+typedef signed   __int32        cl_int;
+typedef unsigned __int32        cl_uint;
+typedef signed   __int64        cl_long;
+typedef unsigned __int64        cl_ulong;
+
+typedef unsigned __int16        cl_half;
+typedef float                   cl_float;
+typedef double                  cl_double;
+
+/* Macro names and corresponding values defined by OpenCL */
+#define CL_CHAR_BIT         8
+#define CL_SCHAR_MAX        127
+#define CL_SCHAR_MIN        (-127-1)
+#define CL_CHAR_MAX         CL_SCHAR_MAX
+#define CL_CHAR_MIN         CL_SCHAR_MIN
+#define CL_UCHAR_MAX        255
+#define CL_SHRT_MAX         32767
+#define CL_SHRT_MIN         (-32767-1)
+#define CL_USHRT_MAX        65535
+#define CL_INT_MAX          2147483647
+#define CL_INT_MIN          (-2147483647-1)
+#define CL_UINT_MAX         0xffffffffU
+#define CL_LONG_MAX         ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
+#define CL_LONG_MIN         ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
+#define CL_ULONG_MAX        ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
+
+#define CL_FLT_DIG          6
+#define CL_FLT_MANT_DIG     24
+#define CL_FLT_MAX_10_EXP   +38
+#define CL_FLT_MAX_EXP      +128
+#define CL_FLT_MIN_10_EXP   -37
+#define CL_FLT_MIN_EXP      -125
+#define CL_FLT_RADIX        2
+#define CL_FLT_MAX          340282346638528859811704183484516925440.0f
+#define CL_FLT_MIN          1.175494350822287507969e-38f
+#define CL_FLT_EPSILON      0x1.0p-23f
+
+#define CL_DBL_DIG          15
+#define CL_DBL_MANT_DIG     53
+#define CL_DBL_MAX_10_EXP   +308
+#define CL_DBL_MAX_EXP      +1024
+#define CL_DBL_MIN_10_EXP   -307
+#define CL_DBL_MIN_EXP      -1021
+#define CL_DBL_RADIX        2
+#define CL_DBL_MAX          179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0
+#define CL_DBL_MIN          2.225073858507201383090e-308
+#define CL_DBL_EPSILON      2.220446049250313080847e-16
+
+#define  CL_M_E             2.718281828459045090796
+#define  CL_M_LOG2E         1.442695040888963387005
+#define  CL_M_LOG10E        0.434294481903251816668
+#define  CL_M_LN2           0.693147180559945286227
+#define  CL_M_LN10          2.302585092994045901094
+#define  CL_M_PI            3.141592653589793115998
+#define  CL_M_PI_2          1.570796326794896557999
+#define  CL_M_PI_4          0.785398163397448278999
+#define  CL_M_1_PI          0.318309886183790691216
+#define  CL_M_2_PI          0.636619772367581382433
+#define  CL_M_2_SQRTPI      1.128379167095512558561
+#define  CL_M_SQRT2         1.414213562373095145475
+#define  CL_M_SQRT1_2       0.707106781186547572737
+
+#define  CL_M_E_F           2.71828174591064f
+#define  CL_M_LOG2E_F       1.44269502162933f
+#define  CL_M_LOG10E_F      0.43429449200630f
+#define  CL_M_LN2_F         0.69314718246460f
+#define  CL_M_LN10_F        2.30258512496948f
+#define  CL_M_PI_F          3.14159274101257f
+#define  CL_M_PI_2_F        1.57079637050629f
+#define  CL_M_PI_4_F        0.78539818525314f
+#define  CL_M_1_PI_F        0.31830987334251f
+#define  CL_M_2_PI_F        0.63661974668503f
+#define  CL_M_2_SQRTPI_F    1.12837922573090f
+#define  CL_M_SQRT2_F       1.41421353816986f
+#define  CL_M_SQRT1_2_F     0.70710676908493f
+
+#define CL_NAN              (CL_INFINITY - CL_INFINITY)
+#define CL_HUGE_VALF        ((cl_float) 1e50)
+#define CL_HUGE_VAL         ((cl_double) 1e500)
+#define CL_MAXFLOAT         CL_FLT_MAX
+#define CL_INFINITY         CL_HUGE_VALF
+
+#else
+
+#include <stdint.h>
+
+/* scalar types  */
+typedef int8_t          cl_char;
+typedef uint8_t         cl_uchar;
+typedef int16_t         cl_short    __attribute__((aligned(2)));
+typedef uint16_t        cl_ushort   __attribute__((aligned(2)));
+typedef int32_t         cl_int      __attribute__((aligned(4)));
+typedef uint32_t        cl_uint     __attribute__((aligned(4)));
+typedef int64_t         cl_long     __attribute__((aligned(8)));
+typedef uint64_t        cl_ulong    __attribute__((aligned(8)));
+
+typedef uint16_t        cl_half     __attribute__((aligned(2)));
+typedef float           cl_float    __attribute__((aligned(4)));
+typedef double          cl_double   __attribute__((aligned(8)));
+
+/* Macro names and corresponding values defined by OpenCL */
+#define CL_CHAR_BIT         8
+#define CL_SCHAR_MAX        127
+#define CL_SCHAR_MIN        (-127-1)
+#define CL_CHAR_MAX         CL_SCHAR_MAX
+#define CL_CHAR_MIN         CL_SCHAR_MIN
+#define CL_UCHAR_MAX        255
+#define CL_SHRT_MAX         32767
+#define CL_SHRT_MIN         (-32767-1)
+#define CL_USHRT_MAX        65535
+#define CL_INT_MAX          2147483647
+#define CL_INT_MIN          (-2147483647-1)
+#define CL_UINT_MAX         0xffffffffU
+#define CL_LONG_MAX         ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
+#define CL_LONG_MIN         ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
+#define CL_ULONG_MAX        ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
+
+#define CL_FLT_DIG          6
+#define CL_FLT_MANT_DIG     24
+#define CL_FLT_MAX_10_EXP   +38
+#define CL_FLT_MAX_EXP      +128
+#define CL_FLT_MIN_10_EXP   -37
+#define CL_FLT_MIN_EXP      -125
+#define CL_FLT_RADIX        2
+#define CL_FLT_MAX          0x1.fffffep127f
+#define CL_FLT_MIN          0x1.0p-126f
+#define CL_FLT_EPSILON      0x1.0p-23f
+
+#define CL_DBL_DIG          15
+#define CL_DBL_MANT_DIG     53
+#define CL_DBL_MAX_10_EXP   +308
+#define CL_DBL_MAX_EXP      +1024
+#define CL_DBL_MIN_10_EXP   -307
+#define CL_DBL_MIN_EXP      -1021
+#define CL_DBL_RADIX        2
+#define CL_DBL_MAX          0x1.fffffffffffffp1023
+#define CL_DBL_MIN          0x1.0p-1022
+#define CL_DBL_EPSILON      0x1.0p-52
+
+#define  CL_M_E             2.718281828459045090796
+#define  CL_M_LOG2E         1.442695040888963387005
+#define  CL_M_LOG10E        0.434294481903251816668
+#define  CL_M_LN2           0.693147180559945286227
+#define  CL_M_LN10          2.302585092994045901094
+#define  CL_M_PI            3.141592653589793115998
+#define  CL_M_PI_2          1.570796326794896557999
+#define  CL_M_PI_4          0.785398163397448278999
+#define  CL_M_1_PI          0.318309886183790691216
+#define  CL_M_2_PI          0.636619772367581382433
+#define  CL_M_2_SQRTPI      1.128379167095512558561
+#define  CL_M_SQRT2         1.414213562373095145475
+#define  CL_M_SQRT1_2       0.707106781186547572737
+
+#define  CL_M_E_F           2.71828174591064f
+#define  CL_M_LOG2E_F       1.44269502162933f
+#define  CL_M_LOG10E_F      0.43429449200630f
+#define  CL_M_LN2_F         0.69314718246460f
+#define  CL_M_LN10_F        2.30258512496948f
+#define  CL_M_PI_F          3.14159274101257f
+#define  CL_M_PI_2_F        1.57079637050629f
+#define  CL_M_PI_4_F        0.78539818525314f
+#define  CL_M_1_PI_F        0.31830987334251f
+#define  CL_M_2_PI_F        0.63661974668503f
+#define  CL_M_2_SQRTPI_F    1.12837922573090f
+#define  CL_M_SQRT2_F       1.41421353816986f
+#define  CL_M_SQRT1_2_F     0.70710676908493f
+
+#if defined( __GNUC__ )
+   #define CL_HUGE_VALF     __builtin_huge_valf()
+   #define CL_HUGE_VAL      __builtin_huge_val()
+   #define CL_NAN           __builtin_nanf( "" )
+#else
+   #define CL_HUGE_VALF     ((cl_float) 1e50)
+   #define CL_HUGE_VAL      ((cl_double) 1e500)
+   float nanf( const char * );
+   #define CL_NAN           nanf( "" )
+#endif
+#define CL_MAXFLOAT         CL_FLT_MAX
+#define CL_INFINITY         CL_HUGE_VALF
+
+#endif
+
+#include <stddef.h>
+
+/* Mirror types to GL types. Mirror types allow us to avoid deciding which 87s to load based on whether we are using GL or GLES here. */
+typedef unsigned int cl_GLuint;
+typedef int          cl_GLint;
+typedef unsigned int cl_GLenum;
+
+/*
+ * Vector types
+ *
+ *  Note:   OpenCL requires that all types be naturally aligned.
+ *          This means that vector types must be naturally aligned.
+ *          For example, a vector of four floats must be aligned to
+ *          a 16 byte boundary (calculated as 4 * the natural 4-byte
+ *          alignment of the float).  The alignment qualifiers here
+ *          will only function properly if your compiler supports them
+ *          and if you don't actively work to defeat them.  For example,
+ *          in order for a cl_float4 to be 16 byte aligned in a struct,
+ *          the start of the struct must itself be 16-byte aligned.
+ *
+ *          Maintaining proper alignment is the user's responsibility.
+ */
+
+/* Define basic vector types */
+#if defined( __VEC__ )
+   #include <altivec.h>   /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. */
+   typedef vector unsigned char     __cl_uchar16;
+   typedef vector signed char       __cl_char16;
+   typedef vector unsigned short    __cl_ushort8;
+   typedef vector signed short      __cl_short8;
+   typedef vector unsigned int      __cl_uint4;
+   typedef vector signed int        __cl_int4;
+   typedef vector float             __cl_float4;
+   #define  __CL_UCHAR16__  1
+   #define  __CL_CHAR16__   1
+   #define  __CL_USHORT8__  1
+   #define  __CL_SHORT8__   1
+   #define  __CL_UINT4__    1
+   #define  __CL_INT4__     1
+   #define  __CL_FLOAT4__   1
+#endif
+
+#if defined( __SSE__ )
+    #if defined( __MINGW64__ )
+        #include <intrin.h>
+    #else
+        #include <xmmintrin.h>
+    #endif
+    #if defined( __GNUC__ )
+        typedef float __cl_float4   __attribute__((vector_size(16)));
+    #else
+        typedef __m128 __cl_float4;
+    #endif
+    #define __CL_FLOAT4__   1
+#endif
+
+#if defined( __SSE2__ )
+    #if defined( __MINGW64__ )
+        #include <intrin.h>
+    #else
+        #include <emmintrin.h>
+    #endif
+    #if defined( __GNUC__ )
+        typedef cl_uchar    __cl_uchar16    __attribute__((vector_size(16)));
+        typedef cl_char     __cl_char16     __attribute__((vector_size(16)));
+        typedef cl_ushort   __cl_ushort8    __attribute__((vector_size(16)));
+        typedef cl_short    __cl_short8     __attribute__((vector_size(16)));
+        typedef cl_uint     __cl_uint4      __attribute__((vector_size(16)));
+        typedef cl_int      __cl_int4       __attribute__((vector_size(16)));
+        typedef cl_ulong    __cl_ulong2     __attribute__((vector_size(16)));
+        typedef cl_long     __cl_long2      __attribute__((vector_size(16)));
+        typedef cl_double   __cl_double2    __attribute__((vector_size(16)));
+    #else
+        typedef __m128i __cl_uchar16;
+        typedef __m128i __cl_char16;
+        typedef __m128i __cl_ushort8;
+        typedef __m128i __cl_short8;
+        typedef __m128i __cl_uint4;
+        typedef __m128i __cl_int4;
+        typedef __m128i __cl_ulong2;
+        typedef __m128i __cl_long2;
+        typedef __m128d __cl_double2;
+    #endif
+    #define __CL_UCHAR16__  1
+    #define __CL_CHAR16__   1
+    #define __CL_USHORT8__  1
+    #define __CL_SHORT8__   1
+    #define __CL_INT4__     1
+    #define __CL_UINT4__    1
+    #define __CL_ULONG2__   1
+    #define __CL_LONG2__    1
+    #define __CL_DOUBLE2__  1
+#endif
+
+#if defined( __MMX__ )
+    #include <mmintrin.h>
+    #if defined( __GNUC__ )
+        typedef cl_uchar    __cl_uchar8     __attribute__((vector_size(8)));
+        typedef cl_char     __cl_char8      __attribute__((vector_size(8)));
+        typedef cl_ushort   __cl_ushort4    __attribute__((vector_size(8)));
+        typedef cl_short    __cl_short4     __attribute__((vector_size(8)));
+        typedef cl_uint     __cl_uint2      __attribute__((vector_size(8)));
+        typedef cl_int      __cl_int2       __attribute__((vector_size(8)));
+        typedef cl_ulong    __cl_ulong1     __attribute__((vector_size(8)));
+        typedef cl_long     __cl_long1      __attribute__((vector_size(8)));
+        typedef cl_float    __cl_float2     __attribute__((vector_size(8)));
+    #else
+        typedef __m64       __cl_uchar8;
+        typedef __m64       __cl_char8;
+        typedef __m64       __cl_ushort4;
+        typedef __m64       __cl_short4;
+        typedef __m64       __cl_uint2;
+        typedef __m64       __cl_int2;
+        typedef __m64       __cl_ulong1;
+        typedef __m64       __cl_long1;
+        typedef __m64       __cl_float2;
+    #endif
+    #define __CL_UCHAR8__   1
+    #define __CL_CHAR8__    1
+    #define __CL_USHORT4__  1
+    #define __CL_SHORT4__   1
+    #define __CL_INT2__     1
+    #define __CL_UINT2__    1
+    #define __CL_ULONG1__   1
+    #define __CL_LONG1__    1
+    #define __CL_FLOAT2__   1
+#endif
+
+#if defined( __AVX__ )
+    #if defined( __MINGW64__ )
+        #include <intrin.h>
+    #else
+        #include <immintrin.h>
+    #endif
+    #if defined( __GNUC__ )
+        typedef cl_float    __cl_float8     __attribute__((vector_size(32)));
+        typedef cl_double   __cl_double4    __attribute__((vector_size(32)));
+    #else
+        typedef __m256      __cl_float8;
+        typedef __m256d     __cl_double4;
+    #endif
+    #define __CL_FLOAT8__   1
+    #define __CL_DOUBLE4__  1
+#endif
+
+/* Define alignment keys */
+#if defined( __GNUC__ )
+    #define CL_ALIGNED(_x)          __attribute__ ((aligned(_x)))
+#elif defined( _WIN32) && (_MSC_VER)
+    /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements     */
+    /* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx                                                 */
+    /* #include <crtdefs.h>                                                                                             */
+    /* #define CL_ALIGNED(_x)          _CRT_ALIGN(_x)                                                                   */
+    #define CL_ALIGNED(_x)
+#else
+   #warning  Need to implement some method to align data here
+   #define  CL_ALIGNED(_x)
+#endif
+
+/* Indicate whether .xyzw, .s0123 and .hi.lo are supported */
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+    /* .xyzw and .s0123...{f|F} are supported */
+    #define CL_HAS_NAMED_VECTOR_FIELDS 1
+    /* .hi and .lo are supported */
+    #define CL_HAS_HI_LO_VECTOR_FIELDS 1
+#endif
+
+/* Define cl_vector types */
+
+/* ---- cl_charn ---- */
+typedef union
+{
+    cl_char  CL_ALIGNED(2) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_char  x, y; };
+   __extension__ struct{ cl_char  s0, s1; };
+   __extension__ struct{ cl_char  lo, hi; };
+#endif
+#if defined( __CL_CHAR2__)
+    __cl_char2     v2;
+#endif
+}cl_char2;
+
+typedef union
+{
+    cl_char  CL_ALIGNED(4) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_char  x, y, z, w; };
+   __extension__ struct{ cl_char  s0, s1, s2, s3; };
+   __extension__ struct{ cl_char2 lo, hi; };
+#endif
+#if defined( __CL_CHAR2__)
+    __cl_char2     v2[2];
+#endif
+#if defined( __CL_CHAR4__)
+    __cl_char4     v4;
+#endif
+}cl_char4;
+
+/* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5. */
+typedef  cl_char4  cl_char3;
+
+typedef union
+{
+    cl_char   CL_ALIGNED(8) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_char  x, y, z, w; };
+   __extension__ struct{ cl_char  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_char4 lo, hi; };
+#endif
+#if defined( __CL_CHAR2__)
+    __cl_char2     v2[4];
+#endif
+#if defined( __CL_CHAR4__)
+    __cl_char4     v4[2];
+#endif
+#if defined( __CL_CHAR8__ )
+    __cl_char8     v8;
+#endif
+}cl_char8;
+
+typedef union
+{
+    cl_char  CL_ALIGNED(16) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_char  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_char  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_char8 lo, hi; };
+#endif
+#if defined( __CL_CHAR2__)
+    __cl_char2     v2[8];
+#endif
+#if defined( __CL_CHAR4__)
+    __cl_char4     v4[4];
+#endif
+#if defined( __CL_CHAR8__ )
+    __cl_char8     v8[2];
+#endif
+#if defined( __CL_CHAR16__ )
+    __cl_char16    v16;
+#endif
+}cl_char16;
+
+
+/* ---- cl_ucharn ---- */
+typedef union
+{
+    cl_uchar  CL_ALIGNED(2) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_uchar  x, y; };
+   __extension__ struct{ cl_uchar  s0, s1; };
+   __extension__ struct{ cl_uchar  lo, hi; };
+#endif
+#if defined( __cl_uchar2__)
+    __cl_uchar2     v2;
+#endif
+}cl_uchar2;
+
+typedef union
+{
+    cl_uchar  CL_ALIGNED(4) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_uchar  x, y, z, w; };
+   __extension__ struct{ cl_uchar  s0, s1, s2, s3; };
+   __extension__ struct{ cl_uchar2 lo, hi; };
+#endif
+#if defined( __CL_UCHAR2__)
+    __cl_uchar2     v2[2];
+#endif
+#if defined( __CL_UCHAR4__)
+    __cl_uchar4     v4;
+#endif
+}cl_uchar4;
+
+/* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. */
+typedef  cl_uchar4  cl_uchar3;
+
+typedef union
+{
+    cl_uchar   CL_ALIGNED(8) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_uchar  x, y, z, w; };
+   __extension__ struct{ cl_uchar  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_uchar4 lo, hi; };
+#endif
+#if defined( __CL_UCHAR2__)
+    __cl_uchar2     v2[4];
+#endif
+#if defined( __CL_UCHAR4__)
+    __cl_uchar4     v4[2];
+#endif
+#if defined( __CL_UCHAR8__ )
+    __cl_uchar8     v8;
+#endif
+}cl_uchar8;
+
+typedef union
+{
+    cl_uchar  CL_ALIGNED(16) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_uchar  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_uchar  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_uchar8 lo, hi; };
+#endif
+#if defined( __CL_UCHAR2__)
+    __cl_uchar2     v2[8];
+#endif
+#if defined( __CL_UCHAR4__)
+    __cl_uchar4     v4[4];
+#endif
+#if defined( __CL_UCHAR8__ )
+    __cl_uchar8     v8[2];
+#endif
+#if defined( __CL_UCHAR16__ )
+    __cl_uchar16    v16;
+#endif
+}cl_uchar16;
+
+
+/* ---- cl_shortn ---- */
+typedef union
+{
+    cl_short  CL_ALIGNED(4) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_short  x, y; };
+   __extension__ struct{ cl_short  s0, s1; };
+   __extension__ struct{ cl_short  lo, hi; };
+#endif
+#if defined( __CL_SHORT2__)
+    __cl_short2     v2;
+#endif
+}cl_short2;
+
+typedef union
+{
+    cl_short  CL_ALIGNED(8) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_short  x, y, z, w; };
+   __extension__ struct{ cl_short  s0, s1, s2, s3; };
+   __extension__ struct{ cl_short2 lo, hi; };
+#endif
+#if defined( __CL_SHORT2__)
+    __cl_short2     v2[2];
+#endif
+#if defined( __CL_SHORT4__)
+    __cl_short4     v4;
+#endif
+}cl_short4;
+
+/* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5. */
+typedef  cl_short4  cl_short3;
+
+typedef union
+{
+    cl_short   CL_ALIGNED(16) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_short  x, y, z, w; };
+   __extension__ struct{ cl_short  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_short4 lo, hi; };
+#endif
+#if defined( __CL_SHORT2__)
+    __cl_short2     v2[4];
+#endif
+#if defined( __CL_SHORT4__)
+    __cl_short4     v4[2];
+#endif
+#if defined( __CL_SHORT8__ )
+    __cl_short8     v8;
+#endif
+}cl_short8;
+
+typedef union
+{
+    cl_short  CL_ALIGNED(32) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_short  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_short  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_short8 lo, hi; };
+#endif
+#if defined( __CL_SHORT2__)
+    __cl_short2     v2[8];
+#endif
+#if defined( __CL_SHORT4__)
+    __cl_short4     v4[4];
+#endif
+#if defined( __CL_SHORT8__ )
+    __cl_short8     v8[2];
+#endif
+#if defined( __CL_SHORT16__ )
+    __cl_short16    v16;
+#endif
+}cl_short16;
+
+
+/* ---- cl_ushortn ---- */
+typedef union
+{
+    cl_ushort  CL_ALIGNED(4) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_ushort  x, y; };
+   __extension__ struct{ cl_ushort  s0, s1; };
+   __extension__ struct{ cl_ushort  lo, hi; };
+#endif
+#if defined( __CL_USHORT2__)
+    __cl_ushort2     v2;
+#endif
+}cl_ushort2;
+
+typedef union
+{
+    cl_ushort  CL_ALIGNED(8) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_ushort  x, y, z, w; };
+   __extension__ struct{ cl_ushort  s0, s1, s2, s3; };
+   __extension__ struct{ cl_ushort2 lo, hi; };
+#endif
+#if defined( __CL_USHORT2__)
+    __cl_ushort2     v2[2];
+#endif
+#if defined( __CL_USHORT4__)
+    __cl_ushort4     v4;
+#endif
+}cl_ushort4;
+
+/* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5. */
+typedef  cl_ushort4  cl_ushort3;
+
+typedef union
+{
+    cl_ushort   CL_ALIGNED(16) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_ushort  x, y, z, w; };
+   __extension__ struct{ cl_ushort  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_ushort4 lo, hi; };
+#endif
+#if defined( __CL_USHORT2__)
+    __cl_ushort2     v2[4];
+#endif
+#if defined( __CL_USHORT4__)
+    __cl_ushort4     v4[2];
+#endif
+#if defined( __CL_USHORT8__ )
+    __cl_ushort8     v8;
+#endif
+}cl_ushort8;
+
+typedef union
+{
+    cl_ushort  CL_ALIGNED(32) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_ushort  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_ushort  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_ushort8 lo, hi; };
+#endif
+#if defined( __CL_USHORT2__)
+    __cl_ushort2     v2[8];
+#endif
+#if defined( __CL_USHORT4__)
+    __cl_ushort4     v4[4];
+#endif
+#if defined( __CL_USHORT8__ )
+    __cl_ushort8     v8[2];
+#endif
+#if defined( __CL_USHORT16__ )
+    __cl_ushort16    v16;
+#endif
+}cl_ushort16;
+
+/* ---- cl_intn ---- */
+typedef union
+{
+    cl_int  CL_ALIGNED(8) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_int  x, y; };
+   __extension__ struct{ cl_int  s0, s1; };
+   __extension__ struct{ cl_int  lo, hi; };
+#endif
+#if defined( __CL_INT2__)
+    __cl_int2     v2;
+#endif
+}cl_int2;
+
+typedef union
+{
+    cl_int  CL_ALIGNED(16) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_int  x, y, z, w; };
+   __extension__ struct{ cl_int  s0, s1, s2, s3; };
+   __extension__ struct{ cl_int2 lo, hi; };
+#endif
+#if defined( __CL_INT2__)
+    __cl_int2     v2[2];
+#endif
+#if defined( __CL_INT4__)
+    __cl_int4     v4;
+#endif
+}cl_int4;
+
+/* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5. */
+typedef  cl_int4  cl_int3;
+
+typedef union
+{
+    cl_int   CL_ALIGNED(32) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_int  x, y, z, w; };
+   __extension__ struct{ cl_int  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_int4 lo, hi; };
+#endif
+#if defined( __CL_INT2__)
+    __cl_int2     v2[4];
+#endif
+#if defined( __CL_INT4__)
+    __cl_int4     v4[2];
+#endif
+#if defined( __CL_INT8__ )
+    __cl_int8     v8;
+#endif
+}cl_int8;
+
+typedef union
+{
+    cl_int  CL_ALIGNED(64) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_int  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_int  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_int8 lo, hi; };
+#endif
+#if defined( __CL_INT2__)
+    __cl_int2     v2[8];
+#endif
+#if defined( __CL_INT4__)
+    __cl_int4     v4[4];
+#endif
+#if defined( __CL_INT8__ )
+    __cl_int8     v8[2];
+#endif
+#if defined( __CL_INT16__ )
+    __cl_int16    v16;
+#endif
+}cl_int16;
+
+
+/* ---- cl_uintn ---- */
+typedef union
+{
+    cl_uint  CL_ALIGNED(8) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_uint  x, y; };
+   __extension__ struct{ cl_uint  s0, s1; };
+   __extension__ struct{ cl_uint  lo, hi; };
+#endif
+#if defined( __CL_UINT2__)
+    __cl_uint2     v2;
+#endif
+}cl_uint2;
+
+typedef union
+{
+    cl_uint  CL_ALIGNED(16) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_uint  x, y, z, w; };
+   __extension__ struct{ cl_uint  s0, s1, s2, s3; };
+   __extension__ struct{ cl_uint2 lo, hi; };
+#endif
+#if defined( __CL_UINT2__)
+    __cl_uint2     v2[2];
+#endif
+#if defined( __CL_UINT4__)
+    __cl_uint4     v4;
+#endif
+}cl_uint4;
+
+/* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5. */
+typedef  cl_uint4  cl_uint3;
+
+typedef union
+{
+    cl_uint   CL_ALIGNED(32) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_uint  x, y, z, w; };
+   __extension__ struct{ cl_uint  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_uint4 lo, hi; };
+#endif
+#if defined( __CL_UINT2__)
+    __cl_uint2     v2[4];
+#endif
+#if defined( __CL_UINT4__)
+    __cl_uint4     v4[2];
+#endif
+#if defined( __CL_UINT8__ )
+    __cl_uint8     v8;
+#endif
+}cl_uint8;
+
+typedef union
+{
+    cl_uint  CL_ALIGNED(64) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_uint  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_uint  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_uint8 lo, hi; };
+#endif
+#if defined( __CL_UINT2__)
+    __cl_uint2     v2[8];
+#endif
+#if defined( __CL_UINT4__)
+    __cl_uint4     v4[4];
+#endif
+#if defined( __CL_UINT8__ )
+    __cl_uint8     v8[2];
+#endif
+#if defined( __CL_UINT16__ )
+    __cl_uint16    v16;
+#endif
+}cl_uint16;
+
+/* ---- cl_longn ---- */
+typedef union
+{
+    cl_long  CL_ALIGNED(16) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_long  x, y; };
+   __extension__ struct{ cl_long  s0, s1; };
+   __extension__ struct{ cl_long  lo, hi; };
+#endif
+#if defined( __CL_LONG2__)
+    __cl_long2     v2;
+#endif
+}cl_long2;
+
+typedef union
+{
+    cl_long  CL_ALIGNED(32) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_long  x, y, z, w; };
+   __extension__ struct{ cl_long  s0, s1, s2, s3; };
+   __extension__ struct{ cl_long2 lo, hi; };
+#endif
+#if defined( __CL_LONG2__)
+    __cl_long2     v2[2];
+#endif
+#if defined( __CL_LONG4__)
+    __cl_long4     v4;
+#endif
+}cl_long4;
+
+/* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5. */
+typedef  cl_long4  cl_long3;
+
+typedef union
+{
+    cl_long   CL_ALIGNED(64) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_long  x, y, z, w; };
+   __extension__ struct{ cl_long  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_long4 lo, hi; };
+#endif
+#if defined( __CL_LONG2__)
+    __cl_long2     v2[4];
+#endif
+#if defined( __CL_LONG4__)
+    __cl_long4     v4[2];
+#endif
+#if defined( __CL_LONG8__ )
+    __cl_long8     v8;
+#endif
+}cl_long8;
+
+typedef union
+{
+    cl_long  CL_ALIGNED(128) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_long  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_long  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_long8 lo, hi; };
+#endif
+#if defined( __CL_LONG2__)
+    __cl_long2     v2[8];
+#endif
+#if defined( __CL_LONG4__)
+    __cl_long4     v4[4];
+#endif
+#if defined( __CL_LONG8__ )
+    __cl_long8     v8[2];
+#endif
+#if defined( __CL_LONG16__ )
+    __cl_long16    v16;
+#endif
+}cl_long16;
+
+
+/* ---- cl_ulongn ---- */
+typedef union
+{
+    cl_ulong  CL_ALIGNED(16) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_ulong  x, y; };
+   __extension__ struct{ cl_ulong  s0, s1; };
+   __extension__ struct{ cl_ulong  lo, hi; };
+#endif
+#if defined( __CL_ULONG2__)
+    __cl_ulong2     v2;
+#endif
+}cl_ulong2;
+
+typedef union
+{
+    cl_ulong  CL_ALIGNED(32) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_ulong  x, y, z, w; };
+   __extension__ struct{ cl_ulong  s0, s1, s2, s3; };
+   __extension__ struct{ cl_ulong2 lo, hi; };
+#endif
+#if defined( __CL_ULONG2__)
+    __cl_ulong2     v2[2];
+#endif
+#if defined( __CL_ULONG4__)
+    __cl_ulong4     v4;
+#endif
+}cl_ulong4;
+
+/* cl_ulong3 is identical in size, alignment and behavior to cl_ulong4. See section 6.1.5. */
+typedef  cl_ulong4  cl_ulong3;
+
+typedef union
+{
+    cl_ulong   CL_ALIGNED(64) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_ulong  x, y, z, w; };
+   __extension__ struct{ cl_ulong  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_ulong4 lo, hi; };
+#endif
+#if defined( __CL_ULONG2__)
+    __cl_ulong2     v2[4];
+#endif
+#if defined( __CL_ULONG4__)
+    __cl_ulong4     v4[2];
+#endif
+#if defined( __CL_ULONG8__ )
+    __cl_ulong8     v8;
+#endif
+}cl_ulong8;
+
+typedef union
+{
+    cl_ulong  CL_ALIGNED(128) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_ulong  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_ulong  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_ulong8 lo, hi; };
+#endif
+#if defined( __CL_ULONG2__)
+    __cl_ulong2     v2[8];
+#endif
+#if defined( __CL_ULONG4__)
+    __cl_ulong4     v4[4];
+#endif
+#if defined( __CL_ULONG8__ )
+    __cl_ulong8     v8[2];
+#endif
+#if defined( __CL_ULONG16__ )
+    __cl_ulong16    v16;
+#endif
+}cl_ulong16;
+
+
+/* --- cl_floatn ---- */
+
+typedef union
+{
+    cl_float  CL_ALIGNED(8) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_float  x, y; };
+   __extension__ struct{ cl_float  s0, s1; };
+   __extension__ struct{ cl_float  lo, hi; };
+#endif
+#if defined( __CL_FLOAT2__)
+    __cl_float2     v2;
+#endif
+}cl_float2;
+
+typedef union
+{
+    cl_float  CL_ALIGNED(16) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_float   x, y, z, w; };
+   __extension__ struct{ cl_float   s0, s1, s2, s3; };
+   __extension__ struct{ cl_float2  lo, hi; };
+#endif
+#if defined( __CL_FLOAT2__)
+    __cl_float2     v2[2];
+#endif
+#if defined( __CL_FLOAT4__)
+    __cl_float4     v4;
+#endif
+}cl_float4;
+
+/* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5. */
+typedef  cl_float4  cl_float3;
+
+typedef union
+{
+    cl_float   CL_ALIGNED(32) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_float   x, y, z, w; };
+   __extension__ struct{ cl_float   s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_float4  lo, hi; };
+#endif
+#if defined( __CL_FLOAT2__)
+    __cl_float2     v2[4];
+#endif
+#if defined( __CL_FLOAT4__)
+    __cl_float4     v4[2];
+#endif
+#if defined( __CL_FLOAT8__ )
+    __cl_float8     v8;
+#endif
+}cl_float8;
+
+typedef union
+{
+    cl_float  CL_ALIGNED(64) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_float  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_float  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_float8 lo, hi; };
+#endif
+#if defined( __CL_FLOAT2__)
+    __cl_float2     v2[8];
+#endif
+#if defined( __CL_FLOAT4__)
+    __cl_float4     v4[4];
+#endif
+#if defined( __CL_FLOAT8__ )
+    __cl_float8     v8[2];
+#endif
+#if defined( __CL_FLOAT16__ )
+    __cl_float16    v16;
+#endif
+}cl_float16;
+
+/* --- cl_doublen ---- */
+
+typedef union
+{
+    cl_double  CL_ALIGNED(16) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_double  x, y; };
+   __extension__ struct{ cl_double s0, s1; };
+   __extension__ struct{ cl_double lo, hi; };
+#endif
+#if defined( __CL_DOUBLE2__)
+    __cl_double2     v2;
+#endif
+}cl_double2;
+
+typedef union
+{
+    cl_double  CL_ALIGNED(32) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_double  x, y, z, w; };
+   __extension__ struct{ cl_double  s0, s1, s2, s3; };
+   __extension__ struct{ cl_double2 lo, hi; };
+#endif
+#if defined( __CL_DOUBLE2__)
+    __cl_double2     v2[2];
+#endif
+#if defined( __CL_DOUBLE4__)
+    __cl_double4     v4;
+#endif
+}cl_double4;
+
+/* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. */
+typedef  cl_double4  cl_double3;
+
+typedef union
+{
+    cl_double   CL_ALIGNED(64) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_double  x, y, z, w; };
+   __extension__ struct{ cl_double  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_double4 lo, hi; };
+#endif
+#if defined( __CL_DOUBLE2__)
+    __cl_double2     v2[4];
+#endif
+#if defined( __CL_DOUBLE4__)
+    __cl_double4     v4[2];
+#endif
+#if defined( __CL_DOUBLE8__ )
+    __cl_double8     v8;
+#endif
+}cl_double8;
+
+typedef union
+{
+    cl_double  CL_ALIGNED(128) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_double  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_double  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_double8 lo, hi; };
+#endif
+#if defined( __CL_DOUBLE2__)
+    __cl_double2     v2[8];
+#endif
+#if defined( __CL_DOUBLE4__)
+    __cl_double4     v4[4];
+#endif
+#if defined( __CL_DOUBLE8__ )
+    __cl_double8     v8[2];
+#endif
+#if defined( __CL_DOUBLE16__ )
+    __cl_double16    v16;
+#endif
+}cl_double16;
+
+/* Macro to facilitate debugging
+ * Usage:
+ *   Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source.
+ *   The first line ends with:   CL_PROGRAM_STRING_DEBUG_INFO \"
+ *   Each line thereafter of OpenCL C source must end with: \n\
+ *   The last line ends in ";
+ *
+ *   Example:
+ *
+ *   const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\
+ *   kernel void foo( int a, float * b )             \n\
+ *   {                                               \n\
+ *      // my comment                                \n\
+ *      *b[ get_global_id(0)] = a;                   \n\
+ *   }                                               \n\
+ *   ";
+ *
+ * This should correctly set up the line, (column) and file information for your source
+ * string so you can do source level debugging.
+ */
+#define  __CL_STRINGIFY( _x )               # _x
+#define  _CL_STRINGIFY( _x )                __CL_STRINGIFY( _x )
+#define  CL_PROGRAM_STRING_DEBUG_INFO       "#line "  _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* __CL_PLATFORM_H  */
diff --git a/x264.h b/x264.h
index d2a62deace0bfb3efe4bb903ff3bc4b1c3db9c0f..aab7121061098f4b73ec8575711f0ae28103bb36 100644 (file)
--- a/x264.h
+++ b/x264.h
@@ -41,7 +41,7 @@
 
 #include "x264_config.h"
 
-#define X264_BUILD 132
+#define X264_BUILD 133
 
 /* Application developers planning to link against a shared library version of
  * libx264 from a Microsoft Visual Studio or similar development environment