git.sesse.net Git - x264/blob - common/opencl.h
x86inc: fix AVX emulation of cmp(p|s)(s|d)
[x264] / common / opencl.h
1 /*****************************************************************************
2  * opencl.h: OpenCL structures and defines
3  *****************************************************************************
4  * Copyright (C) 2012-2013 x264 project
5  *
6  * Authors: Steve Borho <sborho@multicorewareinc.com>
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
21  *
22  * This program is also available under a commercial proprietary license.
23  * For more information, contact us at licensing@x264.com.
24  *****************************************************************************/
25
26 #ifndef X264_OPENCL_H
27 #define X264_OPENCL_H
28
29 #include "x264.h"
30 #include "common/common.h"
31
32 #include <CL/cl.h>
33
/* Number of downscale resolutions to use for motion search */
#define NUM_IMAGE_SCALES 4

/* Number of PCIe copies that can be queued before requiring a flush */
#define MAX_FINISH_COPIES 1024

/* Size (in bytes) of the page-locked buffer used for PCIe xfers.
 * The expansion is parenthesized so that the product is treated as a single
 * operand wherever the macro is used, e.g. `x / PAGE_LOCKED_BUF_SIZE` or
 * `x % PAGE_LOCKED_BUF_SIZE`; without the parentheses only the leading 32
 * would bind to the neighbouring operator. */
#define PAGE_LOCKED_BUF_SIZE (32 * 1024 * 1024)
42
43 typedef struct
44 {
45     cl_context       context;
46     cl_device_id     device;
47     cl_command_queue queue;
48
49     cl_program  lookahead_program;
50     cl_int      last_buf;
51
52     cl_mem      page_locked_buffer;
53     char       *page_locked_ptr;
54     int         pl_occupancy;
55
56     struct
57     {
58         void *src;
59         void *dest;
60         int   bytes;
61     } copies[MAX_FINISH_COPIES];
62     int         num_copies;
63
64     int         b_device_AMD_SI;
65     int         b_fatal_error;
66     int         lookahead_thread_pri;
67     int         opencl_thread_pri;
68
69     /* downscale lowres luma */
70     cl_kernel   downscale_hpel_kernel;
71     cl_kernel   downscale_kernel1;
72     cl_kernel   downscale_kernel2;
73     cl_mem      luma_16x16_image[2];
74
75     /* weightp filtering */
76     cl_kernel   weightp_hpel_kernel;
77     cl_kernel   weightp_scaled_images_kernel;
78     cl_mem      weighted_scaled_images[NUM_IMAGE_SCALES];
79     cl_mem      weighted_luma_hpel;
80
81     /* intra */
82     cl_kernel   memset_kernel;
83     cl_kernel   intra_kernel;
84     cl_kernel   rowsum_intra_kernel;
85     cl_mem      row_satds[2];
86
87     /* hierarchical motion estimation */
88     cl_kernel   hme_kernel;
89     cl_kernel   subpel_refine_kernel;
90     cl_mem      mv_buffers[2];
91     cl_mem      lowres_mv_costs;
92     cl_mem      mvp_buffer;
93
94     /* bidir */
95     cl_kernel   mode_select_kernel;
96     cl_kernel   rowsum_inter_kernel;
97     cl_mem      lowres_costs[2];
98     cl_mem      frame_stats[2]; /* cost_est, cost_est_aq, intra_mbs */
99 } x264_opencl_t;
100
101 typedef struct
102 {
103     cl_mem scaled_image2Ds[NUM_IMAGE_SCALES];
104     cl_mem luma_hpel;
105     cl_mem inv_qscale_factor;
106     cl_mem intra_cost;
107     cl_mem lowres_mvs0;
108     cl_mem lowres_mvs1;
109     cl_mem lowres_mv_costs0;
110     cl_mem lowres_mv_costs1;
111 } x264_frame_opencl_t;
112
113 typedef struct x264_frame x264_frame;
114
115 int x264_opencl_init( x264_t *h );
116 int x264_opencl_init_lookahead( x264_t *h );
117 void x264_opencl_free( x264_t *h );
118 void x264_opencl_frame_delete( x264_frame *frame );
119
120 #endif