]> git.sesse.net Git - ffmpeg/blob - libavfilter/dnn_backend_native.c
lavc/qtrle: Avoid an unaligned 64-bit write.
[ffmpeg] / libavfilter / dnn_backend_native.c
1 /*
2  * Copyright (c) 2018 Sergey Lavrushkin
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 /**
22  * @file
23  * DNN native backend implementation.
24  */
25
26 #include "dnn_backend_native.h"
27
28 static DNNReturnType set_input_output_native(void *model, DNNData *input, DNNData *output)
29 {
30     ConvolutionalNetwork *network = (ConvolutionalNetwork *)model;
31     InputParams *input_params;
32     ConvolutionalParams *conv_params;
33     DepthToSpaceParams *depth_to_space_params;
34     int cur_width, cur_height, cur_channels;
35     int32_t layer;
36
37     if (network->layers_num <= 0 || network->layers[0].type != INPUT){
38         return DNN_ERROR;
39     }
40     else{
41         input_params = (InputParams *)network->layers[0].params;
42         input_params->width = cur_width = input->width;
43         input_params->height = cur_height = input->height;
44         input_params->channels = cur_channels = input->channels;
45         if (input->data){
46             av_freep(&input->data);
47         }
48         network->layers[0].output = input->data = av_malloc(cur_height * cur_width * cur_channels * sizeof(float));
49         if (!network->layers[0].output){
50             return DNN_ERROR;
51         }
52     }
53
54     for (layer = 1; layer < network->layers_num; ++layer){
55         switch (network->layers[layer].type){
56         case CONV:
57             conv_params = (ConvolutionalParams *)network->layers[layer].params;
58             if (conv_params->input_num != cur_channels){
59                 return DNN_ERROR;
60             }
61             cur_channels = conv_params->output_num;
62             break;
63         case DEPTH_TO_SPACE:
64             depth_to_space_params = (DepthToSpaceParams *)network->layers[layer].params;
65             if (cur_channels % (depth_to_space_params->block_size * depth_to_space_params->block_size) != 0){
66                 return DNN_ERROR;
67             }
68             cur_channels = cur_channels / (depth_to_space_params->block_size * depth_to_space_params->block_size);
69             cur_height *= depth_to_space_params->block_size;
70             cur_width *= depth_to_space_params->block_size;
71             break;
72         default:
73             return DNN_ERROR;
74         }
75         if (network->layers[layer].output){
76             av_freep(&network->layers[layer].output);
77         }
78         network->layers[layer].output = av_malloc(cur_height * cur_width * cur_channels * sizeof(float));
79         if (!network->layers[layer].output){
80             return DNN_ERROR;
81         }
82     }
83
84     output->data = network->layers[network->layers_num - 1].output;
85     output->height = cur_height;
86     output->width = cur_width;
87     output->channels = cur_channels;
88
89     return DNN_SUCCESS;
90 }
91
92 // Loads model and its parameters that are stored in a binary file with following structure:
93 // layers_num,layer_type,layer_parameterss,layer_type,layer_parameters...
94 // For CONV layer: activation_function, input_num, output_num, kernel_size, kernel, biases
95 // For DEPTH_TO_SPACE layer: block_size
96 DNNModel *ff_dnn_load_model_native(const char *model_filename)
97 {
98     DNNModel *model = NULL;
99     ConvolutionalNetwork *network = NULL;
100     AVIOContext *model_file_context;
101     int file_size, dnn_size, kernel_size, i;
102     int32_t layer;
103     DNNLayerType layer_type;
104     ConvolutionalParams *conv_params;
105     DepthToSpaceParams *depth_to_space_params;
106
107     model = av_malloc(sizeof(DNNModel));
108     if (!model){
109         return NULL;
110     }
111
112     if (avio_open(&model_file_context, model_filename, AVIO_FLAG_READ) < 0){
113         av_freep(&model);
114         return NULL;
115     }
116     file_size = avio_size(model_file_context);
117
118     network = av_malloc(sizeof(ConvolutionalNetwork));
119     if (!network){
120         avio_closep(&model_file_context);
121         av_freep(&model);
122         return NULL;
123     }
124     model->model = (void *)network;
125
126     network->layers_num = 1 + (int32_t)avio_rl32(model_file_context);
127     dnn_size = 4;
128
129     network->layers = av_malloc(network->layers_num * sizeof(Layer));
130     if (!network->layers){
131         av_freep(&network);
132         avio_closep(&model_file_context);
133         av_freep(&model);
134         return NULL;
135     }
136
137     for (layer = 0; layer < network->layers_num; ++layer){
138         network->layers[layer].output = NULL;
139         network->layers[layer].params = NULL;
140     }
141     network->layers[0].type = INPUT;
142     network->layers[0].params = av_malloc(sizeof(InputParams));
143     if (!network->layers[0].params){
144         avio_closep(&model_file_context);
145         ff_dnn_free_model_native(&model);
146         return NULL;
147     }
148
149     for (layer = 1; layer < network->layers_num; ++layer){
150         layer_type = (int32_t)avio_rl32(model_file_context);
151         dnn_size += 4;
152         switch (layer_type){
153         case CONV:
154             conv_params = av_malloc(sizeof(ConvolutionalParams));
155             if (!conv_params){
156                 avio_closep(&model_file_context);
157                 ff_dnn_free_model_native(&model);
158                 return NULL;
159             }
160             conv_params->activation = (int32_t)avio_rl32(model_file_context);
161             conv_params->input_num = (int32_t)avio_rl32(model_file_context);
162             conv_params->output_num = (int32_t)avio_rl32(model_file_context);
163             conv_params->kernel_size = (int32_t)avio_rl32(model_file_context);
164             kernel_size = conv_params->input_num * conv_params->output_num *
165                           conv_params->kernel_size * conv_params->kernel_size;
166             dnn_size += 16 + (kernel_size + conv_params->output_num << 2);
167             if (dnn_size > file_size || conv_params->input_num <= 0 ||
168                 conv_params->output_num <= 0 || conv_params->kernel_size <= 0){
169                 avio_closep(&model_file_context);
170                 ff_dnn_free_model_native(&model);
171                 return NULL;
172             }
173             conv_params->kernel = av_malloc(kernel_size * sizeof(float));
174             conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
175             if (!conv_params->kernel || !conv_params->biases){
176                 avio_closep(&model_file_context);
177                 ff_dnn_free_model_native(&model);
178                 return NULL;
179             }
180             for (i = 0; i < kernel_size; ++i){
181                 conv_params->kernel[i] = av_int2float(avio_rl32(model_file_context));
182             }
183             for (i = 0; i < conv_params->output_num; ++i){
184                 conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
185             }
186             network->layers[layer].type = CONV;
187             network->layers[layer].params = conv_params;
188             break;
189         case DEPTH_TO_SPACE:
190             depth_to_space_params = av_malloc(sizeof(DepthToSpaceParams));
191             if (!depth_to_space_params){
192                 avio_closep(&model_file_context);
193                 ff_dnn_free_model_native(&model);
194                 return NULL;
195             }
196             depth_to_space_params->block_size = (int32_t)avio_rl32(model_file_context);
197             dnn_size += 4;
198             network->layers[layer].type = DEPTH_TO_SPACE;
199             network->layers[layer].params = depth_to_space_params;
200             break;
201         default:
202             avio_closep(&model_file_context);
203             ff_dnn_free_model_native(&model);
204             return NULL;
205         }
206     }
207
208     avio_closep(&model_file_context);
209
210     if (dnn_size != file_size){
211         ff_dnn_free_model_native(&model);
212         return NULL;
213     }
214
215     model->set_input_output = &set_input_output_native;
216
217     return model;
218 }
219
220 #define CLAMP_TO_EDGE(x, w) ((x) < 0 ? 0 : ((x) >= (w) ? (w - 1) : (x)))
221
222 static void convolve(const float *input, float *output, const ConvolutionalParams *conv_params, int width, int height)
223 {
224     int y, x, n_filter, ch, kernel_y, kernel_x;
225     int radius = conv_params->kernel_size >> 1;
226     int src_linesize = width * conv_params->input_num;
227     int filter_linesize = conv_params->kernel_size * conv_params->input_num;
228     int filter_size = conv_params->kernel_size * filter_linesize;
229
230     for (y = 0; y < height; ++y){
231         for (x = 0; x < width; ++x){
232             for (n_filter = 0; n_filter < conv_params->output_num; ++n_filter){
233                 output[n_filter] = conv_params->biases[n_filter];
234                 for (ch = 0; ch < conv_params->input_num; ++ch){
235                     for (kernel_y = 0; kernel_y < conv_params->kernel_size; ++kernel_y){
236                         for (kernel_x = 0; kernel_x < conv_params->kernel_size; ++kernel_x){
237                             output[n_filter] += input[CLAMP_TO_EDGE(y + kernel_y - radius, height) * src_linesize +
238                                                       CLAMP_TO_EDGE(x + kernel_x - radius, width) * conv_params->input_num + ch] *
239                                                 conv_params->kernel[n_filter * filter_size + kernel_y * filter_linesize +
240                                                                     kernel_x * conv_params->input_num + ch];
241                         }
242                     }
243                 }
244                 switch (conv_params->activation){
245                 case RELU:
246                     output[n_filter] = FFMAX(output[n_filter], 0.0);
247                     break;
248                 case TANH:
249                     output[n_filter] = 2.0f  / (1.0f + exp(-2.0f * output[n_filter])) - 1.0f;
250                     break;
251                 case SIGMOID:
252                     output[n_filter] = 1.0f / (1.0f + exp(-output[n_filter]));
253                 }
254             }
255             output += conv_params->output_num;
256         }
257     }
258 }
259
/**
 * Rearrange channel data into spatial blocks.
 *
 * Each input pixel carries `channels` values. They are split into
 * block_size * block_size consecutive groups of
 * channels / (block_size * block_size) values, and the groups are written
 * as a block_size x block_size square of output pixels, row by row.
 * The output is therefore block_size times wider and taller than the input.
 *
 * @param input      source buffer, height rows of width pixels
 * @param output     destination buffer
 * @param block_size edge length of the square each input pixel expands to
 * @param width      input width in pixels
 * @param height     input height in pixels
 * @param channels   number of values per input pixel
 */
static void depth_to_space(const float *input, float *output, int block_size, int width, int height, int channels)
{
    const int out_channels = channels / (block_size * block_size);
    /* One input row produces block_size output rows with the same total length. */
    const int band_stride = width * channels;
    const int out_row_stride = band_stride / block_size;
    /* Values one input pixel contributes to a single output row. */
    const int run_length = out_channels * block_size;
    int row, col, sub_row, k, base;

    for (row = 0; row < height; ++row, output += band_stride){
        for (col = 0; col < width; ++col){
            for (sub_row = 0; sub_row < block_size; ++sub_row){
                base = sub_row * out_row_stride + col * run_length;
                /* The block columns of one block row are contiguous in both buffers. */
                for (k = 0; k < run_length; ++k){
                    output[base + k] = *input++;
                }
            }
        }
    }
}
282
283 DNNReturnType ff_dnn_execute_model_native(const DNNModel *model)
284 {
285     ConvolutionalNetwork *network = (ConvolutionalNetwork *)model->model;
286     int cur_width, cur_height, cur_channels;
287     int32_t layer;
288     InputParams *input_params;
289     ConvolutionalParams *conv_params;
290     DepthToSpaceParams *depth_to_space_params;
291
292     if (network->layers_num <= 0 || network->layers[0].type != INPUT || !network->layers[0].output){
293         return DNN_ERROR;
294     }
295     else{
296         input_params = (InputParams *)network->layers[0].params;
297         cur_width = input_params->width;
298         cur_height = input_params->height;
299         cur_channels = input_params->channels;
300     }
301
302     for (layer = 1; layer < network->layers_num; ++layer){
303         if (!network->layers[layer].output){
304             return DNN_ERROR;
305         }
306         switch (network->layers[layer].type){
307         case CONV:
308             conv_params = (ConvolutionalParams *)network->layers[layer].params;
309             convolve(network->layers[layer - 1].output, network->layers[layer].output, conv_params, cur_width, cur_height);
310             cur_channels = conv_params->output_num;
311             break;
312         case DEPTH_TO_SPACE:
313             depth_to_space_params = (DepthToSpaceParams *)network->layers[layer].params;
314             depth_to_space(network->layers[layer - 1].output, network->layers[layer].output,
315                            depth_to_space_params->block_size, cur_width, cur_height, cur_channels);
316             cur_height *= depth_to_space_params->block_size;
317             cur_width *= depth_to_space_params->block_size;
318             cur_channels /= depth_to_space_params->block_size * depth_to_space_params->block_size;
319             break;
320         case INPUT:
321             return DNN_ERROR;
322         }
323     }
324
325     return DNN_SUCCESS;
326 }
327
328 void ff_dnn_free_model_native(DNNModel **model)
329 {
330     ConvolutionalNetwork *network;
331     ConvolutionalParams *conv_params;
332     int32_t layer;
333
334     if (*model)
335     {
336         network = (ConvolutionalNetwork *)(*model)->model;
337         for (layer = 0; layer < network->layers_num; ++layer){
338             av_freep(&network->layers[layer].output);
339             if (network->layers[layer].type == CONV){
340                 conv_params = (ConvolutionalParams *)network->layers[layer].params;
341                 av_freep(&conv_params->kernel);
342                 av_freep(&conv_params->biases);
343             }
344             av_freep(&network->layers[layer].params);
345         }
346         av_freep(&network->layers);
347         av_freep(&network);
348         av_freep(model);
349     }
350 }