]> git.sesse.net Git - ffmpeg/blob - libavfilter/dnn_backend_native.c
avformat/movenc: read track title from correct key
[ffmpeg] / libavfilter / dnn_backend_native.c
1 /*
2  * Copyright (c) 2018 Sergey Lavrushkin
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 /**
22  * @file
23  * DNN native backend implementation.
24  */
25
26 #include "dnn_backend_native.h"
27 #include "dnn_srcnn.h"
28 #include "libavformat/avio.h"
29
/* Kinds of layers the native backend knows about. */
typedef enum {
    INPUT, /* first layer: only carries the input buffer and its dimensions */
    CONV   /* convolutional layer described by ConvolutionalParams */
} LayerType;
31
32 typedef struct Layer{
33     LayerType type;
34     float* output;
35     void* params;
36 } Layer;
37
/* Weights and dimensions of a single convolutional layer. */
typedef struct ConvolutionalParams{
    /* Number of input channels. */
    int32_t input_num;
    /* Number of filters (output channels). */
    int32_t output_num;
    /* Side length of the square filter window. */
    int32_t kernel_size;
    /* Filter weights, indexed as [output_num][kernel_size][kernel_size][input_num]. */
    float* kernel;
    /* One bias per filter (output_num entries). */
    float* biases;
} ConvolutionalParams;
43
/* Dimensions of the data currently bound to the input layer. */
typedef struct InputParams{
    int height;
    int width;
    int channels;
} InputParams;
47
48 // Represents simple feed-forward convolutional network.
49 typedef struct ConvolutionalNetwork{
50     Layer* layers;
51     int32_t layers_num;
52 } ConvolutionalNetwork;
53
54 static DNNReturnType set_input_output_native(void* model, const DNNData* input, const DNNData* output)
55 {
56     ConvolutionalNetwork* network = (ConvolutionalNetwork*)model;
57     InputParams* input_params;
58     ConvolutionalParams* conv_params;
59     int cur_width, cur_height, cur_channels;
60     int32_t layer;
61
62     if (network->layers_num <= 0 || network->layers[0].type != INPUT){
63         return DNN_ERROR;
64     }
65     else{
66         network->layers[0].output = input->data;
67         input_params = (InputParams*)network->layers[0].params;
68         input_params->width = cur_width = input->width;
69         input_params->height = cur_height = input->height;
70         input_params->channels = cur_channels = input->channels;
71     }
72
73     for (layer = 1; layer < network->layers_num; ++layer){
74         switch (network->layers[layer].type){
75         case CONV:
76             conv_params = (ConvolutionalParams*)network->layers[layer].params;
77             if (conv_params->input_num != cur_channels){
78                 return DNN_ERROR;
79             }
80             cur_channels = conv_params->output_num;
81             if (layer < network->layers_num - 1){
82                 if (!network->layers[layer].output){
83                     av_freep(&network->layers[layer].output);
84                 }
85                 network->layers[layer].output = av_malloc(cur_height * cur_width * cur_channels * sizeof(float));
86                 if (!network->layers[layer].output){
87                     return DNN_ERROR;
88                 }
89             }
90             else{
91                 network->layers[layer].output = output->data;
92                 if (output->width != cur_width || output->height != cur_height || output->channels != cur_channels){
93                     return DNN_ERROR;
94                 }
95             }
96             break;
97         default:
98             return DNN_ERROR;
99         }
100     }
101
102     return DNN_SUCCESS;
103 }
104
105 // Loads model and its parameters that are stored in a binary file with following structure:
106 // layers_num,conv_input_num,conv_output_num,conv_kernel_size,conv_kernel,conv_biases,conv_input_num...
107 DNNModel* ff_dnn_load_model_native(const char* model_filename)
108 {
109     DNNModel* model = NULL;
110     ConvolutionalNetwork* network = NULL;
111     AVIOContext* model_file_context;
112     int file_size, dnn_size, kernel_size, i;
113     int32_t layer;
114     ConvolutionalParams* conv_params;
115
116     model = av_malloc(sizeof(DNNModel));
117     if (!model){
118         return NULL;
119     }
120
121     if (avio_open(&model_file_context, model_filename, AVIO_FLAG_READ) < 0){
122         av_freep(&model);
123         return NULL;
124     }
125     file_size = avio_size(model_file_context);
126
127     network = av_malloc(sizeof(ConvolutionalNetwork));
128     if (!network){
129         avio_closep(&model_file_context);
130         av_freep(&model);
131         return NULL;
132     }
133     model->model = (void*)network;
134
135     network->layers_num = 1 + (int32_t)avio_rl32(model_file_context);
136     dnn_size = 4;
137
138     network->layers = av_malloc(network->layers_num * sizeof(Layer));
139     if (!network->layers){
140         av_freep(&network);
141         avio_closep(&model_file_context);
142         av_freep(&model);
143         return NULL;
144     }
145
146     for (layer = 0; layer < network->layers_num; ++layer){
147         network->layers[layer].output = NULL;
148         network->layers[layer].params = NULL;
149     }
150     network->layers[0].type = INPUT;
151     network->layers[0].params = av_malloc(sizeof(InputParams));
152     if (!network->layers[0].params){
153         avio_closep(&model_file_context);
154         ff_dnn_free_model_native(&model);
155         return NULL;
156     }
157
158     for (layer = 1; layer < network->layers_num; ++layer){
159         conv_params = av_malloc(sizeof(ConvolutionalParams));
160         if (!conv_params){
161             avio_closep(&model_file_context);
162             ff_dnn_free_model_native(&model);
163             return NULL;
164         }
165         conv_params->input_num = (int32_t)avio_rl32(model_file_context);
166         conv_params->output_num = (int32_t)avio_rl32(model_file_context);
167         conv_params->kernel_size = (int32_t)avio_rl32(model_file_context);
168         kernel_size = conv_params->input_num * conv_params->output_num *
169                       conv_params->kernel_size * conv_params->kernel_size;
170         dnn_size += 12 + (kernel_size + conv_params->output_num << 2);
171         if (dnn_size > file_size || conv_params->input_num <= 0 ||
172             conv_params->output_num <= 0 || conv_params->kernel_size <= 0){
173             avio_closep(&model_file_context);
174             ff_dnn_free_model_native(&model);
175             return NULL;
176         }
177         conv_params->kernel = av_malloc(kernel_size * sizeof(float));
178         conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
179         if (!conv_params->kernel || !conv_params->biases){
180             avio_closep(&model_file_context);
181             ff_dnn_free_model_native(&model);
182             return NULL;
183         }
184         for (i = 0; i < kernel_size; ++i){
185             conv_params->kernel[i] = av_int2float(avio_rl32(model_file_context));
186         }
187         for (i = 0; i < conv_params->output_num; ++i){
188             conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
189         }
190         network->layers[layer].type = CONV;
191         network->layers[layer].params = conv_params;
192     }
193
194     avio_closep(&model_file_context);
195
196     if (dnn_size != file_size){
197         ff_dnn_free_model_native(&model);
198         return NULL;
199     }
200
201     model->set_input_output = &set_input_output_native;
202
203     return model;
204 }
205
206 static int set_up_conv_layer(Layer* layer, const float* kernel, const float* biases, int32_t input_num, int32_t output_num, int32_t size)
207 {
208     ConvolutionalParams* conv_params;
209     int kernel_size;
210
211     conv_params = av_malloc(sizeof(ConvolutionalParams));
212     if (!conv_params){
213         return DNN_ERROR;
214     }
215     conv_params->input_num = input_num;
216     conv_params->output_num = output_num;
217     conv_params->kernel_size = size;
218     kernel_size = input_num * output_num * size * size;
219     conv_params->kernel = av_malloc(kernel_size * sizeof(float));
220     conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
221     if (!conv_params->kernel || !conv_params->biases){
222         av_freep(&conv_params->kernel);
223         av_freep(&conv_params->biases);
224         av_freep(&conv_params);
225         return DNN_ERROR;
226     }
227     memcpy(conv_params->kernel, kernel, kernel_size * sizeof(float));
228     memcpy(conv_params->biases, biases, output_num * sizeof(float));
229     layer->type = CONV;
230     layer->params = conv_params;
231
232     return DNN_SUCCESS;
233 }
234
235 DNNModel* ff_dnn_load_default_model_native(DNNDefaultModel model_type)
236 {
237     DNNModel* model = NULL;
238     ConvolutionalNetwork* network = NULL;
239     int32_t layer;
240
241     model = av_malloc(sizeof(DNNModel));
242     if (!model){
243         return NULL;
244     }
245
246     network = av_malloc(sizeof(ConvolutionalNetwork));
247     if (!network){
248         av_freep(&model);
249         return NULL;
250     }
251     model->model = (void*)network;
252
253     switch (model_type){
254     case DNN_SRCNN:
255         network->layers_num = 4;
256
257         network->layers = av_malloc(network->layers_num * sizeof(Layer));
258         if (!network->layers){
259             av_freep(&network);
260             av_freep(&model);
261             return NULL;
262         }
263
264         for (layer = 0; layer < network->layers_num; ++layer){
265             network->layers[layer].output = NULL;
266             network->layers[layer].params = NULL;
267         }
268         network->layers[0].type = INPUT;
269         network->layers[0].params = av_malloc(sizeof(InputParams));
270         if (!network->layers[0].params){
271             ff_dnn_free_model_native(&model);
272             return NULL;
273         }
274
275         if (set_up_conv_layer(network->layers + 1, conv1_kernel, conv1_biases, 1, 64, 9) != DNN_SUCCESS ||
276             set_up_conv_layer(network->layers + 2, conv2_kernel, conv2_biases, 64, 32, 1) != DNN_SUCCESS ||
277             set_up_conv_layer(network->layers + 3, conv3_kernel, conv3_biases, 32, 1, 5) != DNN_SUCCESS){
278             ff_dnn_free_model_native(&model);
279             return NULL;
280         }
281
282         model->set_input_output = &set_input_output_native;
283
284         return model;
285     default:
286         av_freep(&network);
287         av_freep(&model);
288         return NULL;
289     }
290 }
291
/* Clamps coordinate x to the valid range [0, w - 1]. Both arguments may be
 * evaluated more than once, so they must be free of side effects. */
#define CLAMP_TO_EDGE(x, w) ((x) < 0 ? 0 : ((x) >= (w) ? ((w) - 1) : (x)))
293
294 static void convolve(const float* input, float* output, const ConvolutionalParams* conv_params, int32_t width, int32_t height)
295 {
296     int y, x, n_filter, ch, kernel_y, kernel_x;
297     int radius = conv_params->kernel_size >> 1;
298     int src_linesize = width * conv_params->input_num;
299     int filter_linesize = conv_params->kernel_size * conv_params->input_num;
300     int filter_size = conv_params->kernel_size * filter_linesize;
301
302     for (y = 0; y < height; ++y){
303         for (x = 0; x < width; ++x){
304             for (n_filter = 0; n_filter < conv_params->output_num; ++n_filter){
305                 output[n_filter] = conv_params->biases[n_filter];
306                 for (ch = 0; ch < conv_params->input_num; ++ch){
307                     for (kernel_y = 0; kernel_y < conv_params->kernel_size; ++kernel_y){
308                         for (kernel_x = 0; kernel_x < conv_params->kernel_size; ++kernel_x){
309                             output[n_filter] += input[CLAMP_TO_EDGE(y + kernel_y - radius, height) * src_linesize +
310                                                       CLAMP_TO_EDGE(x + kernel_x - radius, width) * conv_params->input_num + ch] *
311                                                 conv_params->kernel[n_filter * filter_size + kernel_y * filter_linesize +
312                                                                     kernel_x * conv_params->input_num + ch];
313                         }
314                     }
315                 }
316                 output[n_filter] = FFMAX(output[n_filter], 0.0);
317             }
318             output += conv_params->output_num;
319         }
320     }
321 }
322
323 DNNReturnType ff_dnn_execute_model_native(const DNNModel* model)
324 {
325     ConvolutionalNetwork* network = (ConvolutionalNetwork*)model->model;
326     InputParams* input_params;
327     int cur_width, cur_height;
328     int32_t layer;
329
330     if (network->layers_num <= 0 || network->layers[0].type != INPUT || !network->layers[0].output){
331         return DNN_ERROR;
332     }
333     else{
334         input_params = (InputParams*)network->layers[0].params;
335         cur_width = input_params->width;
336         cur_height = input_params->height;
337     }
338
339     for (layer = 1; layer < network->layers_num; ++layer){
340         if (!network->layers[layer].output){
341             return DNN_ERROR;
342         }
343         switch (network->layers[layer].type){
344         case CONV:
345             convolve(network->layers[layer - 1].output, network->layers[layer].output, (ConvolutionalParams*)network->layers[layer].params, cur_width, cur_height);
346             break;
347         case INPUT:
348             return DNN_ERROR;
349         }
350     }
351
352     return DNN_SUCCESS;
353 }
354
355 void ff_dnn_free_model_native(DNNModel** model)
356 {
357     ConvolutionalNetwork* network;
358     ConvolutionalParams* conv_params;
359     int32_t layer;
360
361     if (*model)
362     {
363         network = (ConvolutionalNetwork*)(*model)->model;
364         for (layer = 0; layer < network->layers_num; ++layer){
365             switch (network->layers[layer].type){
366             case CONV:
367                 if (layer < network->layers_num - 1){
368                     av_freep(&network->layers[layer].output);
369                 }
370                 conv_params = (ConvolutionalParams*)network->layers[layer].params;
371                 av_freep(&conv_params->kernel);
372                 av_freep(&conv_params->biases);
373                 av_freep(&conv_params);
374                 break;
375             case INPUT:
376                 av_freep(&network->layers[layer].params);
377             }
378         }
379         av_freep(network);
380         av_freep(model);
381     }
382 }