]> git.sesse.net Git - ffmpeg/blob - libavfilter/dnn/dnn_backend_native.c
avfilter/dnn: get the data type of network output from dnn execution result
[ffmpeg] / libavfilter / dnn / dnn_backend_native.c
1 /*
2  * Copyright (c) 2018 Sergey Lavrushkin
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 /**
22  * @file
23  * DNN native backend implementation.
24  */
25
26 #include "dnn_backend_native.h"
27 #include "libavutil/avassert.h"
28 #include "dnn_backend_native_layer_conv2d.h"
29 #include "dnn_backend_native_layers.h"
30
31 static DNNReturnType set_input_output_native(void *model, DNNData *input, const char *input_name, const char **output_names, uint32_t nb_output)
32 {
33     ConvolutionalNetwork *network = (ConvolutionalNetwork *)model;
34     DnnOperand *oprd = NULL;
35
36     if (network->layers_num <= 0 || network->operands_num <= 0)
37         return DNN_ERROR;
38
39     /* inputs */
40     av_assert0(input->dt == DNN_FLOAT);
41     for (int i = 0; i < network->operands_num; ++i) {
42         oprd = &network->operands[i];
43         if (strcmp(oprd->name, input_name) == 0) {
44             if (oprd->type != DOT_INPUT)
45                 return DNN_ERROR;
46             break;
47         }
48         oprd = NULL;
49     }
50
51     if (!oprd)
52         return DNN_ERROR;
53
54     oprd->dims[0] = 1;
55     oprd->dims[1] = input->height;
56     oprd->dims[2] = input->width;
57     oprd->dims[3] = input->channels;
58
59     av_freep(&oprd->data);
60     oprd->length = calculate_operand_data_length(oprd);
61     oprd->data = av_malloc(oprd->length);
62     if (!oprd->data)
63         return DNN_ERROR;
64
65     input->data = oprd->data;
66
67     /* outputs */
68     network->nb_output = 0;
69     av_freep(&network->output_indexes);
70     network->output_indexes = av_mallocz_array(nb_output, sizeof(*network->output_indexes));
71     if (!network->output_indexes)
72         return DNN_ERROR;
73
74     for (uint32_t i = 0; i < nb_output; ++i) {
75         const char *output_name = output_names[i];
76         for (int j = 0; j < network->operands_num; ++j) {
77             oprd = &network->operands[j];
78             if (strcmp(oprd->name, output_name) == 0) {
79                 network->output_indexes[network->nb_output++] = j;
80                 break;
81             }
82         }
83     }
84
85     if (network->nb_output != nb_output)
86         return DNN_ERROR;
87
88     return DNN_SUCCESS;
89 }
90
91 // Loads model and its parameters that are stored in a binary file with following structure:
92 // layers_num,layer_type,layer_parameterss,layer_type,layer_parameters...
93 // For CONV layer: activation_function, input_num, output_num, kernel_size, kernel, biases
94 // For DEPTH_TO_SPACE layer: block_size
95 DNNModel *ff_dnn_load_model_native(const char *model_filename)
96 {
97     DNNModel *model = NULL;
98     char header_expected[] = "FFMPEGDNNNATIVE";
99     char *buf;
100     size_t size;
101     int version, header_size, major_version_expected = 1;
102     ConvolutionalNetwork *network = NULL;
103     AVIOContext *model_file_context;
104     int file_size, dnn_size, parsed_size;
105     int32_t layer;
106     DNNLayerType layer_type;
107
108     model = av_malloc(sizeof(DNNModel));
109     if (!model){
110         return NULL;
111     }
112
113     if (avio_open(&model_file_context, model_filename, AVIO_FLAG_READ) < 0){
114         av_freep(&model);
115         return NULL;
116     }
117     file_size = avio_size(model_file_context);
118
119     /**
120      * check file header with string and version
121      */
122     size = sizeof(header_expected);
123     buf = av_malloc(size);
124     if (!buf) {
125         avio_closep(&model_file_context);
126         av_freep(&model);
127         return NULL;
128     }
129
130     // size - 1 to skip the ending '\0' which is not saved in file
131     avio_get_str(model_file_context, size - 1, buf, size);
132     dnn_size = size - 1;
133     if (strncmp(buf, header_expected, size) != 0) {
134         av_freep(&buf);
135         avio_closep(&model_file_context);
136         av_freep(&model);
137         return NULL;
138     }
139     av_freep(&buf);
140
141     version = (int32_t)avio_rl32(model_file_context);
142     dnn_size += 4;
143     if (version != major_version_expected) {
144         avio_closep(&model_file_context);
145         av_freep(&model);
146         return NULL;
147     }
148
149     // currently no need to check minor version
150     version = (int32_t)avio_rl32(model_file_context);
151     dnn_size += 4;
152     header_size = dnn_size;
153
154     network = av_mallocz(sizeof(ConvolutionalNetwork));
155     if (!network){
156         avio_closep(&model_file_context);
157         av_freep(&model);
158         return NULL;
159     }
160     model->model = (void *)network;
161
162     avio_seek(model_file_context, file_size - 8, SEEK_SET);
163     network->layers_num = (int32_t)avio_rl32(model_file_context);
164     network->operands_num = (int32_t)avio_rl32(model_file_context);
165     dnn_size += 8;
166     avio_seek(model_file_context, header_size, SEEK_SET);
167
168     network->layers = av_mallocz(network->layers_num * sizeof(Layer));
169     if (!network->layers){
170         avio_closep(&model_file_context);
171         ff_dnn_free_model_native(&model);
172         return NULL;
173     }
174
175     network->operands = av_mallocz(network->operands_num * sizeof(DnnOperand));
176     if (!network->operands){
177         avio_closep(&model_file_context);
178         ff_dnn_free_model_native(&model);
179         return NULL;
180     }
181
182     for (layer = 0; layer < network->layers_num; ++layer){
183         layer_type = (int32_t)avio_rl32(model_file_context);
184         dnn_size += 4;
185
186         if (layer_type >= DLT_COUNT) {
187             avio_closep(&model_file_context);
188             ff_dnn_free_model_native(&model);
189             return NULL;
190         }
191
192         network->layers[layer].type = layer_type;
193         parsed_size = layer_funcs[layer_type].pf_load(&network->layers[layer], model_file_context, file_size);
194         if (!parsed_size) {
195             avio_closep(&model_file_context);
196             ff_dnn_free_model_native(&model);
197             return NULL;
198         }
199         dnn_size += parsed_size;
200     }
201
202     for (int32_t i = 0; i < network->operands_num; ++i){
203         DnnOperand *oprd;
204         int32_t name_len;
205         int32_t operand_index = (int32_t)avio_rl32(model_file_context);
206         dnn_size += 4;
207
208         oprd = &network->operands[operand_index];
209         name_len = (int32_t)avio_rl32(model_file_context);
210         dnn_size += 4;
211
212         avio_get_str(model_file_context, name_len, oprd->name, sizeof(oprd->name));
213         dnn_size += name_len;
214
215         oprd->type = (int32_t)avio_rl32(model_file_context);
216         dnn_size += 4;
217
218         oprd->data_type = (int32_t)avio_rl32(model_file_context);
219         dnn_size += 4;
220
221         for (int32_t dim = 0; dim < 4; ++dim) {
222             oprd->dims[dim] = (int32_t)avio_rl32(model_file_context);
223             dnn_size += 4;
224         }
225
226         oprd->isNHWC = 1;
227     }
228
229     avio_closep(&model_file_context);
230
231     if (dnn_size != file_size){
232         ff_dnn_free_model_native(&model);
233         return NULL;
234     }
235
236     model->set_input_output = &set_input_output_native;
237
238     return model;
239 }
240
241 DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *outputs, uint32_t nb_output)
242 {
243     ConvolutionalNetwork *network = (ConvolutionalNetwork *)model->model;
244     int32_t layer;
245     uint32_t nb = FFMIN(nb_output, network->nb_output);
246
247     if (network->layers_num <= 0 || network->operands_num <= 0)
248         return DNN_ERROR;
249     if (!network->operands[0].data)
250         return DNN_ERROR;
251
252     for (layer = 0; layer < network->layers_num; ++layer){
253         DNNLayerType layer_type = network->layers[layer].type;
254         layer_funcs[layer_type].pf_exec(network->operands,
255                                   network->layers[layer].input_operand_indexes,
256                                   network->layers[layer].output_operand_index,
257                                   network->layers[layer].params);
258     }
259
260     for (uint32_t i = 0; i < nb; ++i) {
261         DnnOperand *oprd = &network->operands[network->output_indexes[i]];
262         outputs[i].data = oprd->data;
263         outputs[i].height = oprd->dims[1];
264         outputs[i].width = oprd->dims[2];
265         outputs[i].channels = oprd->dims[3];
266         outputs[i].dt = oprd->data_type;
267     }
268
269     return DNN_SUCCESS;
270 }
271
272 int32_t calculate_operand_dims_count(const DnnOperand *oprd)
273 {
274     int32_t result = 1;
275     for (int i = 0; i < 4; ++i)
276         result *= oprd->dims[i];
277
278     return result;
279 }
280
281 int32_t calculate_operand_data_length(const DnnOperand* oprd)
282 {
283     // currently, we just support DNN_FLOAT
284     return oprd->dims[0] * oprd->dims[1] * oprd->dims[2] * oprd->dims[3] * sizeof(float);
285 }
286
287 void ff_dnn_free_model_native(DNNModel **model)
288 {
289     ConvolutionalNetwork *network;
290     ConvolutionalParams *conv_params;
291     int32_t layer;
292
293     if (*model)
294     {
295         network = (ConvolutionalNetwork *)(*model)->model;
296         for (layer = 0; layer < network->layers_num; ++layer){
297             if (network->layers[layer].type == DLT_CONV2D){
298                 conv_params = (ConvolutionalParams *)network->layers[layer].params;
299                 av_freep(&conv_params->kernel);
300                 av_freep(&conv_params->biases);
301             }
302             av_freep(&network->layers[layer].params);
303         }
304         av_freep(&network->layers);
305
306         for (uint32_t operand = 0; operand < network->operands_num; ++operand)
307             av_freep(&network->operands[operand].data);
308         av_freep(&network->operands);
309
310         av_freep(&network->output_indexes);
311         av_freep(&network);
312         av_freep(model);
313     }
314 }