2 * Copyright (c) 2018 Sergey Lavrushkin
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * DNN native backend implementation.
26 #include "dnn_backend_native.h"
27 #include "libavutil/avassert.h"
28 #include "dnn_backend_native_layer_pad.h"
29 #include "dnn_backend_native_layer_conv2d.h"
30 #include "dnn_backend_native_layer_depth2space.h"
static DNNReturnType set_input_output_native(void *model, DNNInputData *input, const char *input_name, const char **output_names, uint32_t nb_output)
    // Binds the caller-supplied input description to the network's first
    // operand and (re)allocates the buffer the caller will fill with data.
    // input_name/output_names/nb_output are not referenced in the visible
    // body — presumably ignored by the native backend here (TODO confirm).
    ConvolutionalNetwork *network = (ConvolutionalNetwork *)model;

    // A model with no layers or no operands cannot be executed.
    if (network->layers_num <= 0 || network->operands_num <= 0)

    // Only float input is supported by the native backend.
    av_assert0(input->dt == DNN_FLOAT);

    /**
     * as the first step, suppose network->operands[0] is the input operand.
     */
    network->operands[0].dims[0] = 1;               // batch size fixed to 1
    network->operands[0].dims[1] = input->height;
    network->operands[0].dims[2] = input->width;
    network->operands[0].dims[3] = input->channels;
    network->operands[0].type = DOT_INPUT;
    network->operands[0].data_type = DNN_FLOAT;
    network->operands[0].isNHWC = 1;                // dims are laid out NHWC

    // Drop any buffer from a previous call before sizing a fresh one.
    av_freep(&network->operands[0].data);
    network->operands[0].length = calculate_operand_data_length(&network->operands[0]);
    network->operands[0].data = av_malloc(network->operands[0].length);
    if (!network->operands[0].data)
    // Hand the freshly allocated buffer back to the caller to be filled.
    input->data = network->operands[0].data;
62 // Loads model and its parameters that are stored in a binary file with following structure:
63 // layers_num,layer_type,layer_parameterss,layer_type,layer_parameters...
64 // For CONV layer: activation_function, input_num, output_num, kernel_size, kernel, biases
65 // For DEPTH_TO_SPACE layer: block_size
DNNModel *ff_dnn_load_model_native(const char *model_filename)
    // Parses the binary native-model file (format described above) into a
    // ConvolutionalNetwork and returns a DNNModel wrapping it.
    // On any parse/alloc failure the partially built model is torn down via
    // ff_dnn_free_model_native() and the file handle is closed.
    DNNModel *model = NULL;
    char header_expected[] = "FFMPEGDNNNATIVE";
    // Only major version 0 of the file format is accepted.
    int version, header_size, major_version_expected = 0;
    ConvolutionalNetwork *network = NULL;
    AVIOContext *model_file_context;
    // dnn_size tracks how many bytes have been consumed; it is compared to
    // file_size at the end as a corruption check.
    int file_size, dnn_size, kernel_size, i;
    DNNLayerType layer_type;
    ConvolutionalParams *conv_params;
    DepthToSpaceParams *depth_to_space_params;
    LayerPadParams *pad_params;

    model = av_malloc(sizeof(DNNModel));

    if (avio_open(&model_file_context, model_filename, AVIO_FLAG_READ) < 0){

    file_size = avio_size(model_file_context);

    /**
     * check file header with string and version
     */
    size = sizeof(header_expected);
    buf = av_malloc(size);
        avio_closep(&model_file_context);

    // size - 1 to skip the ending '\0' which is not saved in file
    avio_get_str(model_file_context, size - 1, buf, size);
    if (strncmp(buf, header_expected, size) != 0) {
        avio_closep(&model_file_context);

    // Major version: must match exactly.
    version = (int32_t)avio_rl32(model_file_context);
    if (version != major_version_expected) {
        avio_closep(&model_file_context);

    // currently no need to check minor version
    version = (int32_t)avio_rl32(model_file_context);
    header_size = dnn_size;

    network = av_mallocz(sizeof(ConvolutionalNetwork));
        avio_closep(&model_file_context);
    model->model = (void *)network;

    // layers_num and operands_num are stored as the last 8 bytes of the file.
    avio_seek(model_file_context, file_size - 8, SEEK_SET);
    network->layers_num = (int32_t)avio_rl32(model_file_context);
    network->operands_num = (int32_t)avio_rl32(model_file_context);
    // Rewind to just past the header to start reading the layer list.
    avio_seek(model_file_context, header_size, SEEK_SET);

    network->layers = av_mallocz(network->layers_num * sizeof(Layer));
    if (!network->layers){
        avio_closep(&model_file_context);
        ff_dnn_free_model_native(&model);

    network->operands = av_mallocz(network->operands_num * sizeof(DnnOperand));
    if (!network->operands){
        avio_closep(&model_file_context);
        ff_dnn_free_model_native(&model);

    // One record per layer: a type tag followed by type-specific parameters.
    for (layer = 0; layer < network->layers_num; ++layer){
        layer_type = (int32_t)avio_rl32(model_file_context);
            // CONV layer: 6 int32 hyper-parameters, then kernel and biases.
            conv_params = av_malloc(sizeof(ConvolutionalParams));
                avio_closep(&model_file_context);
                ff_dnn_free_model_native(&model);
            conv_params->dilation = (int32_t)avio_rl32(model_file_context);
            conv_params->padding_method = (int32_t)avio_rl32(model_file_context);
            conv_params->activation = (int32_t)avio_rl32(model_file_context);
            conv_params->input_num = (int32_t)avio_rl32(model_file_context);
            conv_params->output_num = (int32_t)avio_rl32(model_file_context);
            conv_params->kernel_size = (int32_t)avio_rl32(model_file_context);
            // Total number of kernel weights: in * out * k * k.
            kernel_size = conv_params->input_num * conv_params->output_num *
                          conv_params->kernel_size * conv_params->kernel_size;
            // NOTE(review): '+' binds tighter than '<<', so this evaluates as
            // 24 + ((kernel_size + output_num) << 2), i.e. 24 header bytes plus
            // 4 bytes per weight and per bias — the intended count, but
            // explicit parentheses would make that far clearer.
            dnn_size += 24 + (kernel_size + conv_params->output_num << 2);
            // Bounds/sanity check before allocating based on file-supplied sizes.
            if (dnn_size > file_size || conv_params->input_num <= 0 ||
                conv_params->output_num <= 0 || conv_params->kernel_size <= 0){
                avio_closep(&model_file_context);
                av_freep(&conv_params);
                ff_dnn_free_model_native(&model);
            conv_params->kernel = av_malloc(kernel_size * sizeof(float));
            conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
            if (!conv_params->kernel || !conv_params->biases){
                avio_closep(&model_file_context);
                av_freep(&conv_params->kernel);
                av_freep(&conv_params->biases);
                av_freep(&conv_params);
                ff_dnn_free_model_native(&model);
            // Weights and biases are stored as little-endian IEEE-754 floats.
            for (i = 0; i < kernel_size; ++i){
                conv_params->kernel[i] = av_int2float(avio_rl32(model_file_context));
            for (i = 0; i < conv_params->output_num; ++i){
                conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
            network->layers[layer].input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
            network->layers[layer].output_operand_index = (int32_t)avio_rl32(model_file_context);
            network->layers[layer].type = CONV;
            network->layers[layer].params = conv_params;
            // DEPTH_TO_SPACE layer: just a block size plus operand indexes.
            depth_to_space_params = av_malloc(sizeof(DepthToSpaceParams));
            if (!depth_to_space_params){
                avio_closep(&model_file_context);
                ff_dnn_free_model_native(&model);
            depth_to_space_params->block_size = (int32_t)avio_rl32(model_file_context);
            network->layers[layer].input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
            network->layers[layer].output_operand_index = (int32_t)avio_rl32(model_file_context);
            network->layers[layer].type = DEPTH_TO_SPACE;
            network->layers[layer].params = depth_to_space_params;
            // MIRROR_PAD layer: pad mode plus before/after padding per dim.
            pad_params = av_malloc(sizeof(LayerPadParams));
                avio_closep(&model_file_context);
                ff_dnn_free_model_native(&model);
            pad_params->mode = (int32_t)avio_rl32(model_file_context);
            for (i = 0; i < 4; ++i) {
                pad_params->paddings[i][0] = avio_rl32(model_file_context);
                pad_params->paddings[i][1] = avio_rl32(model_file_context);
            network->layers[layer].input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
            network->layers[layer].output_operand_index = (int32_t)avio_rl32(model_file_context);
            network->layers[layer].type = MIRROR_PAD;
            network->layers[layer].params = pad_params;
            // Unknown layer type: abandon the whole model.
            avio_closep(&model_file_context);
            ff_dnn_free_model_native(&model);

    // Operand descriptors: index, name, type, data type and 4 dims each.
    // NOTE(review): operand_index comes straight from the file and indexes
    // network->operands without a visible range check here — verify it is
    // validated on an elided line, otherwise this is an OOB write vector.
    for (int32_t i = 0; i < network->operands_num; ++i){
        int32_t operand_index = (int32_t)avio_rl32(model_file_context);
        oprd = &network->operands[operand_index];
        name_len = (int32_t)avio_rl32(model_file_context);
        avio_get_str(model_file_context, name_len, oprd->name, sizeof(oprd->name));
        dnn_size += name_len;
        oprd->type = (int32_t)avio_rl32(model_file_context);
        oprd->data_type = (int32_t)avio_rl32(model_file_context);
        for (int32_t dim = 0; dim < 4; ++dim) {
            oprd->dims[dim] = (int32_t)avio_rl32(model_file_context);

    avio_closep(&model_file_context);

    // Every byte of the file must have been accounted for, or it is corrupt.
    if (dnn_size != file_size){
        ff_dnn_free_model_native(&model);

    model->set_input_output = &set_input_output_native;
DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *outputs, uint32_t nb_output)
    // Runs every layer in file order, then exposes the last operand as the
    // single output (nb_output beyond 1 is not supported — see note below).
    ConvolutionalNetwork *network = (ConvolutionalNetwork *)model->model;
    ConvolutionalParams *conv_params;
    DepthToSpaceParams *depth_to_space_params;
    LayerPadParams *pad_params;

    // Reject an empty network.
    if (network->layers_num <= 0 || network->operands_num <= 0)
    // set_input_output_native() must have allocated the input buffer first.
    if (!network->operands[0].data)

    // Dispatch each layer to its executor; layers communicate exclusively
    // through the shared operands array, addressed by the stored indexes.
    for (layer = 0; layer < network->layers_num; ++layer){
        switch (network->layers[layer].type){
            conv_params = (ConvolutionalParams *)network->layers[layer].params;
            convolve(network->operands, network->layers[layer].input_operand_indexes,
                     network->layers[layer].output_operand_index, conv_params);
            depth_to_space_params = (DepthToSpaceParams *)network->layers[layer].params;
            depth_to_space(network->operands, network->layers[layer].input_operand_indexes,
                           network->layers[layer].output_operand_index, depth_to_space_params->block_size);
            pad_params = (LayerPadParams *)network->layers[layer].params;
            dnn_execute_layer_pad(network->operands, network->layers[layer].input_operand_indexes,
                                  network->layers[layer].output_operand_index, pad_params);

    // native mode does not support multiple outputs yet

    /**
     * as the first step, suppose network->operands[network->operands_num - 1] is the output operand.
     */
    // The output buffer is borrowed from the operand — the caller must not
    // free it; ff_dnn_free_model_native() owns it.
    outputs[0].data = network->operands[network->operands_num - 1].data;
    outputs[0].height = network->operands[network->operands_num - 1].dims[1];
    outputs[0].width = network->operands[network->operands_num - 1].dims[2];
    outputs[0].channels = network->operands[network->operands_num - 1].dims[3];
336 int32_t calculate_operand_data_length(DnnOperand* operand)
338 // currently, we just support DNN_FLOAT
339 return operand->dims[0] * operand->dims[1] * operand->dims[2] * operand->dims[3] * sizeof(float);
void ff_dnn_free_model_native(DNNModel **model)
    // Releases everything owned by a native model: per-layer params (plus the
    // kernel/bias blocks CONV params own), the layer array, every operand's
    // data buffer, and the operand array itself.
    ConvolutionalNetwork *network;
    ConvolutionalParams *conv_params;
    network = (ConvolutionalNetwork *)(*model)->model;
    for (layer = 0; layer < network->layers_num; ++layer){
        if (network->layers[layer].type == CONV){
            // CONV params carry two extra heap blocks beyond the struct.
            conv_params = (ConvolutionalParams *)network->layers[layer].params;
            av_freep(&conv_params->kernel);
            av_freep(&conv_params->biases);
        // Safe for partially loaded models: av_freep() tolerates NULL params.
        av_freep(&network->layers[layer].params);
    av_freep(&network->layers);
    // Operand data includes the input buffer allocated by
    // set_input_output_native() and any intermediate/output buffers.
    for (uint32_t operand = 0; operand < network->operands_num; ++operand)
        av_freep(&network->operands[operand].data);
    av_freep(&network->operands);