2 * Copyright (c) 2018 Sergey Lavrushkin
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * DNN tensorflow backend implementation.
26 #include "dnn_backend_tf.h"
27 #include "dnn_backend_native.h"
28 #include "dnn_backend_native_layer_conv2d.h"
29 #include "libavformat/avio.h"
30 #include "libavutil/avassert.h"
31 #include "dnn_backend_native_layer_pad.h"
33 #include <tensorflow/c/c_api.h>
// Per-model state for the TensorFlow backend.  Further members (graph,
// session, status, input/output ports, nb_output) are declared in the
// elided portion of this struct.
35 typedef struct TFModel{
// Pre-allocated tensor backing the model input; its data pointer is
// exposed to the caller in set_input_output_tf().
40 TF_Tensor *input_tensor;
// Array of result tensors, one slot per requested output; (re)allocated
// in set_input_output_tf() and filled by TF_SessionRun().
42 TF_Tensor **output_tensors;
// TF_Buffer deallocator callback: releases the graph bytes owned by the
// buffer created in read_graph() (body elided from this view).
46 static void free_buffer(void *data, size_t length)
// Read a serialized TensorFlow GraphDef file into a freshly allocated
// TF_Buffer.  Ownership of the raw bytes is transferred to the buffer,
// which frees them via free_buffer().  Error paths (open/alloc/short
// read) are partially elided here; presumably each returns NULL.
51 static TF_Buffer *read_graph(const char *model_filename)
54 unsigned char *graph_data = NULL;
55 AVIOContext *model_file_context;
// NOTE(review): avio_size() returns int64_t; storing it in a long can
// truncate on platforms where long is 32-bit -- confirm model files are
// expected to stay below 2 GiB.
56 long size, bytes_read;
58 if (avio_open(&model_file_context, model_filename, AVIO_FLAG_READ) < 0){
62 size = avio_size(model_file_context);
64 graph_data = av_malloc(size);
// this close belongs to the (elided) allocation-failure branch
66 avio_closep(&model_file_context);
69 bytes_read = avio_read(model_file_context, graph_data, size);
70 avio_closep(&model_file_context);
// a short read means I/O failed or the file changed size: discard
71 if (bytes_read != size){
72 av_freep(&graph_data);
// success: wrap the bytes in a TF_Buffer that now owns graph_data
76 graph_buf = TF_NewBuffer();
77 graph_buf->data = (void *)graph_data;
78 graph_buf->length = size;
79 graph_buf->data_deallocator = free_buffer;
// Allocate a rank-4 NHWC input tensor (batch fixed at 1) sized from the
// caller-provided dimensions.  The elided switch presumably maps
// input->dt to a TF_DataType 'dt' and per-element 'size'.
84 static TF_Tensor *allocate_input_tensor(const DNNInputData *input)
88 int64_t input_dims[] = {1, input->height, input->width, input->channels};
// unreachable: every supported data type is handled above (elided)
99 av_assert0(!"should not reach here");
102 return TF_AllocateTensor(dt, input_dims, 4,
103 input_dims[1] * input_dims[2] * input_dims[3] * size);
// Bind the model's input/output endpoints and (re)create the TF session.
// Looks up the named operations in the graph, allocates the input tensor
// (exposing its backing store through input->data so the caller writes
// pixels directly), sizes the per-output bookkeeping arrays, then runs
// the optional "init" operation.  Error returns are elided in this view;
// presumably each failed check returns DNN_ERROR.
106 static DNNReturnType set_input_output_tf(void *model, DNNInputData *input, const char *input_name, const char **output_names, uint32_t nb_output)
108 TFModel *tf_model = (TFModel *)model;
109 TF_SessionOptions *sess_opts;
// NULL when the graph has no "init" op; checked before the init run below
110 const TF_Operation *init_op = TF_GraphOperationByName(tf_model->graph, "init");
113 tf_model->input.oper = TF_GraphOperationByName(tf_model->graph, input_name);
114 if (!tf_model->input.oper){
117 tf_model->input.index = 0;
// drop any tensor left over from a previous set_input_output call
118 if (tf_model->input_tensor){
119 TF_DeleteTensor(tf_model->input_tensor);
121 tf_model->input_tensor = allocate_input_tensor(input);
122 if (!tf_model->input_tensor){
// hand the tensor's backing store to the caller for direct writes
125 input->data = (float *)TF_TensorData(tf_model->input_tensor);
// rebuild the output-port array for the newly requested outputs
131 av_freep(&tf_model->outputs);
132 tf_model->outputs = av_malloc_array(nb_output, sizeof(*tf_model->outputs));
133 if (!tf_model->outputs)
135 for (int i = 0; i < nb_output; ++i) {
136 tf_model->outputs[i].oper = TF_GraphOperationByName(tf_model->graph, output_names[i]);
137 if (!tf_model->outputs[i].oper){
138 av_freep(&tf_model->outputs);
141 tf_model->outputs[i].index = 0;
// free result tensors from any previous execution before resizing
144 if (tf_model->output_tensors) {
145 for (uint32_t i = 0; i < tf_model->nb_output; ++i) {
146 if (tf_model->output_tensors[i]) {
147 TF_DeleteTensor(tf_model->output_tensors[i]);
148 tf_model->output_tensors[i] = NULL;
152 av_freep(&tf_model->output_tensors);
// zero-filled so ff_dnn_free_model_tf can safely delete unused slots
153 tf_model->output_tensors = av_mallocz_array(nb_output, sizeof(*tf_model->output_tensors));
154 if (!tf_model->output_tensors) {
155 av_freep(&tf_model->outputs);
159 tf_model->nb_output = nb_output;
// replace any existing session so the new bindings take effect
161 if (tf_model->session){
162 TF_CloseSession(tf_model->session, tf_model->status);
163 TF_DeleteSession(tf_model->session, tf_model->status);
166 sess_opts = TF_NewSessionOptions();
167 tf_model->session = TF_NewSession(tf_model->graph, sess_opts, tf_model->status);
168 TF_DeleteSessionOptions(sess_opts);
169 if (TF_GetCode(tf_model->status) != TF_OK)
174 // Run initialization operation with name "init" if it is present in graph
176 TF_SessionRun(tf_model->session, NULL,
179 &init_op, 1, NULL, tf_model->status);
180 if (TF_GetCode(tf_model->status) != TF_OK)
// Load a TensorFlow GraphDef from disk into tf_model->graph and create
// the status object.  On import failure the graph and status are torn
// down again (the return statements are elided from this view).
189 static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename)
191 TF_Buffer *graph_def;
192 TF_ImportGraphDefOptions *graph_opts;
194 graph_def = read_graph(model_filename);
198 tf_model->graph = TF_NewGraph();
199 tf_model->status = TF_NewStatus();
200 graph_opts = TF_NewImportGraphDefOptions();
201 TF_GraphImportGraphDef(tf_model->graph, graph_def, graph_opts, tf_model->status);
// options and serialized buffer are no longer needed after import
202 TF_DeleteImportGraphDefOptions(graph_opts);
203 TF_DeleteBuffer(graph_def);
204 if (TF_GetCode(tf_model->status) != TF_OK){
205 TF_DeleteGraph(tf_model->graph);
206 TF_DeleteStatus(tf_model->status);
213 #define NAME_BUFFER_SIZE 256
// Append one convolutional layer to the graph being built from a native
// model: kernel constant -> transpose -> Conv2D -> bias constant ->
// BiasAdd -> activation.  *cur_op is advanced to the layer's final op.
// Error branches (presumably returning DNN_ERROR) are elided here.
215 static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_op, TF_Operation **cur_op,
216 ConvolutionalParams* params, const int layer)
219 TF_OperationDescription *op_desc;
// NHWC layout: stride 1 over every dimension
221 int64_t strides[] = {1, 1, 1, 1};
225 char name_buffer[NAME_BUFFER_SIZE];
// total number of kernel coefficients
228 size = params->input_num * params->output_num * params->kernel_size * params->kernel_size;
// kernel constant in the native model's O,H,W,I order; the transpose
// below (perm {1,2,3,0} from load_native_model) converts it to TF's
// H,W,I,O Conv2D filter layout
231 snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_kernel%d", layer);
232 op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
233 TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
234 dims[0] = params->output_num;
235 dims[1] = params->kernel_size;
236 dims[2] = params->kernel_size;
237 dims[3] = params->input_num;
239 tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, size * sizeof(float));
240 memcpy(TF_TensorData(tensor), params->kernel, size * sizeof(float));
241 TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
242 if (TF_GetCode(tf_model->status) != TF_OK){
245 op = TF_FinishOperation(op_desc, tf_model->status);
246 if (TF_GetCode(tf_model->status) != TF_OK){
// transpose the kernel constant using the shared permutation constant
250 snprintf(name_buffer, NAME_BUFFER_SIZE, "transpose%d", layer);
251 op_desc = TF_NewOperation(tf_model->graph, "Transpose", name_buffer);
253 TF_AddInput(op_desc, input);
254 input.oper = transpose_op;
255 TF_AddInput(op_desc, input);
256 TF_SetAttrType(op_desc, "T", TF_FLOAT);
257 TF_SetAttrType(op_desc, "Tperm", TF_INT32);
258 op = TF_FinishOperation(op_desc, tf_model->status);
259 if (TF_GetCode(tf_model->status) != TF_OK){
// convolution of the running activation with the transposed kernel;
// VALID padding: explicit pad layers handle border handling upstream
263 snprintf(name_buffer, NAME_BUFFER_SIZE, "conv2d%d", layer);
264 op_desc = TF_NewOperation(tf_model->graph, "Conv2D", name_buffer);
265 input.oper = *cur_op;
266 TF_AddInput(op_desc, input);
268 TF_AddInput(op_desc, input);
269 TF_SetAttrType(op_desc, "T", TF_FLOAT);
270 TF_SetAttrIntList(op_desc, "strides", strides, 4);
271 TF_SetAttrString(op_desc, "padding", "VALID", 5);
272 *cur_op = TF_FinishOperation(op_desc, tf_model->status);
273 if (TF_GetCode(tf_model->status) != TF_OK){
// per-output-channel bias constant
277 snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_biases%d", layer);
278 op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
279 TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
280 dims[0] = params->output_num;
282 tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, params->output_num * sizeof(float));
283 memcpy(TF_TensorData(tensor), params->biases, params->output_num * sizeof(float));
284 TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
285 if (TF_GetCode(tf_model->status) != TF_OK){
288 op = TF_FinishOperation(op_desc, tf_model->status);
289 if (TF_GetCode(tf_model->status) != TF_OK){
293 snprintf(name_buffer, NAME_BUFFER_SIZE, "bias_add%d", layer);
294 op_desc = TF_NewOperation(tf_model->graph, "BiasAdd", name_buffer);
295 input.oper = *cur_op;
296 TF_AddInput(op_desc, input);
298 TF_AddInput(op_desc, input);
299 TF_SetAttrType(op_desc, "T", TF_FLOAT);
300 *cur_op = TF_FinishOperation(op_desc, tf_model->status);
301 if (TF_GetCode(tf_model->status) != TF_OK){
// activation selected from the native layer parameters; the default
// case (unsupported activation, elided) presumably fails the build
305 snprintf(name_buffer, NAME_BUFFER_SIZE, "activation%d", layer);
306 switch (params->activation){
308 op_desc = TF_NewOperation(tf_model->graph, "Relu", name_buffer);
311 op_desc = TF_NewOperation(tf_model->graph, "Tanh", name_buffer);
314 op_desc = TF_NewOperation(tf_model->graph, "Sigmoid", name_buffer);
319 input.oper = *cur_op;
320 TF_AddInput(op_desc, input);
321 TF_SetAttrType(op_desc, "T", TF_FLOAT);
322 *cur_op = TF_FinishOperation(op_desc, tf_model->status);
323 if (TF_GetCode(tf_model->status) != TF_OK){
// Append a DepthToSpace op (pixel-shuffle upscaling by params->block_size)
// to the graph and advance *cur_op to it.
330 static DNNReturnType add_depth_to_space_layer(TFModel *tf_model, TF_Operation **cur_op,
331 DepthToSpaceParams *params, const int layer)
333 TF_OperationDescription *op_desc;
335 char name_buffer[NAME_BUFFER_SIZE];
337 snprintf(name_buffer, NAME_BUFFER_SIZE, "depth_to_space%d", layer);
338 op_desc = TF_NewOperation(tf_model->graph, "DepthToSpace", name_buffer);
339 input.oper = *cur_op;
341 TF_AddInput(op_desc, input);
342 TF_SetAttrType(op_desc, "T", TF_FLOAT);
343 TF_SetAttrInt(op_desc, "block_size", params->block_size);
344 *cur_op = TF_FinishOperation(op_desc, tf_model->status);
345 if (TF_GetCode(tf_model->status) != TF_OK){
// Append a mirror-padding layer: a 4x2 int32 paddings constant (one
// before/after pair per N,H,W,C dimension) feeding a MirrorPad op.
352 static DNNReturnType add_pad_layer(TFModel *tf_model, TF_Operation **cur_op,
353 LayerPadParams *params, const int layer)
357 TF_OperationDescription *op_desc;
360 int64_t pads_shape[] = {4, 2};
362 char name_buffer[NAME_BUFFER_SIZE];
363 snprintf(name_buffer, NAME_BUFFER_SIZE, "pad%d", layer);
365 op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
366 TF_SetAttrType(op_desc, "dtype", TF_INT32);
367 tensor = TF_AllocateTensor(TF_INT32, pads_shape, 2, 4 * 2 * sizeof(int32_t));
// flatten paddings[dim][before/after] row-major into the tensor
368 pads = (int32_t *)TF_TensorData(tensor);
369 pads[0] = params->paddings[0][0];
370 pads[1] = params->paddings[0][1];
371 pads[2] = params->paddings[1][0];
372 pads[3] = params->paddings[1][1];
373 pads[4] = params->paddings[2][0];
374 pads[5] = params->paddings[2][1];
375 pads[6] = params->paddings[3][0];
376 pads[7] = params->paddings[3][1];
377 TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
378 if (TF_GetCode(tf_model->status) != TF_OK){
381 op = TF_FinishOperation(op_desc, tf_model->status);
382 if (TF_GetCode(tf_model->status) != TF_OK){
// NOTE(review): unlike every other op here, this one is named
// "mirror_pad" without the layer index -- a model with two pad layers
// would hit a graph-name collision.  Also the mode is hardcoded to
// SYMMETRIC regardless of params; confirm native models never request
// another pad mode.
386 op_desc = TF_NewOperation(tf_model->graph, "MirrorPad", "mirror_pad");
387 input.oper = *cur_op;
389 TF_AddInput(op_desc, input);
391 TF_AddInput(op_desc, input);
392 TF_SetAttrType(op_desc, "T", TF_FLOAT);
393 TF_SetAttrType(op_desc, "Tpaddings", TF_INT32);
394 TF_SetAttrString(op_desc, "mode", "SYMMETRIC", 9);
395 *cur_op = TF_FinishOperation(op_desc, tf_model->status);
396 if (TF_GetCode(tf_model->status) != TF_OK){
// Fallback loader: parse a native-format model and translate it layer by
// layer into an equivalent TensorFlow graph ("x" placeholder input ->
// conv/depth-to-space/pad layers -> "y" identity output).
403 static DNNReturnType load_native_model(TFModel *tf_model, const char *model_filename)
406 TF_OperationDescription *op_desc;
408 TF_Operation *transpose_op;
411 int32_t *transpose_perm;
412 int64_t transpose_perm_shape[] = {4};
// batch 1, unknown height/width/channels
413 int64_t input_shape[] = {1, -1, -1, -1};
414 DNNReturnType layer_add_res;
415 DNNModel *native_model = NULL;
416 ConvolutionalNetwork *conv_network;
418 native_model = ff_dnn_load_model_native(model_filename);
423 conv_network = (ConvolutionalNetwork *)native_model->model;
424 tf_model->graph = TF_NewGraph();
425 tf_model->status = TF_NewStatus();
// shared error-exit: tear down the partially built graph and status
// (full macro body elided; presumably also frees native_model and
// returns DNN_ERROR)
427 #define CLEANUP_ON_ERROR(tf_model) \
429 TF_DeleteGraph(tf_model->graph); \
430 TF_DeleteStatus(tf_model->status); \
// input placeholder named "x"; set_input_output_tf later looks this up
434 op_desc = TF_NewOperation(tf_model->graph, "Placeholder", "x");
435 TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
436 TF_SetAttrShape(op_desc, "shape", input_shape, 4);
437 op = TF_FinishOperation(op_desc, tf_model->status);
438 if (TF_GetCode(tf_model->status) != TF_OK){
439 CLEANUP_ON_ERROR(tf_model);
// permutation {1,2,3,0} shared by all conv layers to turn O,H,W,I
// kernels into TF's H,W,I,O order (see add_conv_layer)
442 op_desc = TF_NewOperation(tf_model->graph, "Const", "transpose_perm");
443 TF_SetAttrType(op_desc, "dtype", TF_INT32);
444 tensor = TF_AllocateTensor(TF_INT32, transpose_perm_shape, 1, 4 * sizeof(int32_t));
445 transpose_perm = (int32_t *)TF_TensorData(tensor);
446 transpose_perm[0] = 1;
447 transpose_perm[1] = 2;
448 transpose_perm[2] = 3;
449 transpose_perm[3] = 0;
450 TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
451 if (TF_GetCode(tf_model->status) != TF_OK){
452 CLEANUP_ON_ERROR(tf_model);
454 transpose_op = TF_FinishOperation(op_desc, tf_model->status);
// translate each native layer, threading the running op through 'op'
456 for (layer = 0; layer < conv_network->layers_num; ++layer){
457 switch (conv_network->layers[layer].type){
459 layer_add_res = DNN_SUCCESS;
462 layer_add_res = add_conv_layer(tf_model, transpose_op, &op,
463 (ConvolutionalParams *)conv_network->layers[layer].params, layer);
466 layer_add_res = add_depth_to_space_layer(tf_model, &op,
467 (DepthToSpaceParams *)conv_network->layers[layer].params, layer);
470 layer_add_res = add_pad_layer(tf_model, &op,
471 (LayerPadParams *)conv_network->layers[layer].params, layer);
// unknown layer type: abort the translation
474 CLEANUP_ON_ERROR(tf_model);
477 if (layer_add_res != DNN_SUCCESS){
478 CLEANUP_ON_ERROR(tf_model);
// output identity op named "y" for set_input_output_tf to find
482 op_desc = TF_NewOperation(tf_model->graph, "Identity", "y");
485 TF_AddInput(op_desc, input);
486 TF_FinishOperation(op_desc, tf_model->status);
487 if (TF_GetCode(tf_model->status) != TF_OK){
488 CLEANUP_ON_ERROR(tf_model);
// weights have been copied into graph constants; native model no
// longer needed
491 ff_dnn_free_model_native(&native_model);
// Public entry point: create a DNNModel backed by TensorFlow.  Tries the
// file as a serialized GraphDef first, then falls back to translating a
// native-format model.  Failure cleanup and the return are elided here.
496 DNNModel *ff_dnn_load_model_tf(const char *model_filename)
498 DNNModel *model = NULL;
499 TFModel *tf_model = NULL;
501 model = av_malloc(sizeof(DNNModel));
// zeroed so ff_dnn_free_model_tf can run safely on a partial model
506 tf_model = av_mallocz(sizeof(TFModel));
512 if (load_tf_model(tf_model, model_filename) != DNN_SUCCESS){
513 if (load_native_model(tf_model, model_filename) != DNN_SUCCESS){
521 model->model = (void *)tf_model;
522 model->set_input_output = &set_input_output_tf;
// Run one inference.  Frees result tensors from the previous run, then
// executes the session and exports dimensions and data pointers for up
// to min(nb_output, model outputs) results.  outputs[i].data points into
// TF-owned tensors and stays valid only until the next execute/free.
529 DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, DNNData *outputs, uint32_t nb_output)
531 TFModel *tf_model = (TFModel *)model->model;
// never fetch more outputs than set_input_output_tf() prepared
532 uint32_t nb = FFMIN(nb_output, tf_model->nb_output);
// set_input_output_tf() must have been called first
536 av_assert0(tf_model->output_tensors);
537 for (uint32_t i = 0; i < tf_model->nb_output; ++i) {
538 if (tf_model->output_tensors[i]) {
539 TF_DeleteTensor(tf_model->output_tensors[i]);
540 tf_model->output_tensors[i] = NULL;
544 TF_SessionRun(tf_model->session, NULL,
545 &tf_model->input, &tf_model->input_tensor, 1,
546 tf_model->outputs, tf_model->output_tensors, nb,
547 NULL, 0, NULL, tf_model->status);
549 if (TF_GetCode(tf_model->status) != TF_OK){
// export NHWC dims (batch dimension 0 is ignored) and data pointers
553 for (uint32_t i = 0; i < nb; ++i) {
554 outputs[i].height = TF_Dim(tf_model->output_tensors[i], 1);
555 outputs[i].width = TF_Dim(tf_model->output_tensors[i], 2);
556 outputs[i].channels = TF_Dim(tf_model->output_tensors[i], 3);
557 outputs[i].data = TF_TensorData(tf_model->output_tensors[i]);
// Release all TensorFlow resources held by the model.  Each member is
// NULL-checked because the model may be only partially constructed when
// loading failed.  The final frees of tf_model/model run past this view.
563 void ff_dnn_free_model_tf(DNNModel **model)
568 tf_model = (TFModel *)(*model)->model;
569 if (tf_model->graph){
570 TF_DeleteGraph(tf_model->graph);
572 if (tf_model->session){
573 TF_CloseSession(tf_model->session, tf_model->status);
574 TF_DeleteSession(tf_model->session, tf_model->status);
576 if (tf_model->status){
577 TF_DeleteStatus(tf_model->status);
579 if (tf_model->input_tensor){
580 TF_DeleteTensor(tf_model->input_tensor);
582 if (tf_model->output_tensors) {
583 for (uint32_t i = 0; i < tf_model->nb_output; ++i) {
584 if (tf_model->output_tensors[i]) {
585 TF_DeleteTensor(tf_model->output_tensors[i]);
586 tf_model->output_tensors[i] = NULL;
590 av_freep(&tf_model->outputs);
591 av_freep(&tf_model->output_tensors);