2 * Copyright (c) 2018 Sergey Lavrushkin
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * DNN TensorFlow backend implementation.
26 #include "dnn_backend_tf.h"
27 #include "dnn_backend_native.h"
28 #include "dnn_backend_native_layer_conv2d.h"
29 #include "dnn_backend_native_layer_depth2space.h"
30 #include "libavformat/avio.h"
31 #include "libavutil/avassert.h"
32 #include "dnn_backend_native_layer_pad.h"
33 #include "dnn_backend_native_layer_maximum.h"
35 #include <tensorflow/c/c_api.h>
/*
 * Per-model state of the TensorFlow backend.
 * Only two members are visible in this excerpt. The rest of the file also
 * accesses graph, session, status, input, outputs and nb_output through
 * TFModel pointers, so those members are presumably declared on lines that
 * are not shown here.
 */
37 typedef struct TFModel{
/* Input tensor; (re)allocated in set_input_output_tf() and fed to TF_SessionRun(). */
42 TF_Tensor *input_tensor;
/* Array of output tensor pointers, one slot per requested output. */
44 TF_Tensor **output_tensors;
/*
 * Deallocator callback with the (data, length) signature; read_graph()
 * installs it as TF_Buffer.data_deallocator. The body is not visible in this
 * excerpt -- presumably it releases `data`; confirm against the full file.
 */
48 static void free_buffer(void *data, size_t length)
/*
 * Read the whole file `model_filename` through avio into a heap buffer and
 * wrap that buffer in a newly created TF_Buffer whose deallocator is
 * free_buffer().
 *
 * Several lines of this function (returns, closing braces, at least one `if`
 * header) are absent from this excerpt, so the exact error-path return values
 * cannot be read from here.
 */
53 static TF_Buffer *read_graph(const char *model_filename)
56 unsigned char *graph_data = NULL;
57 AVIOContext *model_file_context;
/* NOTE(review): avio_size() is declared to return int64_t in libavformat and
 * can report failure with a negative value -- confirm. Storing it in `long`
 * may truncate on platforms where long is 32 bits. */
58 long size, bytes_read;
60 if (avio_open(&model_file_context, model_filename, AVIO_FLAG_READ) < 0){
/* NOTE(review): no check of `size` (negative / zero) is visible before it is
 * used as an allocation size. */
64 size = avio_size(model_file_context);
66 graph_data = av_malloc(size);
/* This close sits between the allocation and the read; it is presumably the
 * body of an allocation-failure branch whose `if` line is not shown. */
68 avio_closep(&model_file_context);
71 bytes_read = avio_read(model_file_context, graph_data, size);
72 avio_closep(&model_file_context);
/* A short read is treated as failure: the data buffer is released. */
73 if (bytes_read != size){
74 av_freep(&graph_data);
/* NOTE(review): the TF_NewBuffer() result is dereferenced with no visible
 * NULL check. */
78 graph_buf = TF_NewBuffer();
79 graph_buf->data = (void *)graph_data;
80 graph_buf->length = size;
/* free_buffer is registered as the buffer's data deallocator. */
81 graph_buf->data_deallocator = free_buffer;
/*
 * Allocate a 4-dimensional TensorFlow tensor for `input`.
 * The dimensions are {1, height, width, channels}; the byte length passed to
 * TF_AllocateTensor() is height * width * channels * size.
 * `dt` and `size` are assigned on lines not visible in this excerpt
 * (presumably derived from the input's data type -- confirm).
 * Returns whatever TF_AllocateTensor() returns.
 */
86 static TF_Tensor *allocate_input_tensor(const DNNData *input)
90 int64_t input_dims[] = {1, input->height, input->width, input->channels};
/* Always-failing assertion; presumably the fallback branch for an unhandled
 * data type (the surrounding control flow is not shown). */
101 av_assert0(!"should not reach here");
104 return TF_AllocateTensor(dt, input_dims, 4,
105 input_dims[1] * input_dims[2] * input_dims[3] * size);
/*
 * Query the graph for the operation named `input_name` and fill `input` with
 * its data type and its height/width/channels (shape entries 1, 2 and 3).
 *
 * `model` is cast to TFModel *. A temporary TF_Status is created for the shape
 * query and deleted on both the failure and the success path. Return
 * statements are not visible in this excerpt.
 */
108 static DNNReturnType get_input_tf(void *model, DNNData *input, const char *input_name)
110 TFModel *tf_model = (TFModel *)model;
/* Look the input operation up by name. */
115 tf_output.oper = TF_GraphOperationByName(tf_model->graph, input_name);
120 input->dt = TF_OperationOutputType(tf_output);
/* Ask for exactly 4 shape entries. */
122 status = TF_NewStatus();
123 TF_GraphGetTensorShape(tf_model->graph, tf_output, dims, 4, status);
124 if (TF_GetCode(status) != TF_OK){
125 TF_DeleteStatus(status);
128 TF_DeleteStatus(status);
130 // currently only NHWC is supported
/* NOTE(review): a first dimension other than 1 trips av_assert0 instead of
 * being reported through the return value. */
131 av_assert0(dims[0] == 1);
132 input->height = dims[1];
133 input->width = dims[2];
134 input->channels = dims[3];
/*
 * Bind the model to a named input and `nb_output` named outputs, then
 * (re)create the TensorFlow session.
 *
 * Visible steps:
 *   1. resolve the input operation and (re)allocate the input tensor;
 *      input->data is pointed at the tensor's storage;
 *   2. rebuild the tf_model->outputs array from `output_names`;
 *   3. release any previous output tensors and allocate a zeroed pointer
 *      array for the new ones;
 *   4. close/delete an existing session and create a new one;
 *   5. run the operation named "init" (see the existing comment below).
 * Return statements and some guards are not visible in this excerpt.
 */
139 static DNNReturnType set_input_output_tf(void *model, DNNData *input, const char *input_name, const char **output_names, uint32_t nb_output)
141 TFModel *tf_model = (TFModel *)model;
142 TF_SessionOptions *sess_opts;
143 const TF_Operation *init_op = TF_GraphOperationByName(tf_model->graph, "init");
/* Step 1: input operation and input tensor. */
146 tf_model->input.oper = TF_GraphOperationByName(tf_model->graph, input_name);
147 if (!tf_model->input.oper){
150 tf_model->input.index = 0;
/* Drop a tensor left over from a previous call before allocating a new one. */
151 if (tf_model->input_tensor){
152 TF_DeleteTensor(tf_model->input_tensor);
154 tf_model->input_tensor = allocate_input_tensor(input);
155 if (!tf_model->input_tensor){
/* The caller writes input samples directly into the tensor's memory. */
158 input->data = (float *)TF_TensorData(tf_model->input_tensor);
/* Step 2: output operations. */
164 av_freep(&tf_model->outputs);
165 tf_model->outputs = av_malloc_array(nb_output, sizeof(*tf_model->outputs));
166 if (!tf_model->outputs)
/* NOTE(review): `i` is int while nb_output is uint32_t (signed/unsigned
 * comparison); the loop further down uses uint32_t. */
168 for (int i = 0; i < nb_output; ++i) {
169 tf_model->outputs[i].oper = TF_GraphOperationByName(tf_model->graph, output_names[i]);
/* Unknown output name: the partially filled array is released. */
170 if (!tf_model->outputs[i].oper){
171 av_freep(&tf_model->outputs);
174 tf_model->outputs[i].index = 0;
/* Step 3: output tensors. The loop bound is the previous tf_model->nb_output,
 * which is only overwritten after the old array has been released. */
177 if (tf_model->output_tensors) {
178 for (uint32_t i = 0; i < tf_model->nb_output; ++i) {
179 if (tf_model->output_tensors[i]) {
180 TF_DeleteTensor(tf_model->output_tensors[i]);
181 tf_model->output_tensors[i] = NULL;
185 av_freep(&tf_model->output_tensors);
186 tf_model->output_tensors = av_mallocz_array(nb_output, sizeof(*tf_model->output_tensors));
187 if (!tf_model->output_tensors) {
188 av_freep(&tf_model->outputs);
192 tf_model->nb_output = nb_output;
/* Step 4: replace any existing session with a fresh one. */
194 if (tf_model->session){
195 TF_CloseSession(tf_model->session, tf_model->status);
196 TF_DeleteSession(tf_model->session, tf_model->status);
199 sess_opts = TF_NewSessionOptions();
200 tf_model->session = TF_NewSession(tf_model->graph, sess_opts, tf_model->status);
201 TF_DeleteSessionOptions(sess_opts);
202 if (TF_GetCode(tf_model->status) != TF_OK)
207 // Run initialization operation with name "init" if it is present in graph
/* Step 5: init_op is passed as the single target operation; the guard that
 * tests for its presence is not visible in this excerpt. */
209 TF_SessionRun(tf_model->session, NULL,
212 &init_op, 1, NULL, tf_model->status);
213 if (TF_GetCode(tf_model->status) != TF_OK)
/*
 * Load a serialized TensorFlow graph definition from `model_filename` into a
 * new graph stored in tf_model->graph; a new status object is stored in
 * tf_model->status.
 *
 * The import options and the graph-definition buffer are released right after
 * the import call, regardless of its outcome. If the import status is not
 * TF_OK, the graph and the status are deleted. Return statements are not
 * visible in this excerpt.
 */
222 static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename)
224 TF_Buffer *graph_def;
225 TF_ImportGraphDefOptions *graph_opts;
227 graph_def = read_graph(model_filename);
231 tf_model->graph = TF_NewGraph();
232 tf_model->status = TF_NewStatus();
233 graph_opts = TF_NewImportGraphDefOptions();
234 TF_GraphImportGraphDef(tf_model->graph, graph_def, graph_opts, tf_model->status);
235 TF_DeleteImportGraphDefOptions(graph_opts);
236 TF_DeleteBuffer(graph_def);
/* NOTE(review): after these deletions tf_model->graph / tf_model->status are
 * not visibly reset to NULL. ff_dnn_load_model_tf() follows a failure with
 * load_native_model(), which assigns both fields again. */
237 if (TF_GetCode(tf_model->status) != TF_OK){
238 TF_DeleteGraph(tf_model->graph);
239 TF_DeleteStatus(tf_model->status);
/* Size in bytes of the stack buffers into which the layer builders snprintf()
 * TensorFlow operation names. */
246 #define NAME_BUFFER_SIZE 256
/*
 * Append the TensorFlow operations for one convolutional layer to
 * tf_model->graph and advance *cur_op to the last operation created.
 *
 * Operation names are suffixed with the layer index `layer`. The visible
 * stages are: kernel constant -> Transpose -> Conv2D -> bias constant ->
 * BiasAdd -> activation. After every TF call that reports through
 * tf_model->status the status code is compared against TF_OK; the bodies of
 * those checks (presumably error returns) are not visible in this excerpt.
 *
 * NOTE(review): no TF_DeleteTensor() call is visible for the tensors created
 * with TF_AllocateTensor() -- check the TensorFlow C API ownership rules for
 * TF_SetAttrTensor() and whether these tensors leak.
 */
248 static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_op, TF_Operation **cur_op,
249 ConvolutionalParams* params, const int layer)
252 TF_OperationDescription *op_desc;
/* Stride of 1 in every dimension, used by the Conv2D op below. */
254 int64_t strides[] = {1, 1, 1, 1};
258 char name_buffer[NAME_BUFFER_SIZE];
/* Number of float coefficients in the kernel. */
261 size = params->input_num * params->output_num * params->kernel_size * params->kernel_size;
/* Stage 1: Const op "conv_kernel<layer>" holding the kernel with dimensions
 * {output_num, kernel_size, kernel_size, input_num}. */
264 snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_kernel%d", layer);
265 op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
266 TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
267 dims[0] = params->output_num;
268 dims[1] = params->kernel_size;
269 dims[2] = params->kernel_size;
270 dims[3] = params->input_num;
272 tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, size * sizeof(float));
273 memcpy(TF_TensorData(tensor), params->kernel, size * sizeof(float));
274 TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
275 if (TF_GetCode(tf_model->status) != TF_OK){
278 op = TF_FinishOperation(op_desc, tf_model->status);
279 if (TF_GetCode(tf_model->status) != TF_OK){
/* Stage 2: Transpose op "transpose<layer>". It takes two inputs; the second
 * one is transpose_op (the permutation constant supplied by the caller). */
283 snprintf(name_buffer, NAME_BUFFER_SIZE, "transpose%d", layer);
284 op_desc = TF_NewOperation(tf_model->graph, "Transpose", name_buffer);
286 TF_AddInput(op_desc, input);
287 input.oper = transpose_op;
288 TF_AddInput(op_desc, input);
289 TF_SetAttrType(op_desc, "T", TF_FLOAT);
290 TF_SetAttrType(op_desc, "Tperm", TF_INT32);
291 op = TF_FinishOperation(op_desc, tf_model->status);
292 if (TF_GetCode(tf_model->status) != TF_OK){
/* Stage 3: Conv2D op "conv2d<layer>" whose first input is the current
 * operation; padding is always "VALID". The result becomes *cur_op. */
296 snprintf(name_buffer, NAME_BUFFER_SIZE, "conv2d%d", layer);
297 op_desc = TF_NewOperation(tf_model->graph, "Conv2D", name_buffer);
298 input.oper = *cur_op;
299 TF_AddInput(op_desc, input);
301 TF_AddInput(op_desc, input);
302 TF_SetAttrType(op_desc, "T", TF_FLOAT);
303 TF_SetAttrIntList(op_desc, "strides", strides, 4);
304 TF_SetAttrString(op_desc, "padding", "VALID", 5);
305 *cur_op = TF_FinishOperation(op_desc, tf_model->status);
306 if (TF_GetCode(tf_model->status) != TF_OK){
/* Stage 4: Const op "conv_biases<layer>" holding output_num floats copied
 * from params->biases. */
310 snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_biases%d", layer);
311 op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
312 TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
313 dims[0] = params->output_num;
315 tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, params->output_num * sizeof(float));
316 memcpy(TF_TensorData(tensor), params->biases, params->output_num * sizeof(float));
317 TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
318 if (TF_GetCode(tf_model->status) != TF_OK){
321 op = TF_FinishOperation(op_desc, tf_model->status);
322 if (TF_GetCode(tf_model->status) != TF_OK){
/* Stage 5: BiasAdd op "bias_add<layer>" applied to the current operation. */
326 snprintf(name_buffer, NAME_BUFFER_SIZE, "bias_add%d", layer);
327 op_desc = TF_NewOperation(tf_model->graph, "BiasAdd", name_buffer);
328 input.oper = *cur_op;
329 TF_AddInput(op_desc, input);
331 TF_AddInput(op_desc, input);
332 TF_SetAttrType(op_desc, "T", TF_FLOAT);
333 *cur_op = TF_FinishOperation(op_desc, tf_model->status);
334 if (TF_GetCode(tf_model->status) != TF_OK){
/* Stage 6: activation op "activation<layer>"; params->activation selects one
 * of Relu, Tanh or Sigmoid (the case labels are not visible here). */
338 snprintf(name_buffer, NAME_BUFFER_SIZE, "activation%d", layer);
339 switch (params->activation){
341 op_desc = TF_NewOperation(tf_model->graph, "Relu", name_buffer);
344 op_desc = TF_NewOperation(tf_model->graph, "Tanh", name_buffer);
347 op_desc = TF_NewOperation(tf_model->graph, "Sigmoid", name_buffer);
352 input.oper = *cur_op;
353 TF_AddInput(op_desc, input);
354 TF_SetAttrType(op_desc, "T", TF_FLOAT);
355 *cur_op = TF_FinishOperation(op_desc, tf_model->status);
356 if (TF_GetCode(tf_model->status) != TF_OK){
/*
 * Append a DepthToSpace operation named "depth_to_space<layer>" to
 * tf_model->graph. Its input is the current operation, its element type is
 * float and its "block_size" attribute is params->block_size. On completion
 * *cur_op is replaced by the new operation and the status code is compared
 * against TF_OK (the body of that check is not visible in this excerpt).
 */
363 static DNNReturnType add_depth_to_space_layer(TFModel *tf_model, TF_Operation **cur_op,
364 DepthToSpaceParams *params, const int layer)
366 TF_OperationDescription *op_desc;
368 char name_buffer[NAME_BUFFER_SIZE];
370 snprintf(name_buffer, NAME_BUFFER_SIZE, "depth_to_space%d", layer);
371 op_desc = TF_NewOperation(tf_model->graph, "DepthToSpace", name_buffer);
372 input.oper = *cur_op;
374 TF_AddInput(op_desc, input);
375 TF_SetAttrType(op_desc, "T", TF_FLOAT);
376 TF_SetAttrInt(op_desc, "block_size", params->block_size);
377 *cur_op = TF_FinishOperation(op_desc, tf_model->status);
378 if (TF_GetCode(tf_model->status) != TF_OK){
/*
 * Append a padding layer to tf_model->graph: an int32 constant of shape
 * {4, 2} with the padding amounts, followed by a MirrorPad operation that
 * consumes the current operation and that constant. *cur_op is replaced by
 * the MirrorPad operation. Status codes are compared against TF_OK after each
 * step; the bodies of those checks are not visible in this excerpt.
 */
385 static DNNReturnType add_pad_layer(TFModel *tf_model, TF_Operation **cur_op,
386 LayerPadParams *params, const int layer)
390 TF_OperationDescription *op_desc;
393 int64_t pads_shape[] = {4, 2};
395 char name_buffer[NAME_BUFFER_SIZE];
/* The constant is named "pad<layer>". */
396 snprintf(name_buffer, NAME_BUFFER_SIZE, "pad%d", layer);
398 op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
399 TF_SetAttrType(op_desc, "dtype", TF_INT32);
400 tensor = TF_AllocateTensor(TF_INT32, pads_shape, 2, 4 * 2 * sizeof(int32_t));
/* Copy params->paddings[i][j] into the tensor in row-major order
 * (two values for each of the four rows). */
401 pads = (int32_t *)TF_TensorData(tensor);
402 pads[0] = params->paddings[0][0];
403 pads[1] = params->paddings[0][1];
404 pads[2] = params->paddings[1][0];
405 pads[3] = params->paddings[1][1];
406 pads[4] = params->paddings[2][0];
407 pads[5] = params->paddings[2][1];
408 pads[6] = params->paddings[3][0];
409 pads[7] = params->paddings[3][1];
410 TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
411 if (TF_GetCode(tf_model->status) != TF_OK){
414 op = TF_FinishOperation(op_desc, tf_model->status);
415 if (TF_GetCode(tf_model->status) != TF_OK){
/* NOTE(review): unlike every other operation built in this file, this one
 * uses the fixed name "mirror_pad" with no layer index. A network with more
 * than one pad layer would presumably hit a duplicate-name error in
 * TF_FinishOperation -- confirm, and consider formatting "mirror_pad%d" into
 * name_buffer. */
419 op_desc = TF_NewOperation(tf_model->graph, "MirrorPad", "mirror_pad");
420 input.oper = *cur_op;
422 TF_AddInput(op_desc, input);
424 TF_AddInput(op_desc, input);
425 TF_SetAttrType(op_desc, "T", TF_FLOAT);
426 TF_SetAttrType(op_desc, "Tpaddings", TF_INT32);
/* NOTE(review): the mode is always "SYMMETRIC"; no mode field of `params` is
 * read on the visible lines. */
427 TF_SetAttrString(op_desc, "mode", "SYMMETRIC", 9);
428 *cur_op = TF_FinishOperation(op_desc, tf_model->status);
429 if (TF_GetCode(tf_model->status) != TF_OK){
/*
 * Append a Maximum layer to tf_model->graph: a float constant named
 * "maximum/y<layer>" followed by a Maximum operation named "maximum<layer>"
 * that takes the current operation as input. *cur_op is replaced by the
 * Maximum operation. Status codes are compared against TF_OK after each
 * step; the bodies of those checks are not visible in this excerpt.
 */
436 static DNNReturnType add_maximum_layer(TFModel *tf_model, TF_Operation **cur_op,
437 DnnLayerMaximumParams *params, const int layer)
441 TF_OperationDescription *op_desc;
445 char name_buffer[NAME_BUFFER_SIZE];
446 snprintf(name_buffer, NAME_BUFFER_SIZE, "maximum/y%d", layer);
448 op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
449 TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
/* Zero-dimensional tensor (no dims array, 0 dims) sized for one float. */
450 tensor = TF_AllocateTensor(TF_FLOAT, NULL, 0, TF_DataTypeSize(TF_FLOAT));
/* The store through `y` is on a line not visible in this excerpt; presumably
 * it writes a value taken from `params` -- confirm. */
451 y = (float *)TF_TensorData(tensor);
453 TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
454 if (TF_GetCode(tf_model->status) != TF_OK){
457 op = TF_FinishOperation(op_desc, tf_model->status);
458 if (TF_GetCode(tf_model->status) != TF_OK){
462 snprintf(name_buffer, NAME_BUFFER_SIZE, "maximum%d", layer);
463 op_desc = TF_NewOperation(tf_model->graph, "Maximum", name_buffer);
464 input.oper = *cur_op;
466 TF_AddInput(op_desc, input);
468 TF_AddInput(op_desc, input);
469 TF_SetAttrType(op_desc, "T", TF_FLOAT);
470 *cur_op = TF_FinishOperation(op_desc, tf_model->status);
471 if (TF_GetCode(tf_model->status) != TF_OK){
/*
 * Build a TensorFlow graph from a model in the native format.
 *
 * The file is loaded with ff_dnn_load_model_native(); a new graph and status
 * are stored in tf_model. The graph gets a float Placeholder "x" of shape
 * {1, -1, -1, -1}, an int32 constant "transpose_perm" = {1, 2, 3, 0}, one
 * group of operations per native layer (added by the add_*_layer helpers),
 * and a final Identity operation "y". The native model is released at the end
 * of the visible success path. Return statements are not visible in this
 * excerpt.
 */
478 static DNNReturnType load_native_model(TFModel *tf_model, const char *model_filename)
481 TF_OperationDescription *op_desc;
483 TF_Operation *transpose_op;
486 int32_t *transpose_perm;
487 int64_t transpose_perm_shape[] = {4};
488 int64_t input_shape[] = {1, -1, -1, -1};
489 DNNReturnType layer_add_res;
490 DNNModel *native_model = NULL;
491 ConvolutionalNetwork *conv_network;
493 native_model = ff_dnn_load_model_native(model_filename);
498 conv_network = (ConvolutionalNetwork *)native_model->model;
499 tf_model->graph = TF_NewGraph();
500 tf_model->status = TF_NewStatus();
/* Error-path helper: the visible lines delete the graph and the status.
 * NOTE(review): no release of native_model is visible in the macro, so error
 * paths that use it presumably leak the native model -- confirm against the
 * macro lines missing from this excerpt. */
502 #define CLEANUP_ON_ERROR(tf_model) \
504 TF_DeleteGraph(tf_model->graph); \
505 TF_DeleteStatus(tf_model->status); \
/* Input placeholder "x". */
509 op_desc = TF_NewOperation(tf_model->graph, "Placeholder", "x");
510 TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
511 TF_SetAttrShape(op_desc, "shape", input_shape, 4);
512 op = TF_FinishOperation(op_desc, tf_model->status);
513 if (TF_GetCode(tf_model->status) != TF_OK){
514 CLEANUP_ON_ERROR(tf_model);
/* Permutation constant shared by all convolution layers (passed to
 * add_conv_layer() as transpose_op). */
517 op_desc = TF_NewOperation(tf_model->graph, "Const", "transpose_perm");
518 TF_SetAttrType(op_desc, "dtype", TF_INT32);
519 tensor = TF_AllocateTensor(TF_INT32, transpose_perm_shape, 1, 4 * sizeof(int32_t));
520 transpose_perm = (int32_t *)TF_TensorData(tensor);
521 transpose_perm[0] = 1;
522 transpose_perm[1] = 2;
523 transpose_perm[2] = 3;
524 transpose_perm[3] = 0;
525 TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
526 if (TF_GetCode(tf_model->status) != TF_OK){
527 CLEANUP_ON_ERROR(tf_model);
/* NOTE(review): only a single source line is missing between this call and
 * the loop below, so there appears to be no status check after this
 * TF_FinishOperation() -- confirm. */
529 transpose_op = TF_FinishOperation(op_desc, tf_model->status);
/* Translate each native layer; `op` tracks the most recently added operation
 * and is updated by the helpers through &op. */
531 for (layer = 0; layer < conv_network->layers_num; ++layer){
532 switch (conv_network->layers[layer].type){
/* A layer type that adds no operations (its case label is not visible). */
534 layer_add_res = DNN_SUCCESS;
537 layer_add_res = add_conv_layer(tf_model, transpose_op, &op,
538 (ConvolutionalParams *)conv_network->layers[layer].params, layer);
540 case DLT_DEPTH_TO_SPACE:
541 layer_add_res = add_depth_to_space_layer(tf_model, &op,
542 (DepthToSpaceParams *)conv_network->layers[layer].params, layer);
545 layer_add_res = add_pad_layer(tf_model, &op,
546 (LayerPadParams *)conv_network->layers[layer].params, layer);
549 layer_add_res = add_maximum_layer(tf_model, &op,
550 (DnnLayerMaximumParams *)conv_network->layers[layer].params, layer);
/* Presumably the fallback for an unrecognized layer type (label not shown). */
553 CLEANUP_ON_ERROR(tf_model);
556 if (layer_add_res != DNN_SUCCESS){
557 CLEANUP_ON_ERROR(tf_model);
/* Output operation "y". */
561 op_desc = TF_NewOperation(tf_model->graph, "Identity", "y");
564 TF_AddInput(op_desc, input);
565 TF_FinishOperation(op_desc, tf_model->status);
566 if (TF_GetCode(tf_model->status) != TF_OK){
567 CLEANUP_ON_ERROR(tf_model);
570 ff_dnn_free_model_native(&native_model);
/*
 * Create a DNNModel backed by TensorFlow for `model_filename`.
 *
 * The file is first tried as a TensorFlow graph (load_tf_model()); if that
 * fails it is tried as a native-format model (load_native_model()). On the
 * visible success path the DNNModel's model, set_input_output, get_input and
 * options fields are filled in; `options` is stored as the pointer passed in,
 * not copied. Failure handling and return statements are not visible in this
 * excerpt.
 */
575 DNNModel *ff_dnn_load_model_tf(const char *model_filename, const char *options)
577 DNNModel *model = NULL;
578 TFModel *tf_model = NULL;
/* The DNNModel is allocated without zeroing; only the fields assigned below
 * are visibly initialized. */
580 model = av_malloc(sizeof(DNNModel));
/* The backend state is zero-initialized. */
585 tf_model = av_mallocz(sizeof(TFModel));
591 if (load_tf_model(tf_model, model_filename) != DNN_SUCCESS){
592 if (load_native_model(tf_model, model_filename) != DNN_SUCCESS){
600 model->model = (void *)tf_model;
601 model->set_input_output = &set_input_output_tf;
602 model->get_input = &get_input_tf;
603 model->options = options;
/*
 * Run the session once and describe the resulting tensors in `outputs`.
 *
 * At most min(nb_output, tf_model->nb_output) outputs are fetched. Output
 * tensors from a previous run are deleted first, so data pointers handed out
 * by an earlier call refer to tensors that no longer exist after this call.
 * Return statements are not visible in this excerpt.
 */
610 DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, DNNData *outputs, uint32_t nb_output)
612 TFModel *tf_model = (TFModel *)model->model;
613 uint32_t nb = FFMIN(nb_output, tf_model->nb_output);
/* The output tensor array must already exist at this point. */
617 av_assert0(tf_model->output_tensors);
618 for (uint32_t i = 0; i < tf_model->nb_output; ++i) {
619 if (tf_model->output_tensors[i]) {
620 TF_DeleteTensor(tf_model->output_tensors[i]);
621 tf_model->output_tensors[i] = NULL;
/* One input (tf_model->input / input_tensor), `nb` outputs, no target ops. */
625 TF_SessionRun(tf_model->session, NULL,
626 &tf_model->input, &tf_model->input_tensor, 1,
627 tf_model->outputs, tf_model->output_tensors, nb,
628 NULL, 0, NULL, tf_model->status);
630 if (TF_GetCode(tf_model->status) != TF_OK){
/* Height, width and channels come from tensor dimensions 1, 2 and 3;
 * `data` points into the tensor's own storage (no copy is made). */
634 for (uint32_t i = 0; i < nb; ++i) {
635 outputs[i].height = TF_Dim(tf_model->output_tensors[i], 1);
636 outputs[i].width = TF_Dim(tf_model->output_tensors[i], 2);
637 outputs[i].channels = TF_Dim(tf_model->output_tensors[i], 3);
638 outputs[i].data = TF_TensorData(tf_model->output_tensors[i]);
639 outputs[i].dt = TF_TensorType(tf_model->output_tensors[i]);
/*
 * Release the TensorFlow resources held by *model: graph, session, status,
 * input tensor, every output tensor, and the outputs / output_tensors arrays.
 * Each TensorFlow handle is only released when it is non-NULL. The opening
 * guard and the final lines of the function are not visible in this excerpt,
 * so how the TFModel and DNNModel structures themselves are freed cannot be
 * read from here.
 */
645 void ff_dnn_free_model_tf(DNNModel **model)
650 tf_model = (TFModel *)(*model)->model;
/* NOTE(review): the graph is deleted before the session that was created
 * from it is closed -- check the TensorFlow C API notes on TF_DeleteGraph()
 * to confirm this order is safe. */
651 if (tf_model->graph){
652 TF_DeleteGraph(tf_model->graph);
654 if (tf_model->session){
655 TF_CloseSession(tf_model->session, tf_model->status);
656 TF_DeleteSession(tf_model->session, tf_model->status);
658 if (tf_model->status){
659 TF_DeleteStatus(tf_model->status);
661 if (tf_model->input_tensor){
662 TF_DeleteTensor(tf_model->input_tensor);
664 if (tf_model->output_tensors) {
665 for (uint32_t i = 0; i < tf_model->nb_output; ++i) {
666 if (tf_model->output_tensors[i]) {
667 TF_DeleteTensor(tf_model->output_tensors[i]);
668 tf_model->output_tensors[i] = NULL;
672 av_freep(&tf_model->outputs);
673 av_freep(&tf_model->output_tensors);