git.sesse.net Git - ffmpeg/blob - libavfilter/dnn/dnn_backend_tf.c
libavfilter/dnn/dnn_backend_tf: add tf.pad support for tensorflow backend with native...
[ffmpeg] / libavfilter / dnn / dnn_backend_tf.c
1 /*
2  * Copyright (c) 2018 Sergey Lavrushkin
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 /**
22  * @file
23  * DNN tensorflow backend implementation.
24  */
25
26 #include "dnn_backend_tf.h"
27 #include "dnn_backend_native.h"
28 #include "libavformat/avio.h"
29 #include "libavutil/avassert.h"
30 #include "dnn_backend_native_layer_pad.h"
31
32 #include <tensorflow/c/c_api.h>
33
34 typedef struct TFModel{
35     TF_Graph *graph;
36     TF_Session *session;
37     TF_Status *status;
38     TF_Output input;
39     TF_Tensor *input_tensor;
40     TF_Output *outputs;
41     TF_Tensor **output_tensors;
42     uint32_t nb_output;
43 } TFModel;
44
/**
 * Deallocator installed on the TF_Buffer built by read_graph().
 *
 * @param data   memory block to release
 * @param length size of the block; not needed to release it
 */
static void free_buffer(void *data, size_t length)
{
    (void)length;
    av_freep(&data);
}
49
50 static TF_Buffer *read_graph(const char *model_filename)
51 {
52     TF_Buffer *graph_buf;
53     unsigned char *graph_data = NULL;
54     AVIOContext *model_file_context;
55     long size, bytes_read;
56
57     if (avio_open(&model_file_context, model_filename, AVIO_FLAG_READ) < 0){
58         return NULL;
59     }
60
61     size = avio_size(model_file_context);
62
63     graph_data = av_malloc(size);
64     if (!graph_data){
65         avio_closep(&model_file_context);
66         return NULL;
67     }
68     bytes_read = avio_read(model_file_context, graph_data, size);
69     avio_closep(&model_file_context);
70     if (bytes_read != size){
71         av_freep(&graph_data);
72         return NULL;
73     }
74
75     graph_buf = TF_NewBuffer();
76     graph_buf->data = (void *)graph_data;
77     graph_buf->length = size;
78     graph_buf->data_deallocator = free_buffer;
79
80     return graph_buf;
81 }
82
83 static TF_Tensor *allocate_input_tensor(const DNNInputData *input)
84 {
85     TF_DataType dt;
86     size_t size;
87     int64_t input_dims[] = {1, input->height, input->width, input->channels};
88     switch (input->dt) {
89     case DNN_FLOAT:
90         dt = TF_FLOAT;
91         size = sizeof(float);
92         break;
93     case DNN_UINT8:
94         dt = TF_UINT8;
95         size = sizeof(char);
96         break;
97     default:
98         av_assert0(!"should not reach here");
99     }
100
101     return TF_AllocateTensor(dt, input_dims, 4,
102                              input_dims[1] * input_dims[2] * input_dims[3] * size);
103 }
104
105 static DNNReturnType set_input_output_tf(void *model, DNNInputData *input, const char *input_name, const char **output_names, uint32_t nb_output)
106 {
107     TFModel *tf_model = (TFModel *)model;
108     TF_SessionOptions *sess_opts;
109     const TF_Operation *init_op = TF_GraphOperationByName(tf_model->graph, "init");
110
111     // Input operation
112     tf_model->input.oper = TF_GraphOperationByName(tf_model->graph, input_name);
113     if (!tf_model->input.oper){
114         return DNN_ERROR;
115     }
116     tf_model->input.index = 0;
117     if (tf_model->input_tensor){
118         TF_DeleteTensor(tf_model->input_tensor);
119     }
120     tf_model->input_tensor = allocate_input_tensor(input);
121     if (!tf_model->input_tensor){
122         return DNN_ERROR;
123     }
124     input->data = (float *)TF_TensorData(tf_model->input_tensor);
125
126     // Output operation
127     if (nb_output == 0)
128         return DNN_ERROR;
129
130     av_freep(&tf_model->outputs);
131     tf_model->outputs = av_malloc_array(nb_output, sizeof(*tf_model->outputs));
132     if (!tf_model->outputs)
133         return DNN_ERROR;
134     for (int i = 0; i < nb_output; ++i) {
135         tf_model->outputs[i].oper = TF_GraphOperationByName(tf_model->graph, output_names[i]);
136         if (!tf_model->outputs[i].oper){
137             av_freep(&tf_model->outputs);
138             return DNN_ERROR;
139         }
140         tf_model->outputs[i].index = 0;
141     }
142
143     if (tf_model->output_tensors) {
144         for (uint32_t i = 0; i < tf_model->nb_output; ++i) {
145             if (tf_model->output_tensors[i]) {
146                 TF_DeleteTensor(tf_model->output_tensors[i]);
147                 tf_model->output_tensors[i] = NULL;
148             }
149         }
150     }
151     av_freep(&tf_model->output_tensors);
152     tf_model->output_tensors = av_mallocz_array(nb_output, sizeof(*tf_model->output_tensors));
153     if (!tf_model->output_tensors) {
154         av_freep(&tf_model->outputs);
155         return DNN_ERROR;
156     }
157
158     tf_model->nb_output = nb_output;
159
160     if (tf_model->session){
161         TF_CloseSession(tf_model->session, tf_model->status);
162         TF_DeleteSession(tf_model->session, tf_model->status);
163     }
164
165     sess_opts = TF_NewSessionOptions();
166     tf_model->session = TF_NewSession(tf_model->graph, sess_opts, tf_model->status);
167     TF_DeleteSessionOptions(sess_opts);
168     if (TF_GetCode(tf_model->status) != TF_OK)
169     {
170         return DNN_ERROR;
171     }
172
173     // Run initialization operation with name "init" if it is present in graph
174     if (init_op){
175         TF_SessionRun(tf_model->session, NULL,
176                       NULL, NULL, 0,
177                       NULL, NULL, 0,
178                       &init_op, 1, NULL, tf_model->status);
179         if (TF_GetCode(tf_model->status) != TF_OK)
180         {
181             return DNN_ERROR;
182         }
183     }
184
185     return DNN_SUCCESS;
186 }
187
188 static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename)
189 {
190     TF_Buffer *graph_def;
191     TF_ImportGraphDefOptions *graph_opts;
192
193     graph_def = read_graph(model_filename);
194     if (!graph_def){
195         return DNN_ERROR;
196     }
197     tf_model->graph = TF_NewGraph();
198     tf_model->status = TF_NewStatus();
199     graph_opts = TF_NewImportGraphDefOptions();
200     TF_GraphImportGraphDef(tf_model->graph, graph_def, graph_opts, tf_model->status);
201     TF_DeleteImportGraphDefOptions(graph_opts);
202     TF_DeleteBuffer(graph_def);
203     if (TF_GetCode(tf_model->status) != TF_OK){
204         TF_DeleteGraph(tf_model->graph);
205         TF_DeleteStatus(tf_model->status);
206         return DNN_ERROR;
207     }
208
209     return DNN_SUCCESS;
210 }
211
212 #define NAME_BUFFER_SIZE 256
213
214 static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_op, TF_Operation **cur_op,
215                                     ConvolutionalParams* params, const int layer)
216 {
217     TF_Operation *op;
218     TF_OperationDescription *op_desc;
219     TF_Output input;
220     int64_t strides[] = {1, 1, 1, 1};
221     TF_Tensor *tensor;
222     int64_t dims[4];
223     int dims_len;
224     char name_buffer[NAME_BUFFER_SIZE];
225     int32_t size;
226
227     size = params->input_num * params->output_num * params->kernel_size * params->kernel_size;
228     input.index = 0;
229
230     snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_kernel%d", layer);
231     op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
232     TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
233     dims[0] = params->output_num;
234     dims[1] = params->kernel_size;
235     dims[2] = params->kernel_size;
236     dims[3] = params->input_num;
237     dims_len = 4;
238     tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, size * sizeof(float));
239     memcpy(TF_TensorData(tensor), params->kernel, size * sizeof(float));
240     TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
241     if (TF_GetCode(tf_model->status) != TF_OK){
242         return DNN_ERROR;
243     }
244     op = TF_FinishOperation(op_desc, tf_model->status);
245     if (TF_GetCode(tf_model->status) != TF_OK){
246         return DNN_ERROR;
247     }
248
249     snprintf(name_buffer, NAME_BUFFER_SIZE, "transpose%d", layer);
250     op_desc = TF_NewOperation(tf_model->graph, "Transpose", name_buffer);
251     input.oper = op;
252     TF_AddInput(op_desc, input);
253     input.oper = transpose_op;
254     TF_AddInput(op_desc, input);
255     TF_SetAttrType(op_desc, "T", TF_FLOAT);
256     TF_SetAttrType(op_desc, "Tperm", TF_INT32);
257     op = TF_FinishOperation(op_desc, tf_model->status);
258     if (TF_GetCode(tf_model->status) != TF_OK){
259         return DNN_ERROR;
260     }
261
262     snprintf(name_buffer, NAME_BUFFER_SIZE, "conv2d%d", layer);
263     op_desc = TF_NewOperation(tf_model->graph, "Conv2D", name_buffer);
264     input.oper = *cur_op;
265     TF_AddInput(op_desc, input);
266     input.oper = op;
267     TF_AddInput(op_desc, input);
268     TF_SetAttrType(op_desc, "T", TF_FLOAT);
269     TF_SetAttrIntList(op_desc, "strides", strides, 4);
270     TF_SetAttrString(op_desc, "padding", "VALID", 5);
271     *cur_op = TF_FinishOperation(op_desc, tf_model->status);
272     if (TF_GetCode(tf_model->status) != TF_OK){
273         return DNN_ERROR;
274     }
275
276     snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_biases%d", layer);
277     op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
278     TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
279     dims[0] = params->output_num;
280     dims_len = 1;
281     tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, params->output_num * sizeof(float));
282     memcpy(TF_TensorData(tensor), params->biases, params->output_num * sizeof(float));
283     TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
284     if (TF_GetCode(tf_model->status) != TF_OK){
285         return DNN_ERROR;
286     }
287     op = TF_FinishOperation(op_desc, tf_model->status);
288     if (TF_GetCode(tf_model->status) != TF_OK){
289         return DNN_ERROR;
290     }
291
292     snprintf(name_buffer, NAME_BUFFER_SIZE, "bias_add%d", layer);
293     op_desc = TF_NewOperation(tf_model->graph, "BiasAdd", name_buffer);
294     input.oper = *cur_op;
295     TF_AddInput(op_desc, input);
296     input.oper = op;
297     TF_AddInput(op_desc, input);
298     TF_SetAttrType(op_desc, "T", TF_FLOAT);
299     *cur_op = TF_FinishOperation(op_desc, tf_model->status);
300     if (TF_GetCode(tf_model->status) != TF_OK){
301         return DNN_ERROR;
302     }
303
304     snprintf(name_buffer, NAME_BUFFER_SIZE, "activation%d", layer);
305     switch (params->activation){
306     case RELU:
307         op_desc = TF_NewOperation(tf_model->graph, "Relu", name_buffer);
308         break;
309     case TANH:
310         op_desc = TF_NewOperation(tf_model->graph, "Tanh", name_buffer);
311         break;
312     case SIGMOID:
313         op_desc = TF_NewOperation(tf_model->graph, "Sigmoid", name_buffer);
314         break;
315     default:
316         return DNN_ERROR;
317     }
318     input.oper = *cur_op;
319     TF_AddInput(op_desc, input);
320     TF_SetAttrType(op_desc, "T", TF_FLOAT);
321     *cur_op = TF_FinishOperation(op_desc, tf_model->status);
322     if (TF_GetCode(tf_model->status) != TF_OK){
323         return DNN_ERROR;
324     }
325
326     return DNN_SUCCESS;
327 }
328
329 static DNNReturnType add_depth_to_space_layer(TFModel *tf_model, TF_Operation **cur_op,
330                                               DepthToSpaceParams *params, const int layer)
331 {
332     TF_OperationDescription *op_desc;
333     TF_Output input;
334     char name_buffer[NAME_BUFFER_SIZE];
335
336     snprintf(name_buffer, NAME_BUFFER_SIZE, "depth_to_space%d", layer);
337     op_desc = TF_NewOperation(tf_model->graph, "DepthToSpace", name_buffer);
338     input.oper = *cur_op;
339     input.index = 0;
340     TF_AddInput(op_desc, input);
341     TF_SetAttrType(op_desc, "T", TF_FLOAT);
342     TF_SetAttrInt(op_desc, "block_size", params->block_size);
343     *cur_op = TF_FinishOperation(op_desc, tf_model->status);
344     if (TF_GetCode(tf_model->status) != TF_OK){
345         return DNN_ERROR;
346     }
347
348     return DNN_SUCCESS;
349 }
350
351 static DNNReturnType add_pad_layer(TFModel *tf_model, TF_Operation **cur_op,
352                                               LayerPadParams *params, const int layer)
353 {
354     TF_Operation *op;
355     TF_Tensor *tensor;
356     TF_OperationDescription *op_desc;
357     TF_Output input;
358     int32_t *pads;
359     int64_t pads_shape[] = {4, 2};
360
361     char name_buffer[NAME_BUFFER_SIZE];
362     snprintf(name_buffer, NAME_BUFFER_SIZE, "pad%d", layer);
363
364     op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
365     TF_SetAttrType(op_desc, "dtype", TF_INT32);
366     tensor = TF_AllocateTensor(TF_INT32, pads_shape, 2, 4 * 2 * sizeof(int32_t));
367     pads = (int32_t *)TF_TensorData(tensor);
368     pads[0] = params->paddings[0][0];
369     pads[1] = params->paddings[0][1];
370     pads[2] = params->paddings[1][0];
371     pads[3] = params->paddings[1][1];
372     pads[4] = params->paddings[2][0];
373     pads[5] = params->paddings[2][1];
374     pads[6] = params->paddings[3][0];
375     pads[7] = params->paddings[3][1];
376     TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
377     if (TF_GetCode(tf_model->status) != TF_OK){
378         return DNN_ERROR;
379     }
380     op = TF_FinishOperation(op_desc, tf_model->status);
381     if (TF_GetCode(tf_model->status) != TF_OK){
382         return DNN_ERROR;
383     }
384
385     op_desc = TF_NewOperation(tf_model->graph, "MirrorPad", "mirror_pad");
386     input.oper = *cur_op;
387     input.index = 0;
388     TF_AddInput(op_desc, input);
389     input.oper = op;
390     TF_AddInput(op_desc, input);
391     TF_SetAttrType(op_desc, "T", TF_FLOAT);
392     TF_SetAttrType(op_desc, "Tpaddings", TF_INT32);
393     TF_SetAttrString(op_desc, "mode", "SYMMETRIC", 9);
394     *cur_op = TF_FinishOperation(op_desc, tf_model->status);
395     if (TF_GetCode(tf_model->status) != TF_OK){
396         return DNN_ERROR;
397     }
398
399     return DNN_SUCCESS;
400 }
401
402 static DNNReturnType load_native_model(TFModel *tf_model, const char *model_filename)
403 {
404     int32_t layer;
405     TF_OperationDescription *op_desc;
406     TF_Operation *op;
407     TF_Operation *transpose_op;
408     TF_Tensor *tensor;
409     TF_Output input;
410     int32_t *transpose_perm;
411     int64_t transpose_perm_shape[] = {4};
412     int64_t input_shape[] = {1, -1, -1, -1};
413     DNNReturnType layer_add_res;
414     DNNModel *native_model = NULL;
415     ConvolutionalNetwork *conv_network;
416
417     native_model = ff_dnn_load_model_native(model_filename);
418     if (!native_model){
419         return DNN_ERROR;
420     }
421
422     conv_network = (ConvolutionalNetwork *)native_model->model;
423     tf_model->graph = TF_NewGraph();
424     tf_model->status = TF_NewStatus();
425
426 #define CLEANUP_ON_ERROR(tf_model) \
427     { \
428         TF_DeleteGraph(tf_model->graph); \
429         TF_DeleteStatus(tf_model->status); \
430         return DNN_ERROR; \
431     }
432
433     op_desc = TF_NewOperation(tf_model->graph, "Placeholder", "x");
434     TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
435     TF_SetAttrShape(op_desc, "shape", input_shape, 4);
436     op = TF_FinishOperation(op_desc, tf_model->status);
437     if (TF_GetCode(tf_model->status) != TF_OK){
438         CLEANUP_ON_ERROR(tf_model);
439     }
440
441     op_desc = TF_NewOperation(tf_model->graph, "Const", "transpose_perm");
442     TF_SetAttrType(op_desc, "dtype", TF_INT32);
443     tensor = TF_AllocateTensor(TF_INT32, transpose_perm_shape, 1, 4 * sizeof(int32_t));
444     transpose_perm = (int32_t *)TF_TensorData(tensor);
445     transpose_perm[0] = 1;
446     transpose_perm[1] = 2;
447     transpose_perm[2] = 3;
448     transpose_perm[3] = 0;
449     TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
450     if (TF_GetCode(tf_model->status) != TF_OK){
451         CLEANUP_ON_ERROR(tf_model);
452     }
453     transpose_op = TF_FinishOperation(op_desc, tf_model->status);
454
455     for (layer = 0; layer < conv_network->layers_num; ++layer){
456         switch (conv_network->layers[layer].type){
457         case INPUT:
458             layer_add_res = DNN_SUCCESS;
459             break;
460         case CONV:
461             layer_add_res = add_conv_layer(tf_model, transpose_op, &op,
462                                            (ConvolutionalParams *)conv_network->layers[layer].params, layer);
463             break;
464         case DEPTH_TO_SPACE:
465             layer_add_res = add_depth_to_space_layer(tf_model, &op,
466                                                      (DepthToSpaceParams *)conv_network->layers[layer].params, layer);
467             break;
468         case MIRROR_PAD:
469             layer_add_res = add_pad_layer(tf_model, &op,
470                                           (LayerPadParams *)conv_network->layers[layer].params, layer);
471             break;
472         default:
473             CLEANUP_ON_ERROR(tf_model);
474         }
475
476         if (layer_add_res != DNN_SUCCESS){
477             CLEANUP_ON_ERROR(tf_model);
478         }
479     }
480
481     op_desc = TF_NewOperation(tf_model->graph, "Identity", "y");
482     input.oper = op;
483     input.index = 0;
484     TF_AddInput(op_desc, input);
485     TF_FinishOperation(op_desc, tf_model->status);
486     if (TF_GetCode(tf_model->status) != TF_OK){
487         CLEANUP_ON_ERROR(tf_model);
488     }
489
490     ff_dnn_free_model_native(&native_model);
491
492     return DNN_SUCCESS;
493 }
494
495 DNNModel *ff_dnn_load_model_tf(const char *model_filename)
496 {
497     DNNModel *model = NULL;
498     TFModel *tf_model = NULL;
499
500     model = av_malloc(sizeof(DNNModel));
501     if (!model){
502         return NULL;
503     }
504
505     tf_model = av_mallocz(sizeof(TFModel));
506     if (!tf_model){
507         av_freep(&model);
508         return NULL;
509     }
510
511     if (load_tf_model(tf_model, model_filename) != DNN_SUCCESS){
512         if (load_native_model(tf_model, model_filename) != DNN_SUCCESS){
513             av_freep(&tf_model);
514             av_freep(&model);
515
516             return NULL;
517         }
518     }
519
520     model->model = (void *)tf_model;
521     model->set_input_output = &set_input_output_tf;
522
523     return model;
524 }
525
526
527
528 DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, DNNData *outputs, uint32_t nb_output)
529 {
530     TFModel *tf_model = (TFModel *)model->model;
531     uint32_t nb = FFMIN(nb_output, tf_model->nb_output);
532     if (nb == 0)
533         return DNN_ERROR;
534
535     av_assert0(tf_model->output_tensors);
536     for (uint32_t i = 0; i < tf_model->nb_output; ++i) {
537         if (tf_model->output_tensors[i]) {
538             TF_DeleteTensor(tf_model->output_tensors[i]);
539             tf_model->output_tensors[i] = NULL;
540         }
541     }
542
543     TF_SessionRun(tf_model->session, NULL,
544                   &tf_model->input, &tf_model->input_tensor, 1,
545                   tf_model->outputs, tf_model->output_tensors, nb,
546                   NULL, 0, NULL, tf_model->status);
547
548     if (TF_GetCode(tf_model->status) != TF_OK){
549         return DNN_ERROR;
550     }
551
552     for (uint32_t i = 0; i < nb; ++i) {
553         outputs[i].height = TF_Dim(tf_model->output_tensors[i], 1);
554         outputs[i].width = TF_Dim(tf_model->output_tensors[i], 2);
555         outputs[i].channels = TF_Dim(tf_model->output_tensors[i], 3);
556         outputs[i].data = TF_TensorData(tf_model->output_tensors[i]);
557     }
558
559     return DNN_SUCCESS;
560 }
561
562 void ff_dnn_free_model_tf(DNNModel **model)
563 {
564     TFModel *tf_model;
565
566     if (*model){
567         tf_model = (TFModel *)(*model)->model;
568         if (tf_model->graph){
569             TF_DeleteGraph(tf_model->graph);
570         }
571         if (tf_model->session){
572             TF_CloseSession(tf_model->session, tf_model->status);
573             TF_DeleteSession(tf_model->session, tf_model->status);
574         }
575         if (tf_model->status){
576             TF_DeleteStatus(tf_model->status);
577         }
578         if (tf_model->input_tensor){
579             TF_DeleteTensor(tf_model->input_tensor);
580         }
581         if (tf_model->output_tensors) {
582             for (uint32_t i = 0; i < tf_model->nb_output; ++i) {
583                 if (tf_model->output_tensors[i]) {
584                     TF_DeleteTensor(tf_model->output_tensors[i]);
585                     tf_model->output_tensors[i] = NULL;
586                 }
587             }
588         }
589         av_freep(&tf_model->outputs);
590         av_freep(&tf_model->output_tensors);
591         av_freep(&tf_model);
592         av_freep(model);
593     }
594 }