/*
 * Copyright (c) 2018 Sergey Lavrushkin
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * DNN TensorFlow backend implementation.
 */

#include "dnn_backend_tf.h"
#include "dnn_backend_native.h"
#include "libavformat/avio.h"

#include <tensorflow/c/c_api.h>

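/* Per-model TensorFlow state: the imported graph, the session it is executed
 * in, a reusable status object, the resolved input/output operations, the
 * tensor backing the input data, and the DNNData describing where results
 * are written. */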
typedef struct TFModel{
    TF_Graph *graph;
    TF_Session *session;
    TF_Status *status;
    TF_Output input, output;
    TF_Tensor *input_tensor;
    DNNData *output_data;
} TFModel;

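/* Deallocator registered on the TF_Buffer holding the serialized graph, so
 * TensorFlow releases the data with the allocator that created it. */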
static void free_buffer(void *data, size_t length)
{
    av_freep(&data);
}

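/* Reads the serialized GraphDef from model_filename into a newly allocated
 * TF_Buffer. Returns NULL on I/O or allocation failure. */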
static TF_Buffer *read_graph(const char *model_filename)
{
    TF_Buffer *graph_buf;
    unsigned char *graph_data = NULL;
    AVIOContext *model_file_context;
    long size, bytes_read;

    if (avio_open(&model_file_context, model_filename, AVIO_FLAG_READ) < 0){
        return NULL;
    }

    size = avio_size(model_file_context);

    graph_data = av_malloc(size);
    if (!graph_data){
        avio_closep(&model_file_context);
        return NULL;
    }
    bytes_read = avio_read(model_file_context, graph_data, size);
    avio_closep(&model_file_context);
    if (bytes_read != size){
        av_freep(&graph_data);
        return NULL;
    }

    graph_buf = TF_NewBuffer();
    graph_buf->data = (void *)graph_data;
    graph_buf->length = size;
    graph_buf->data_deallocator = free_buffer;

    return graph_buf;
}

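/* Binds the model input to the graph operation named "x" and the output to
 * the operation named "y", (re)creates the session, runs the optional "init"
 * operation, and executes the network once to discover the output dimensions
 * and allocate the output buffer. */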
static DNNReturnType set_input_output_tf(void *model, DNNData *input, DNNData *output)
{
    TFModel *tf_model = (TFModel *)model;
    int64_t input_dims[] = {1, input->height, input->width, input->channels};
    TF_SessionOptions *sess_opts;
    const TF_Operation *init_op = TF_GraphOperationByName(tf_model->graph, "init");
    TF_Tensor *output_tensor;

    // Input operation should be named 'x'
    tf_model->input.oper = TF_GraphOperationByName(tf_model->graph, "x");
    if (!tf_model->input.oper){
        return DNN_ERROR;
    }
    tf_model->input.index = 0;
    if (tf_model->input_tensor){
        TF_DeleteTensor(tf_model->input_tensor);
    }
    tf_model->input_tensor = TF_AllocateTensor(TF_FLOAT, input_dims, 4,
                                               input_dims[1] * input_dims[2] * input_dims[3] * sizeof(float));
    if (!tf_model->input_tensor){
        return DNN_ERROR;
    }
    input->data = (float *)TF_TensorData(tf_model->input_tensor);

    // Output operation should be named 'y'
    tf_model->output.oper = TF_GraphOperationByName(tf_model->graph, "y");
    if (!tf_model->output.oper){
        return DNN_ERROR;
    }
    tf_model->output.index = 0;

    if (tf_model->session){
        TF_CloseSession(tf_model->session, tf_model->status);
        TF_DeleteSession(tf_model->session, tf_model->status);
    }

    sess_opts = TF_NewSessionOptions();
    tf_model->session = TF_NewSession(tf_model->graph, sess_opts, tf_model->status);
    TF_DeleteSessionOptions(sess_opts);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }

    // Run initialization operation with name "init" if it is present in graph
    if (init_op){
        TF_SessionRun(tf_model->session, NULL,
                      NULL, NULL, 0,
                      NULL, NULL, 0,
                      &init_op, 1, NULL, tf_model->status);
        if (TF_GetCode(tf_model->status) != TF_OK){
            return DNN_ERROR;
        }
    }

    // Execute network to get output height, width and number of channels
    TF_SessionRun(tf_model->session, NULL,
                  &tf_model->input, &tf_model->input_tensor, 1,
                  &tf_model->output, &output_tensor, 1,
                  NULL, 0, NULL, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }
    else{
        output->height = TF_Dim(output_tensor, 1);
        output->width = TF_Dim(output_tensor, 2);
        output->channels = TF_Dim(output_tensor, 3);
        output->data = av_malloc(output->height * output->width * output->channels * sizeof(float));
        if (!output->data){
            return DNN_ERROR;
        }
        tf_model->output_data = output;
        TF_DeleteTensor(output_tensor);
    }

    return DNN_SUCCESS;
}

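/* Imports a serialized TensorFlow GraphDef read from model_filename into a
 * newly created graph. */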
static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename)
{
    TF_Buffer *graph_def;
    TF_ImportGraphDefOptions *graph_opts;

    graph_def = read_graph(model_filename);
    if (!graph_def){
        return DNN_ERROR;
    }
    tf_model->graph = TF_NewGraph();
    tf_model->status = TF_NewStatus();
    graph_opts = TF_NewImportGraphDefOptions();
    TF_GraphImportGraphDef(tf_model->graph, graph_def, graph_opts, tf_model->status);
    TF_DeleteImportGraphDefOptions(graph_opts);
    TF_DeleteBuffer(graph_def);
    if (TF_GetCode(tf_model->status) != TF_OK){
        TF_DeleteGraph(tf_model->graph);
        TF_DeleteStatus(tf_model->status);
        return DNN_ERROR;
    }

    return DNN_SUCCESS;
}

#define NAME_BUFFER_SIZE 256

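/* Appends one convolutional layer of the native model to the TensorFlow graph:
 * a Const kernel (transposed from the native output/height/width/input layout
 * to the HWIO layout Conv2D expects), the Conv2D itself with VALID padding,
 * a BiasAdd and the requested activation. *cur_op is advanced to the
 * activation operation. */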
static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_op, TF_Operation **cur_op,
                                    ConvolutionalParams* params, const int layer)
{
    TF_Operation *op;
    TF_OperationDescription *op_desc;
    TF_Output input;
    int64_t strides[] = {1, 1, 1, 1};
    TF_Tensor *tensor;
    int64_t dims[4];
    int dims_len;
    char name_buffer[NAME_BUFFER_SIZE];
    int32_t size;

    size = params->input_num * params->output_num * params->kernel_size * params->kernel_size;
    input.index = 0;

    // Kernel constant in the native layout: output_num x kernel_size x kernel_size x input_num
    snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_kernel%d", layer);
    op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
    TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
    dims[0] = params->output_num;
    dims[1] = params->kernel_size;
    dims[2] = params->kernel_size;
    dims[3] = params->input_num;
    dims_len = 4;
    tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, size * sizeof(float));
    memcpy(TF_TensorData(tensor), params->kernel, size * sizeof(float));
    TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }
    op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }

    // Transpose the kernel into the HWIO layout expected by Conv2D
    snprintf(name_buffer, NAME_BUFFER_SIZE, "transpose%d", layer);
    op_desc = TF_NewOperation(tf_model->graph, "Transpose", name_buffer);
    input.oper = op;
    TF_AddInput(op_desc, input);
    input.oper = transpose_op;
    TF_AddInput(op_desc, input);
    TF_SetAttrType(op_desc, "T", TF_FLOAT);
    TF_SetAttrType(op_desc, "Tperm", TF_INT32);
    op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }

    snprintf(name_buffer, NAME_BUFFER_SIZE, "conv2d%d", layer);
    op_desc = TF_NewOperation(tf_model->graph, "Conv2D", name_buffer);
    input.oper = *cur_op;
    TF_AddInput(op_desc, input);
    input.oper = op;
    TF_AddInput(op_desc, input);
    TF_SetAttrType(op_desc, "T", TF_FLOAT);
    TF_SetAttrIntList(op_desc, "strides", strides, 4);
    TF_SetAttrString(op_desc, "padding", "VALID", 5);
    *cur_op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }

    snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_biases%d", layer);
    op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
    TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
    dims[0] = params->output_num;
    dims_len = 1;
    tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, params->output_num * sizeof(float));
    memcpy(TF_TensorData(tensor), params->biases, params->output_num * sizeof(float));
    TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }
    op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }

    snprintf(name_buffer, NAME_BUFFER_SIZE, "bias_add%d", layer);
    op_desc = TF_NewOperation(tf_model->graph, "BiasAdd", name_buffer);
    input.oper = *cur_op;
    TF_AddInput(op_desc, input);
    input.oper = op;
    TF_AddInput(op_desc, input);
    TF_SetAttrType(op_desc, "T", TF_FLOAT);
    *cur_op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }

    snprintf(name_buffer, NAME_BUFFER_SIZE, "activation%d", layer);
    switch (params->activation){
    case RELU:
        op_desc = TF_NewOperation(tf_model->graph, "Relu", name_buffer);
        break;
    case TANH:
        op_desc = TF_NewOperation(tf_model->graph, "Tanh", name_buffer);
        break;
    case SIGMOID:
        op_desc = TF_NewOperation(tf_model->graph, "Sigmoid", name_buffer);
        break;
    default:
        return DNN_ERROR;
    }
    input.oper = *cur_op;
    TF_AddInput(op_desc, input);
    TF_SetAttrType(op_desc, "T", TF_FLOAT);
    *cur_op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }

    return DNN_SUCCESS;
}

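/* Appends a DepthToSpace operation (sub-pixel upscaling) using the block size
 * stored in the native layer parameters. */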
static DNNReturnType add_depth_to_space_layer(TFModel *tf_model, TF_Operation **cur_op,
                                              DepthToSpaceParams *params, const int layer)
{
    TF_OperationDescription *op_desc;
    TF_Output input;
    char name_buffer[NAME_BUFFER_SIZE];

    snprintf(name_buffer, NAME_BUFFER_SIZE, "depth_to_space%d", layer);
    op_desc = TF_NewOperation(tf_model->graph, "DepthToSpace", name_buffer);
    input.oper = *cur_op;
    input.index = 0;
    TF_AddInput(op_desc, input);
    TF_SetAttrType(op_desc, "T", TF_FLOAT);
    TF_SetAttrInt(op_desc, "block_size", params->block_size);
    *cur_op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }

    return DNN_SUCCESS;
}

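/* Every Conv2D below is created with VALID padding, so each convolution trims
 * kernel_size/2 pixels from every side; summing these amounts gives the
 * padding that must be mirrored onto the input to preserve its size. */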
static int calculate_pad(const ConvolutionalNetwork *conv_network)
{
    ConvolutionalParams *params;
    int32_t layer;
    int pad = 0;

    for (layer = 0; layer < conv_network->layers_num; ++layer){
        if (conv_network->layers[layer].type == CONV){
            params = (ConvolutionalParams *)conv_network->layers[layer].params;
            pad += params->kernel_size >> 1;
        }
    }

    return pad;
}

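/* Adds a SYMMETRIC MirrorPad of 'pad' pixels on each side of the height and
 * width dimensions; the batch and channel dimensions are left untouched. */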
static DNNReturnType add_pad_op(TFModel *tf_model, TF_Operation **cur_op, const int32_t pad)
{
    TF_Operation *op;
    TF_Tensor *tensor;
    TF_OperationDescription *op_desc;
    TF_Output input;
    int32_t *pads;
    int64_t pads_shape[] = {4, 2};

    input.index = 0;

    op_desc = TF_NewOperation(tf_model->graph, "Const", "pads");
    TF_SetAttrType(op_desc, "dtype", TF_INT32);
    tensor = TF_AllocateTensor(TF_INT32, pads_shape, 2, 4 * 2 * sizeof(int32_t));
    pads = (int32_t *)TF_TensorData(tensor);
    pads[0] = 0;   pads[1] = 0;
    pads[2] = pad; pads[3] = pad;
    pads[4] = pad; pads[5] = pad;
    pads[6] = 0;   pads[7] = 0;
    TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }
    op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }

    op_desc = TF_NewOperation(tf_model->graph, "MirrorPad", "mirror_pad");
    input.oper = *cur_op;
    TF_AddInput(op_desc, input);
    input.oper = op;
    TF_AddInput(op_desc, input);
    TF_SetAttrType(op_desc, "T", TF_FLOAT);
    TF_SetAttrType(op_desc, "Tpaddings", TF_INT32);
    TF_SetAttrString(op_desc, "mode", "SYMMETRIC", 9);
    *cur_op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }

    return DNN_SUCCESS;
}

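/* Fallback for files in the native format: loads the model with the native
 * backend and rebuilds it as a TensorFlow graph, with the input placeholder
 * named "x" and a final Identity operation named "y" so that
 * set_input_output_tf() can locate them. */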
static DNNReturnType load_native_model(TFModel *tf_model, const char *model_filename)
{
    int32_t layer;
    TF_OperationDescription *op_desc;
    TF_Operation *op;
    TF_Operation *transpose_op;
    TF_Tensor *tensor;
    TF_Output input;
    int32_t *transpose_perm;
    int64_t transpose_perm_shape[] = {4};
    int64_t input_shape[] = {1, -1, -1, -1};
    int32_t pad;
    DNNReturnType layer_add_res = DNN_SUCCESS;
    DNNModel *native_model = NULL;
    ConvolutionalNetwork *conv_network;

    native_model = ff_dnn_load_model_native(model_filename);
    if (!native_model){
        return DNN_ERROR;
    }

    conv_network = (ConvolutionalNetwork *)native_model->model;
    pad = calculate_pad(conv_network);
    tf_model->graph = TF_NewGraph();
    tf_model->status = TF_NewStatus();

#define CLEANUP_ON_ERROR(tf_model) \
    { \
        TF_DeleteGraph(tf_model->graph); \
        TF_DeleteStatus(tf_model->status); \
        ff_dnn_free_model_native(&native_model); \
        return DNN_ERROR; \
    }

    op_desc = TF_NewOperation(tf_model->graph, "Placeholder", "x");
    TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
    TF_SetAttrShape(op_desc, "shape", input_shape, 4);
    op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        CLEANUP_ON_ERROR(tf_model);
    }

    if (add_pad_op(tf_model, &op, pad) != DNN_SUCCESS){
        CLEANUP_ON_ERROR(tf_model);
    }

    // Permutation {1, 2, 3, 0} shared by the kernel transposes in add_conv_layer()
    op_desc = TF_NewOperation(tf_model->graph, "Const", "transpose_perm");
    TF_SetAttrType(op_desc, "dtype", TF_INT32);
    tensor = TF_AllocateTensor(TF_INT32, transpose_perm_shape, 1, 4 * sizeof(int32_t));
    transpose_perm = (int32_t *)TF_TensorData(tensor);
    transpose_perm[0] = 1;
    transpose_perm[1] = 2;
    transpose_perm[2] = 3;
    transpose_perm[3] = 0;
    TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        CLEANUP_ON_ERROR(tf_model);
    }
    transpose_op = TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        CLEANUP_ON_ERROR(tf_model);
    }

    for (layer = 0; layer < conv_network->layers_num; ++layer){
        switch (conv_network->layers[layer].type){
        case INPUT:
            break;
        case CONV:
            layer_add_res = add_conv_layer(tf_model, transpose_op, &op,
                                           (ConvolutionalParams *)conv_network->layers[layer].params, layer);
            break;
        case DEPTH_TO_SPACE:
            layer_add_res = add_depth_to_space_layer(tf_model, &op,
                                                     (DepthToSpaceParams *)conv_network->layers[layer].params, layer);
            break;
        default:
            CLEANUP_ON_ERROR(tf_model);
        }

        if (layer_add_res != DNN_SUCCESS){
            CLEANUP_ON_ERROR(tf_model);
        }
    }

    op_desc = TF_NewOperation(tf_model->graph, "Identity", "y");
    input.oper = op;
    input.index = 0;
    TF_AddInput(op_desc, input);
    TF_FinishOperation(op_desc, tf_model->status);
    if (TF_GetCode(tf_model->status) != TF_OK){
        CLEANUP_ON_ERROR(tf_model);
    }

    ff_dnn_free_model_native(&native_model);

    return DNN_SUCCESS;
}

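/* Loads a model for the TensorFlow backend: first tries the file as a
 * serialized TensorFlow graph and falls back to converting a native model. */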
DNNModel *ff_dnn_load_model_tf(const char *model_filename)
{
    DNNModel *model = NULL;
    TFModel *tf_model = NULL;

    model = av_malloc(sizeof(DNNModel));
    if (!model){
        return NULL;
    }

    tf_model = av_malloc(sizeof(TFModel));
    if (!tf_model){
        av_freep(&model);
        return NULL;
    }
    tf_model->session = NULL;
    tf_model->input_tensor = NULL;
    tf_model->output_data = NULL;

    if (load_tf_model(tf_model, model_filename) != DNN_SUCCESS){
        if (load_native_model(tf_model, model_filename) != DNN_SUCCESS){
            av_freep(&tf_model);
            av_freep(&model);

            return NULL;
        }
    }

    model->model = (void *)tf_model;
    model->set_input_output = &set_input_output_tf;

    return model;
}

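/* Runs one inference on the previously bound input tensor and copies the
 * result into the output buffer set up by set_input_output_tf(). */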
DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model)
{
    TFModel *tf_model = (TFModel *)model->model;
    TF_Tensor *output_tensor;

    TF_SessionRun(tf_model->session, NULL,
                  &tf_model->input, &tf_model->input_tensor, 1,
                  &tf_model->output, &output_tensor, 1,
                  NULL, 0, NULL, tf_model->status);

    if (TF_GetCode(tf_model->status) != TF_OK){
        return DNN_ERROR;
    }
    else{
        memcpy(tf_model->output_data->data, TF_TensorData(output_tensor),
               tf_model->output_data->height * tf_model->output_data->width *
               tf_model->output_data->channels * sizeof(float));
        TF_DeleteTensor(output_tensor);

        return DNN_SUCCESS;
    }
}

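/* Releases the TensorFlow objects owned by the model together with the output
 * buffer allocated in set_input_output_tf(). */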
void ff_dnn_free_model_tf(DNNModel **model)
{
    TFModel *tf_model;

    if (*model){
        tf_model = (TFModel *)(*model)->model;
        if (tf_model->graph){
            TF_DeleteGraph(tf_model->graph);
        }
        if (tf_model->session){
            TF_CloseSession(tf_model->session, tf_model->status);
            TF_DeleteSession(tf_model->session, tf_model->status);
        }
        if (tf_model->status){
            TF_DeleteStatus(tf_model->status);
        }
        if (tf_model->input_tensor){
            TF_DeleteTensor(tf_model->input_tensor);
        }
        if (tf_model->output_data){
            av_freep(&tf_model->output_data->data);
        }
        av_freep(&tf_model);
        av_freep(model);
    }
}