]> git.sesse.net Git - ffmpeg/blob - libavfilter/dnn/dnn_backend_tf.c
libavfilter/dnn: separate conv2d layer from dnn_backend_native.c to a new file
[ffmpeg] / libavfilter / dnn / dnn_backend_tf.c
1 /*
2  * Copyright (c) 2018 Sergey Lavrushkin
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 /**
22  * @file
23  * DNN tensorflow backend implementation.
24  */
25
26 #include "dnn_backend_tf.h"
27 #include "dnn_backend_native.h"
28 #include "dnn_backend_native_layer_conv2d.h"
29 #include "libavformat/avio.h"
30 #include "libavutil/avassert.h"
31 #include "dnn_backend_native_layer_pad.h"
32
33 #include <tensorflow/c/c_api.h>
34
35 typedef struct TFModel{
36     TF_Graph *graph;
37     TF_Session *session;
38     TF_Status *status;
39     TF_Output input;
40     TF_Tensor *input_tensor;
41     TF_Output *outputs;
42     TF_Tensor **output_tensors;
43     uint32_t nb_output;
44 } TFModel;
45
/**
 * Deallocator installed on the TF_Buffer created by read_graph().
 *
 * @param data   memory to release with av_freep()
 * @param length size of the memory block, not used
 */
static void free_buffer(void *data, size_t length)
{
    void *block = data;

    av_freep(&block);
}
50
51 static TF_Buffer *read_graph(const char *model_filename)
52 {
53     TF_Buffer *graph_buf;
54     unsigned char *graph_data = NULL;
55     AVIOContext *model_file_context;
56     long size, bytes_read;
57
58     if (avio_open(&model_file_context, model_filename, AVIO_FLAG_READ) < 0){
59         return NULL;
60     }
61
62     size = avio_size(model_file_context);
63
64     graph_data = av_malloc(size);
65     if (!graph_data){
66         avio_closep(&model_file_context);
67         return NULL;
68     }
69     bytes_read = avio_read(model_file_context, graph_data, size);
70     avio_closep(&model_file_context);
71     if (bytes_read != size){
72         av_freep(&graph_data);
73         return NULL;
74     }
75
76     graph_buf = TF_NewBuffer();
77     graph_buf->data = (void *)graph_data;
78     graph_buf->length = size;
79     graph_buf->data_deallocator = free_buffer;
80
81     return graph_buf;
82 }
83
84 static TF_Tensor *allocate_input_tensor(const DNNInputData *input)
85 {
86     TF_DataType dt;
87     size_t size;
88     int64_t input_dims[] = {1, input->height, input->width, input->channels};
89     switch (input->dt) {
90     case DNN_FLOAT:
91         dt = TF_FLOAT;
92         size = sizeof(float);
93         break;
94     case DNN_UINT8:
95         dt = TF_UINT8;
96         size = sizeof(char);
97         break;
98     default:
99         av_assert0(!"should not reach here");
100     }
101
102     return TF_AllocateTensor(dt, input_dims, 4,
103                              input_dims[1] * input_dims[2] * input_dims[3] * size);
104 }
105
106 static DNNReturnType set_input_output_tf(void *model, DNNInputData *input, const char *input_name, const char **output_names, uint32_t nb_output)
107 {
108     TFModel *tf_model = (TFModel *)model;
109     TF_SessionOptions *sess_opts;
110     const TF_Operation *init_op = TF_GraphOperationByName(tf_model->graph, "init");
111
112     // Input operation
113     tf_model->input.oper = TF_GraphOperationByName(tf_model->graph, input_name);
114     if (!tf_model->input.oper){
115         return DNN_ERROR;
116     }
117     tf_model->input.index = 0;
118     if (tf_model->input_tensor){
119         TF_DeleteTensor(tf_model->input_tensor);
120     }
121     tf_model->input_tensor = allocate_input_tensor(input);
122     if (!tf_model->input_tensor){
123         return DNN_ERROR;
124     }
125     input->data = (float *)TF_TensorData(tf_model->input_tensor);
126
127     // Output operation
128     if (nb_output == 0)
129         return DNN_ERROR;
130
131     av_freep(&tf_model->outputs);
132     tf_model->outputs = av_malloc_array(nb_output, sizeof(*tf_model->outputs));
133     if (!tf_model->outputs)
134         return DNN_ERROR;
135     for (int i = 0; i < nb_output; ++i) {
136         tf_model->outputs[i].oper = TF_GraphOperationByName(tf_model->graph, output_names[i]);
137         if (!tf_model->outputs[i].oper){
138             av_freep(&tf_model->outputs);
139             return DNN_ERROR;
140         }
141         tf_model->outputs[i].index = 0;
142     }
143
144     if (tf_model->output_tensors) {
145         for (uint32_t i = 0; i < tf_model->nb_output; ++i) {
146             if (tf_model->output_tensors[i]) {
147                 TF_DeleteTensor(tf_model->output_tensors[i]);
148                 tf_model->output_tensors[i] = NULL;
149             }
150         }
151     }
152     av_freep(&tf_model->output_tensors);
153     tf_model->output_tensors = av_mallocz_array(nb_output, sizeof(*tf_model->output_tensors));
154     if (!tf_model->output_tensors) {
155         av_freep(&tf_model->outputs);
156         return DNN_ERROR;
157     }
158
159     tf_model->nb_output = nb_output;
160
161     if (tf_model->session){
162         TF_CloseSession(tf_model->session, tf_model->status);
163         TF_DeleteSession(tf_model->session, tf_model->status);
164     }
165
166     sess_opts = TF_NewSessionOptions();
167     tf_model->session = TF_NewSession(tf_model->graph, sess_opts, tf_model->status);
168     TF_DeleteSessionOptions(sess_opts);
169     if (TF_GetCode(tf_model->status) != TF_OK)
170     {
171         return DNN_ERROR;
172     }
173
174     // Run initialization operation with name "init" if it is present in graph
175     if (init_op){
176         TF_SessionRun(tf_model->session, NULL,
177                       NULL, NULL, 0,
178                       NULL, NULL, 0,
179                       &init_op, 1, NULL, tf_model->status);
180         if (TF_GetCode(tf_model->status) != TF_OK)
181         {
182             return DNN_ERROR;
183         }
184     }
185
186     return DNN_SUCCESS;
187 }
188
189 static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename)
190 {
191     TF_Buffer *graph_def;
192     TF_ImportGraphDefOptions *graph_opts;
193
194     graph_def = read_graph(model_filename);
195     if (!graph_def){
196         return DNN_ERROR;
197     }
198     tf_model->graph = TF_NewGraph();
199     tf_model->status = TF_NewStatus();
200     graph_opts = TF_NewImportGraphDefOptions();
201     TF_GraphImportGraphDef(tf_model->graph, graph_def, graph_opts, tf_model->status);
202     TF_DeleteImportGraphDefOptions(graph_opts);
203     TF_DeleteBuffer(graph_def);
204     if (TF_GetCode(tf_model->status) != TF_OK){
205         TF_DeleteGraph(tf_model->graph);
206         TF_DeleteStatus(tf_model->status);
207         return DNN_ERROR;
208     }
209
210     return DNN_SUCCESS;
211 }
212
213 #define NAME_BUFFER_SIZE 256
214
215 static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_op, TF_Operation **cur_op,
216                                     ConvolutionalParams* params, const int layer)
217 {
218     TF_Operation *op;
219     TF_OperationDescription *op_desc;
220     TF_Output input;
221     int64_t strides[] = {1, 1, 1, 1};
222     TF_Tensor *tensor;
223     int64_t dims[4];
224     int dims_len;
225     char name_buffer[NAME_BUFFER_SIZE];
226     int32_t size;
227
228     size = params->input_num * params->output_num * params->kernel_size * params->kernel_size;
229     input.index = 0;
230
231     snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_kernel%d", layer);
232     op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
233     TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
234     dims[0] = params->output_num;
235     dims[1] = params->kernel_size;
236     dims[2] = params->kernel_size;
237     dims[3] = params->input_num;
238     dims_len = 4;
239     tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, size * sizeof(float));
240     memcpy(TF_TensorData(tensor), params->kernel, size * sizeof(float));
241     TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
242     if (TF_GetCode(tf_model->status) != TF_OK){
243         return DNN_ERROR;
244     }
245     op = TF_FinishOperation(op_desc, tf_model->status);
246     if (TF_GetCode(tf_model->status) != TF_OK){
247         return DNN_ERROR;
248     }
249
250     snprintf(name_buffer, NAME_BUFFER_SIZE, "transpose%d", layer);
251     op_desc = TF_NewOperation(tf_model->graph, "Transpose", name_buffer);
252     input.oper = op;
253     TF_AddInput(op_desc, input);
254     input.oper = transpose_op;
255     TF_AddInput(op_desc, input);
256     TF_SetAttrType(op_desc, "T", TF_FLOAT);
257     TF_SetAttrType(op_desc, "Tperm", TF_INT32);
258     op = TF_FinishOperation(op_desc, tf_model->status);
259     if (TF_GetCode(tf_model->status) != TF_OK){
260         return DNN_ERROR;
261     }
262
263     snprintf(name_buffer, NAME_BUFFER_SIZE, "conv2d%d", layer);
264     op_desc = TF_NewOperation(tf_model->graph, "Conv2D", name_buffer);
265     input.oper = *cur_op;
266     TF_AddInput(op_desc, input);
267     input.oper = op;
268     TF_AddInput(op_desc, input);
269     TF_SetAttrType(op_desc, "T", TF_FLOAT);
270     TF_SetAttrIntList(op_desc, "strides", strides, 4);
271     TF_SetAttrString(op_desc, "padding", "VALID", 5);
272     *cur_op = TF_FinishOperation(op_desc, tf_model->status);
273     if (TF_GetCode(tf_model->status) != TF_OK){
274         return DNN_ERROR;
275     }
276
277     snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_biases%d", layer);
278     op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
279     TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
280     dims[0] = params->output_num;
281     dims_len = 1;
282     tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, params->output_num * sizeof(float));
283     memcpy(TF_TensorData(tensor), params->biases, params->output_num * sizeof(float));
284     TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
285     if (TF_GetCode(tf_model->status) != TF_OK){
286         return DNN_ERROR;
287     }
288     op = TF_FinishOperation(op_desc, tf_model->status);
289     if (TF_GetCode(tf_model->status) != TF_OK){
290         return DNN_ERROR;
291     }
292
293     snprintf(name_buffer, NAME_BUFFER_SIZE, "bias_add%d", layer);
294     op_desc = TF_NewOperation(tf_model->graph, "BiasAdd", name_buffer);
295     input.oper = *cur_op;
296     TF_AddInput(op_desc, input);
297     input.oper = op;
298     TF_AddInput(op_desc, input);
299     TF_SetAttrType(op_desc, "T", TF_FLOAT);
300     *cur_op = TF_FinishOperation(op_desc, tf_model->status);
301     if (TF_GetCode(tf_model->status) != TF_OK){
302         return DNN_ERROR;
303     }
304
305     snprintf(name_buffer, NAME_BUFFER_SIZE, "activation%d", layer);
306     switch (params->activation){
307     case RELU:
308         op_desc = TF_NewOperation(tf_model->graph, "Relu", name_buffer);
309         break;
310     case TANH:
311         op_desc = TF_NewOperation(tf_model->graph, "Tanh", name_buffer);
312         break;
313     case SIGMOID:
314         op_desc = TF_NewOperation(tf_model->graph, "Sigmoid", name_buffer);
315         break;
316     default:
317         return DNN_ERROR;
318     }
319     input.oper = *cur_op;
320     TF_AddInput(op_desc, input);
321     TF_SetAttrType(op_desc, "T", TF_FLOAT);
322     *cur_op = TF_FinishOperation(op_desc, tf_model->status);
323     if (TF_GetCode(tf_model->status) != TF_OK){
324         return DNN_ERROR;
325     }
326
327     return DNN_SUCCESS;
328 }
329
330 static DNNReturnType add_depth_to_space_layer(TFModel *tf_model, TF_Operation **cur_op,
331                                               DepthToSpaceParams *params, const int layer)
332 {
333     TF_OperationDescription *op_desc;
334     TF_Output input;
335     char name_buffer[NAME_BUFFER_SIZE];
336
337     snprintf(name_buffer, NAME_BUFFER_SIZE, "depth_to_space%d", layer);
338     op_desc = TF_NewOperation(tf_model->graph, "DepthToSpace", name_buffer);
339     input.oper = *cur_op;
340     input.index = 0;
341     TF_AddInput(op_desc, input);
342     TF_SetAttrType(op_desc, "T", TF_FLOAT);
343     TF_SetAttrInt(op_desc, "block_size", params->block_size);
344     *cur_op = TF_FinishOperation(op_desc, tf_model->status);
345     if (TF_GetCode(tf_model->status) != TF_OK){
346         return DNN_ERROR;
347     }
348
349     return DNN_SUCCESS;
350 }
351
352 static DNNReturnType add_pad_layer(TFModel *tf_model, TF_Operation **cur_op,
353                                               LayerPadParams *params, const int layer)
354 {
355     TF_Operation *op;
356     TF_Tensor *tensor;
357     TF_OperationDescription *op_desc;
358     TF_Output input;
359     int32_t *pads;
360     int64_t pads_shape[] = {4, 2};
361
362     char name_buffer[NAME_BUFFER_SIZE];
363     snprintf(name_buffer, NAME_BUFFER_SIZE, "pad%d", layer);
364
365     op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
366     TF_SetAttrType(op_desc, "dtype", TF_INT32);
367     tensor = TF_AllocateTensor(TF_INT32, pads_shape, 2, 4 * 2 * sizeof(int32_t));
368     pads = (int32_t *)TF_TensorData(tensor);
369     pads[0] = params->paddings[0][0];
370     pads[1] = params->paddings[0][1];
371     pads[2] = params->paddings[1][0];
372     pads[3] = params->paddings[1][1];
373     pads[4] = params->paddings[2][0];
374     pads[5] = params->paddings[2][1];
375     pads[6] = params->paddings[3][0];
376     pads[7] = params->paddings[3][1];
377     TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
378     if (TF_GetCode(tf_model->status) != TF_OK){
379         return DNN_ERROR;
380     }
381     op = TF_FinishOperation(op_desc, tf_model->status);
382     if (TF_GetCode(tf_model->status) != TF_OK){
383         return DNN_ERROR;
384     }
385
386     op_desc = TF_NewOperation(tf_model->graph, "MirrorPad", "mirror_pad");
387     input.oper = *cur_op;
388     input.index = 0;
389     TF_AddInput(op_desc, input);
390     input.oper = op;
391     TF_AddInput(op_desc, input);
392     TF_SetAttrType(op_desc, "T", TF_FLOAT);
393     TF_SetAttrType(op_desc, "Tpaddings", TF_INT32);
394     TF_SetAttrString(op_desc, "mode", "SYMMETRIC", 9);
395     *cur_op = TF_FinishOperation(op_desc, tf_model->status);
396     if (TF_GetCode(tf_model->status) != TF_OK){
397         return DNN_ERROR;
398     }
399
400     return DNN_SUCCESS;
401 }
402
403 static DNNReturnType load_native_model(TFModel *tf_model, const char *model_filename)
404 {
405     int32_t layer;
406     TF_OperationDescription *op_desc;
407     TF_Operation *op;
408     TF_Operation *transpose_op;
409     TF_Tensor *tensor;
410     TF_Output input;
411     int32_t *transpose_perm;
412     int64_t transpose_perm_shape[] = {4};
413     int64_t input_shape[] = {1, -1, -1, -1};
414     DNNReturnType layer_add_res;
415     DNNModel *native_model = NULL;
416     ConvolutionalNetwork *conv_network;
417
418     native_model = ff_dnn_load_model_native(model_filename);
419     if (!native_model){
420         return DNN_ERROR;
421     }
422
423     conv_network = (ConvolutionalNetwork *)native_model->model;
424     tf_model->graph = TF_NewGraph();
425     tf_model->status = TF_NewStatus();
426
427 #define CLEANUP_ON_ERROR(tf_model) \
428     { \
429         TF_DeleteGraph(tf_model->graph); \
430         TF_DeleteStatus(tf_model->status); \
431         return DNN_ERROR; \
432     }
433
434     op_desc = TF_NewOperation(tf_model->graph, "Placeholder", "x");
435     TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
436     TF_SetAttrShape(op_desc, "shape", input_shape, 4);
437     op = TF_FinishOperation(op_desc, tf_model->status);
438     if (TF_GetCode(tf_model->status) != TF_OK){
439         CLEANUP_ON_ERROR(tf_model);
440     }
441
442     op_desc = TF_NewOperation(tf_model->graph, "Const", "transpose_perm");
443     TF_SetAttrType(op_desc, "dtype", TF_INT32);
444     tensor = TF_AllocateTensor(TF_INT32, transpose_perm_shape, 1, 4 * sizeof(int32_t));
445     transpose_perm = (int32_t *)TF_TensorData(tensor);
446     transpose_perm[0] = 1;
447     transpose_perm[1] = 2;
448     transpose_perm[2] = 3;
449     transpose_perm[3] = 0;
450     TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
451     if (TF_GetCode(tf_model->status) != TF_OK){
452         CLEANUP_ON_ERROR(tf_model);
453     }
454     transpose_op = TF_FinishOperation(op_desc, tf_model->status);
455
456     for (layer = 0; layer < conv_network->layers_num; ++layer){
457         switch (conv_network->layers[layer].type){
458         case INPUT:
459             layer_add_res = DNN_SUCCESS;
460             break;
461         case CONV:
462             layer_add_res = add_conv_layer(tf_model, transpose_op, &op,
463                                            (ConvolutionalParams *)conv_network->layers[layer].params, layer);
464             break;
465         case DEPTH_TO_SPACE:
466             layer_add_res = add_depth_to_space_layer(tf_model, &op,
467                                                      (DepthToSpaceParams *)conv_network->layers[layer].params, layer);
468             break;
469         case MIRROR_PAD:
470             layer_add_res = add_pad_layer(tf_model, &op,
471                                           (LayerPadParams *)conv_network->layers[layer].params, layer);
472             break;
473         default:
474             CLEANUP_ON_ERROR(tf_model);
475         }
476
477         if (layer_add_res != DNN_SUCCESS){
478             CLEANUP_ON_ERROR(tf_model);
479         }
480     }
481
482     op_desc = TF_NewOperation(tf_model->graph, "Identity", "y");
483     input.oper = op;
484     input.index = 0;
485     TF_AddInput(op_desc, input);
486     TF_FinishOperation(op_desc, tf_model->status);
487     if (TF_GetCode(tf_model->status) != TF_OK){
488         CLEANUP_ON_ERROR(tf_model);
489     }
490
491     ff_dnn_free_model_native(&native_model);
492
493     return DNN_SUCCESS;
494 }
495
496 DNNModel *ff_dnn_load_model_tf(const char *model_filename)
497 {
498     DNNModel *model = NULL;
499     TFModel *tf_model = NULL;
500
501     model = av_malloc(sizeof(DNNModel));
502     if (!model){
503         return NULL;
504     }
505
506     tf_model = av_mallocz(sizeof(TFModel));
507     if (!tf_model){
508         av_freep(&model);
509         return NULL;
510     }
511
512     if (load_tf_model(tf_model, model_filename) != DNN_SUCCESS){
513         if (load_native_model(tf_model, model_filename) != DNN_SUCCESS){
514             av_freep(&tf_model);
515             av_freep(&model);
516
517             return NULL;
518         }
519     }
520
521     model->model = (void *)tf_model;
522     model->set_input_output = &set_input_output_tf;
523
524     return model;
525 }
526
527
528
529 DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, DNNData *outputs, uint32_t nb_output)
530 {
531     TFModel *tf_model = (TFModel *)model->model;
532     uint32_t nb = FFMIN(nb_output, tf_model->nb_output);
533     if (nb == 0)
534         return DNN_ERROR;
535
536     av_assert0(tf_model->output_tensors);
537     for (uint32_t i = 0; i < tf_model->nb_output; ++i) {
538         if (tf_model->output_tensors[i]) {
539             TF_DeleteTensor(tf_model->output_tensors[i]);
540             tf_model->output_tensors[i] = NULL;
541         }
542     }
543
544     TF_SessionRun(tf_model->session, NULL,
545                   &tf_model->input, &tf_model->input_tensor, 1,
546                   tf_model->outputs, tf_model->output_tensors, nb,
547                   NULL, 0, NULL, tf_model->status);
548
549     if (TF_GetCode(tf_model->status) != TF_OK){
550         return DNN_ERROR;
551     }
552
553     for (uint32_t i = 0; i < nb; ++i) {
554         outputs[i].height = TF_Dim(tf_model->output_tensors[i], 1);
555         outputs[i].width = TF_Dim(tf_model->output_tensors[i], 2);
556         outputs[i].channels = TF_Dim(tf_model->output_tensors[i], 3);
557         outputs[i].data = TF_TensorData(tf_model->output_tensors[i]);
558     }
559
560     return DNN_SUCCESS;
561 }
562
563 void ff_dnn_free_model_tf(DNNModel **model)
564 {
565     TFModel *tf_model;
566
567     if (*model){
568         tf_model = (TFModel *)(*model)->model;
569         if (tf_model->graph){
570             TF_DeleteGraph(tf_model->graph);
571         }
572         if (tf_model->session){
573             TF_CloseSession(tf_model->session, tf_model->status);
574             TF_DeleteSession(tf_model->session, tf_model->status);
575         }
576         if (tf_model->status){
577             TF_DeleteStatus(tf_model->status);
578         }
579         if (tf_model->input_tensor){
580             TF_DeleteTensor(tf_model->input_tensor);
581         }
582         if (tf_model->output_tensors) {
583             for (uint32_t i = 0; i < tf_model->nb_output; ++i) {
584                 if (tf_model->output_tensors[i]) {
585                     TF_DeleteTensor(tf_model->output_tensors[i]);
586                     tf_model->output_tensors[i] = NULL;
587                 }
588             }
589         }
590         av_freep(&tf_model->outputs);
591         av_freep(&tf_model->output_tensors);
592         av_freep(&tf_model);
593         av_freep(model);
594     }
595 }