]> git.sesse.net Git - ffmpeg/blob - libavfilter/dnn_backend_tf.c
lavf/movenc: Fail when codec tag is invalid for format
[ffmpeg] / libavfilter / dnn_backend_tf.c
1 /*
2  * Copyright (c) 2018 Sergey Lavrushkin
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 /**
22  * @file
23  * DNN tensorflow backend implementation.
24  */
25
26 #include "dnn_backend_tf.h"
27 #include "dnn_srcnn.h"
28 #include "dnn_espcn.h"
29 #include "libavformat/avio.h"
30
31 #include <tensorflow/c/c_api.h>
32
33 typedef struct TFModel{
34     TF_Graph *graph;
35     TF_Session *session;
36     TF_Status *status;
37     TF_Output input, output;
38     TF_Tensor *input_tensor;
39     DNNData *output_data;
40 } TFModel;
41
/**
 * Deallocator installed on the TF_Buffer created by read_graph().
 *
 * @param data   memory to release
 * @param length size of the memory block, not used
 */
static void free_buffer(void *data, size_t length)
{
    void *buffer = data;

    av_freep(&buffer);
}
46
47 static TF_Buffer *read_graph(const char *model_filename)
48 {
49     TF_Buffer *graph_buf;
50     unsigned char *graph_data = NULL;
51     AVIOContext *model_file_context;
52     long size, bytes_read;
53
54     if (avio_open(&model_file_context, model_filename, AVIO_FLAG_READ) < 0){
55         return NULL;
56     }
57
58     size = avio_size(model_file_context);
59
60     graph_data = av_malloc(size);
61     if (!graph_data){
62         avio_closep(&model_file_context);
63         return NULL;
64     }
65     bytes_read = avio_read(model_file_context, graph_data, size);
66     avio_closep(&model_file_context);
67     if (bytes_read != size){
68         av_freep(&graph_data);
69         return NULL;
70     }
71
72     graph_buf = TF_NewBuffer();
73     graph_buf->data = (void *)graph_data;
74     graph_buf->length = size;
75     graph_buf->data_deallocator = free_buffer;
76
77     return graph_buf;
78 }
79
80 static DNNReturnType set_input_output_tf(void *model, DNNData *input, DNNData *output)
81 {
82     TFModel *tf_model = (TFModel *)model;
83     int64_t input_dims[] = {1, input->height, input->width, input->channels};
84     TF_SessionOptions *sess_opts;
85     const TF_Operation *init_op = TF_GraphOperationByName(tf_model->graph, "init");
86     TF_Tensor *output_tensor;
87
88     // Input operation should be named 'x'
89     tf_model->input.oper = TF_GraphOperationByName(tf_model->graph, "x");
90     if (!tf_model->input.oper){
91         return DNN_ERROR;
92     }
93     tf_model->input.index = 0;
94     if (tf_model->input_tensor){
95         TF_DeleteTensor(tf_model->input_tensor);
96     }
97     tf_model->input_tensor = TF_AllocateTensor(TF_FLOAT, input_dims, 4,
98                                                input_dims[1] * input_dims[2] * input_dims[3] * sizeof(float));
99     if (!tf_model->input_tensor){
100         return DNN_ERROR;
101     }
102     input->data = (float *)TF_TensorData(tf_model->input_tensor);
103
104     // Output operation should be named 'y'
105     tf_model->output.oper = TF_GraphOperationByName(tf_model->graph, "y");
106     if (!tf_model->output.oper){
107         return DNN_ERROR;
108     }
109     tf_model->output.index = 0;
110
111     if (tf_model->session){
112         TF_CloseSession(tf_model->session, tf_model->status);
113         TF_DeleteSession(tf_model->session, tf_model->status);
114     }
115
116     sess_opts = TF_NewSessionOptions();
117     tf_model->session = TF_NewSession(tf_model->graph, sess_opts, tf_model->status);
118     TF_DeleteSessionOptions(sess_opts);
119     if (TF_GetCode(tf_model->status) != TF_OK)
120     {
121         return DNN_ERROR;
122     }
123
124     // Run initialization operation with name "init" if it is present in graph
125     if (init_op){
126         TF_SessionRun(tf_model->session, NULL,
127                       NULL, NULL, 0,
128                       NULL, NULL, 0,
129                       &init_op, 1, NULL, tf_model->status);
130         if (TF_GetCode(tf_model->status) != TF_OK)
131         {
132             return DNN_ERROR;
133         }
134     }
135
136     // Execute network to get output height, width and number of channels
137     TF_SessionRun(tf_model->session, NULL,
138                   &tf_model->input, &tf_model->input_tensor, 1,
139                   &tf_model->output, &output_tensor, 1,
140                   NULL, 0, NULL, tf_model->status);
141     if (TF_GetCode(tf_model->status) != TF_OK){
142         return DNN_ERROR;
143     }
144     else{
145         output->height = TF_Dim(output_tensor, 1);
146         output->width = TF_Dim(output_tensor, 2);
147         output->channels = TF_Dim(output_tensor, 3);
148         output->data = av_malloc(output->height * output->width * output->channels * sizeof(float));
149         if (!output->data){
150             return DNN_ERROR;
151         }
152         tf_model->output_data = output;
153         TF_DeleteTensor(output_tensor);
154     }
155
156     return DNN_SUCCESS;
157 }
158
159 DNNModel *ff_dnn_load_model_tf(const char *model_filename)
160 {
161     DNNModel *model = NULL;
162     TFModel *tf_model = NULL;
163     TF_Buffer *graph_def;
164     TF_ImportGraphDefOptions *graph_opts;
165
166     model = av_malloc(sizeof(DNNModel));
167     if (!model){
168         return NULL;
169     }
170
171     tf_model = av_malloc(sizeof(TFModel));
172     if (!tf_model){
173         av_freep(&model);
174         return NULL;
175     }
176     tf_model->session = NULL;
177     tf_model->input_tensor = NULL;
178     tf_model->output_data = NULL;
179
180     graph_def = read_graph(model_filename);
181     if (!graph_def){
182         av_freep(&tf_model);
183         av_freep(&model);
184         return NULL;
185     }
186     tf_model->graph = TF_NewGraph();
187     tf_model->status = TF_NewStatus();
188     graph_opts = TF_NewImportGraphDefOptions();
189     TF_GraphImportGraphDef(tf_model->graph, graph_def, graph_opts, tf_model->status);
190     TF_DeleteImportGraphDefOptions(graph_opts);
191     TF_DeleteBuffer(graph_def);
192     if (TF_GetCode(tf_model->status) != TF_OK){
193         TF_DeleteGraph(tf_model->graph);
194         TF_DeleteStatus(tf_model->status);
195         av_freep(&tf_model);
196         av_freep(&model);
197         return NULL;
198     }
199
200     model->model = (void *)tf_model;
201     model->set_input_output = &set_input_output_tf;
202
203     return model;
204 }
205
206 static TF_Operation *add_pad_op(TFModel *tf_model, TF_Operation *input_op, int32_t pad)
207 {
208     TF_OperationDescription *op_desc;
209     TF_Operation *op;
210     TF_Tensor *tensor;
211     TF_Output input;
212     int32_t *pads;
213     int64_t pads_shape[] = {4, 2};
214
215     op_desc = TF_NewOperation(tf_model->graph, "Const", "pads");
216     TF_SetAttrType(op_desc, "dtype", TF_INT32);
217     tensor = TF_AllocateTensor(TF_INT32, pads_shape, 2, 4 * 2 * sizeof(int32_t));
218     pads = (int32_t *)TF_TensorData(tensor);
219     pads[0] = 0;   pads[1] = 0;
220     pads[2] = pad; pads[3] = pad;
221     pads[4] = pad; pads[5] = pad;
222     pads[6] = 0;   pads[7] = 0;
223     TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
224     if (TF_GetCode(tf_model->status) != TF_OK){
225         return NULL;
226     }
227     op = TF_FinishOperation(op_desc, tf_model->status);
228     if (TF_GetCode(tf_model->status) != TF_OK){
229         return NULL;
230     }
231
232     op_desc = TF_NewOperation(tf_model->graph, "MirrorPad", "mirror_pad");
233     input.oper = input_op;
234     input.index = 0;
235     TF_AddInput(op_desc, input);
236     input.oper = op;
237     TF_AddInput(op_desc, input);
238     TF_SetAttrType(op_desc, "T", TF_FLOAT);
239     TF_SetAttrType(op_desc, "Tpaddings", TF_INT32);
240     TF_SetAttrString(op_desc, "mode", "SYMMETRIC", 9);
241     op = TF_FinishOperation(op_desc, tf_model->status);
242     if (TF_GetCode(tf_model->status) != TF_OK){
243         return NULL;
244     }
245
246     return op;
247 }
248
249 static TF_Operation *add_const_op(TFModel *tf_model, const float *values, const int64_t *dims, int dims_len, const char *name)
250 {
251     int dim;
252     TF_OperationDescription *op_desc;
253     TF_Tensor *tensor;
254     size_t len;
255
256     op_desc = TF_NewOperation(tf_model->graph, "Const", name);
257     TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
258     len = sizeof(float);
259     for (dim = 0; dim < dims_len; ++dim){
260         len *= dims[dim];
261     }
262     tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, len);
263     memcpy(TF_TensorData(tensor), values, len);
264     TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
265     if (TF_GetCode(tf_model->status) != TF_OK){
266         return NULL;
267     }
268
269     return TF_FinishOperation(op_desc, tf_model->status);
270 }
271
272 static TF_Operation* add_conv_layers(TFModel *tf_model, const float **consts, const int64_t **consts_dims,
273                                      const int *consts_dims_len, const char **activations,
274                                      TF_Operation *input_op, int layers_num)
275 {
276     int i;
277     TF_OperationDescription *op_desc;
278     TF_Operation *op;
279     TF_Operation *transpose_op;
280     TF_Output input;
281     int64_t strides[] = {1, 1, 1, 1};
282     int32_t *transpose_perm;
283     TF_Tensor *tensor;
284     int64_t transpose_perm_shape[] = {4};
285     #define NAME_BUFF_SIZE 256
286     char name_buffer[NAME_BUFF_SIZE];
287
288     op_desc = TF_NewOperation(tf_model->graph, "Const", "transpose_perm");
289     TF_SetAttrType(op_desc, "dtype", TF_INT32);
290     tensor = TF_AllocateTensor(TF_INT32, transpose_perm_shape, 1, 4 * sizeof(int32_t));
291     transpose_perm = (int32_t *)TF_TensorData(tensor);
292     transpose_perm[0] = 1;
293     transpose_perm[1] = 2;
294     transpose_perm[2] = 3;
295     transpose_perm[3] = 0;
296     TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
297     if (TF_GetCode(tf_model->status) != TF_OK){
298         return NULL;
299     }
300     transpose_op = TF_FinishOperation(op_desc, tf_model->status);
301     if (TF_GetCode(tf_model->status) != TF_OK){
302         return NULL;
303     }
304
305     input.index = 0;
306     for (i = 0; i < layers_num; ++i){
307         snprintf(name_buffer, NAME_BUFF_SIZE, "conv_kernel%d", i);
308         op = add_const_op(tf_model, consts[i << 1], consts_dims[i << 1], consts_dims_len[i << 1], name_buffer);
309         if (TF_GetCode(tf_model->status) != TF_OK || op == NULL){
310             return NULL;
311         }
312
313         snprintf(name_buffer, NAME_BUFF_SIZE, "transpose%d", i);
314         op_desc = TF_NewOperation(tf_model->graph, "Transpose", name_buffer);
315         input.oper = op;
316         TF_AddInput(op_desc, input);
317         input.oper = transpose_op;
318         TF_AddInput(op_desc, input);
319         TF_SetAttrType(op_desc, "T", TF_FLOAT);
320         TF_SetAttrType(op_desc, "Tperm", TF_INT32);
321         op = TF_FinishOperation(op_desc, tf_model->status);
322         if (TF_GetCode(tf_model->status) != TF_OK){
323             return NULL;
324         }
325
326         snprintf(name_buffer, NAME_BUFF_SIZE, "conv2d%d", i);
327         op_desc = TF_NewOperation(tf_model->graph, "Conv2D", name_buffer);
328         input.oper = input_op;
329         TF_AddInput(op_desc, input);
330         input.oper = op;
331         TF_AddInput(op_desc, input);
332         TF_SetAttrType(op_desc, "T", TF_FLOAT);
333         TF_SetAttrIntList(op_desc, "strides", strides, 4);
334         TF_SetAttrString(op_desc, "padding", "VALID", 5);
335         input_op = TF_FinishOperation(op_desc, tf_model->status);
336         if (TF_GetCode(tf_model->status) != TF_OK){
337             return NULL;
338         }
339
340         snprintf(name_buffer, NAME_BUFF_SIZE, "conv_biases%d", i);
341         op = add_const_op(tf_model, consts[(i << 1) + 1], consts_dims[(i << 1) + 1], consts_dims_len[(i << 1) + 1], name_buffer);
342         if (TF_GetCode(tf_model->status) != TF_OK || op == NULL){
343             return NULL;
344         }
345
346         snprintf(name_buffer, NAME_BUFF_SIZE, "bias_add%d", i);
347         op_desc = TF_NewOperation(tf_model->graph, "BiasAdd", name_buffer);
348         input.oper = input_op;
349         TF_AddInput(op_desc, input);
350         input.oper = op;
351         TF_AddInput(op_desc, input);
352         TF_SetAttrType(op_desc, "T", TF_FLOAT);
353         input_op = TF_FinishOperation(op_desc, tf_model->status);
354         if (TF_GetCode(tf_model->status) != TF_OK){
355             return NULL;
356         }
357
358         snprintf(name_buffer, NAME_BUFF_SIZE, "activation%d", i);
359         op_desc = TF_NewOperation(tf_model->graph, activations[i], name_buffer);
360         input.oper = input_op;
361         TF_AddInput(op_desc, input);
362         TF_SetAttrType(op_desc, "T", TF_FLOAT);
363         input_op = TF_FinishOperation(op_desc, tf_model->status);
364         if (TF_GetCode(tf_model->status) != TF_OK){
365             return NULL;
366         }
367     }
368
369     return input_op;
370 }
371
372 DNNModel *ff_dnn_load_default_model_tf(DNNDefaultModel model_type)
373 {
374     DNNModel *model = NULL;
375     TFModel *tf_model = NULL;
376     TF_OperationDescription *op_desc;
377     TF_Operation *op;
378     TF_Output input;
379     static const int64_t input_shape[] = {1, -1, -1, 1};
380     static const char tanh[] = "Tanh";
381     static const char sigmoid[] = "Sigmoid";
382     static const char relu[] = "Relu";
383
384     static const float *srcnn_consts[] = {
385         srcnn_conv1_kernel,
386         srcnn_conv1_bias,
387         srcnn_conv2_kernel,
388         srcnn_conv2_bias,
389         srcnn_conv3_kernel,
390         srcnn_conv3_bias
391     };
392     static const long int *srcnn_consts_dims[] = {
393         srcnn_conv1_kernel_dims,
394         srcnn_conv1_bias_dims,
395         srcnn_conv2_kernel_dims,
396         srcnn_conv2_bias_dims,
397         srcnn_conv3_kernel_dims,
398         srcnn_conv3_bias_dims
399     };
400     static const int srcnn_consts_dims_len[] = {
401         4,
402         1,
403         4,
404         1,
405         4,
406         1
407     };
408     static const char *srcnn_activations[] = {
409         relu,
410         relu,
411         relu
412     };
413
414     static const float *espcn_consts[] = {
415         espcn_conv1_kernel,
416         espcn_conv1_bias,
417         espcn_conv2_kernel,
418         espcn_conv2_bias,
419         espcn_conv3_kernel,
420         espcn_conv3_bias
421     };
422     static const long int *espcn_consts_dims[] = {
423         espcn_conv1_kernel_dims,
424         espcn_conv1_bias_dims,
425         espcn_conv2_kernel_dims,
426         espcn_conv2_bias_dims,
427         espcn_conv3_kernel_dims,
428         espcn_conv3_bias_dims
429     };
430     static const int espcn_consts_dims_len[] = {
431         4,
432         1,
433         4,
434         1,
435         4,
436         1
437     };
438     static const char *espcn_activations[] = {
439         tanh,
440         tanh,
441         sigmoid
442     };
443
444     input.index = 0;
445
446     model = av_malloc(sizeof(DNNModel));
447     if (!model){
448         return NULL;
449     }
450
451     tf_model = av_malloc(sizeof(TFModel));
452     if (!tf_model){
453         av_freep(&model);
454         return NULL;
455     }
456     tf_model->session = NULL;
457     tf_model->input_tensor = NULL;
458     tf_model->output_data = NULL;
459
460     tf_model->graph = TF_NewGraph();
461     tf_model->status = TF_NewStatus();
462
463     #define CLEANUP_ON_ERROR(tf_model, model) { \
464         TF_DeleteGraph(tf_model->graph); \
465         TF_DeleteStatus(tf_model->status); \
466         av_freep(&tf_model); \
467         av_freep(&model); \
468         return NULL; \
469     }
470
471     op_desc = TF_NewOperation(tf_model->graph, "Placeholder", "x");
472     TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
473     TF_SetAttrShape(op_desc, "shape", input_shape, 4);
474     op = TF_FinishOperation(op_desc, tf_model->status);
475     if (TF_GetCode(tf_model->status) != TF_OK){
476         CLEANUP_ON_ERROR(tf_model, model);
477     }
478
479     switch (model_type){
480     case DNN_SRCNN:
481         op = add_pad_op(tf_model, op, 6);
482         if (!op){
483             CLEANUP_ON_ERROR(tf_model, model);
484         }
485         op = add_conv_layers(tf_model, srcnn_consts,
486                              srcnn_consts_dims, srcnn_consts_dims_len,
487                              srcnn_activations, op, 3);
488         if (!op){
489             CLEANUP_ON_ERROR(tf_model, model);
490         }
491         break;
492     case DNN_ESPCN:
493         op = add_pad_op(tf_model, op, 4);
494         if (!op){
495             CLEANUP_ON_ERROR(tf_model, model);
496         }
497         op = add_conv_layers(tf_model, espcn_consts,
498                              espcn_consts_dims, espcn_consts_dims_len,
499                              espcn_activations, op, 3);
500         if (!op){
501             CLEANUP_ON_ERROR(tf_model, model);
502         }
503
504         op_desc = TF_NewOperation(tf_model->graph, "DepthToSpace", "depth_to_space");
505         input.oper = op;
506         TF_AddInput(op_desc, input);
507         TF_SetAttrType(op_desc, "T", TF_FLOAT);
508         TF_SetAttrInt(op_desc, "block_size", 2);
509         op = TF_FinishOperation(op_desc, tf_model->status);
510         if (TF_GetCode(tf_model->status) != TF_OK){
511             CLEANUP_ON_ERROR(tf_model, model);
512         }
513         break;
514     default:
515         CLEANUP_ON_ERROR(tf_model, model);
516     }
517
518     op_desc = TF_NewOperation(tf_model->graph, "Identity", "y");
519     input.oper = op;
520     TF_AddInput(op_desc, input);
521     TF_FinishOperation(op_desc, tf_model->status);
522     if (TF_GetCode(tf_model->status) != TF_OK){
523         CLEANUP_ON_ERROR(tf_model, model);
524     }
525
526     model->model = (void *)tf_model;
527     model->set_input_output = &set_input_output_tf;
528
529     return model;
530 }
531
532 DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model)
533 {
534     TFModel *tf_model = (TFModel *)model->model;
535     TF_Tensor *output_tensor;
536
537     TF_SessionRun(tf_model->session, NULL,
538                   &tf_model->input, &tf_model->input_tensor, 1,
539                   &tf_model->output, &output_tensor, 1,
540                   NULL, 0, NULL, tf_model->status);
541
542     if (TF_GetCode(tf_model->status) != TF_OK){
543         return DNN_ERROR;
544     }
545     else{
546         memcpy(tf_model->output_data->data, TF_TensorData(output_tensor),
547                tf_model->output_data->height * tf_model->output_data->width *
548                tf_model->output_data->channels * sizeof(float));
549         TF_DeleteTensor(output_tensor);
550
551         return DNN_SUCCESS;
552     }
553 }
554
555 void ff_dnn_free_model_tf(DNNModel **model)
556 {
557     TFModel *tf_model;
558
559     if (*model){
560         tf_model = (TFModel *)(*model)->model;
561         if (tf_model->graph){
562             TF_DeleteGraph(tf_model->graph);
563         }
564         if (tf_model->session){
565             TF_CloseSession(tf_model->session, tf_model->status);
566             TF_DeleteSession(tf_model->session, tf_model->status);
567         }
568         if (tf_model->status){
569             TF_DeleteStatus(tf_model->status);
570         }
571         if (tf_model->input_tensor){
572             TF_DeleteTensor(tf_model->input_tensor);
573         }
574         if (tf_model->output_data){
575             av_freep(&(tf_model->output_data->data));
576         }
577         av_freep(&tf_model);
578         av_freep(model);
579     }
580 }