git.sesse.net Git - ffmpeg/blob - libavfilter/dnn_backend_tf.c

   1 /*
   2  * Copyright (c) 2018 Sergey Lavrushkin
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with FFmpeg; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 /**
  22  * @file
  23  * DNN tensorflow backend implementation.
  24  */
  25
  26 #include "dnn_backend_tf.h"
  27 #include "dnn_srcnn.h"
  28 #include "dnn_espcn.h"
  29 #include "libavformat/avio.h"
  30
  31 #include <tensorflow/c/c_api.h>
  32
  33 typedef struct TFModel{
  34     TF_Graph *graph;
  35     TF_Session *session;
  36     TF_Status *status;
  37     TF_Output input, output;
  38     TF_Tensor *input_tensor;
  39     DNNData *output_data;
  40 } TFModel;
  41
  42 static void free_buffer(void *data, size_t length)
  43 {
  44     av_freep(&data);
  45 }
  46
  47 static TF_Buffer *read_graph(const char *model_filename)
  48 {
  49     TF_Buffer *graph_buf;
  50     unsigned char *graph_data = NULL;
  51     AVIOContext *model_file_context;
  52     long size, bytes_read;
  53
  54     if (avio_open(&model_file_context, model_filename, AVIO_FLAG_READ) < 0){
  55         return NULL;
  56     }
  57
  58     size = avio_size(model_file_context);
  59
  60     graph_data = av_malloc(size);
  61     if (!graph_data){
  62         avio_closep(&model_file_context);
  63         return NULL;
  64     }
  65     bytes_read = avio_read(model_file_context, graph_data, size);
  66     avio_closep(&model_file_context);
  67     if (bytes_read != size){
  68         av_freep(&graph_data);
  69         return NULL;
  70     }
  71
  72     graph_buf = TF_NewBuffer();
  73     graph_buf->data = (void *)graph_data;
  74     graph_buf->length = size;
  75     graph_buf->data_deallocator = free_buffer;
  76
  77     return graph_buf;
  78 }
  79
  80 static DNNReturnType set_input_output_tf(void *model, DNNData *input, DNNData *output)
  81 {
  82     TFModel *tf_model = (TFModel *)model;
  83     int64_t input_dims[] = {1, input->height, input->width, input->channels};
  84     TF_SessionOptions *sess_opts;
  85     const TF_Operation *init_op = TF_GraphOperationByName(tf_model->graph, "init");
  86     TF_Tensor *output_tensor;
  87
  88     // Input operation should be named 'x'
  89     tf_model->input.oper = TF_GraphOperationByName(tf_model->graph, "x");
  90     if (!tf_model->input.oper){
  91         return DNN_ERROR;
  92     }
  93     tf_model->input.index = 0;
  94     if (tf_model->input_tensor){
  95         TF_DeleteTensor(tf_model->input_tensor);
  96     }
  97     tf_model->input_tensor = TF_AllocateTensor(TF_FLOAT, input_dims, 4,
  98                                                input_dims[1] * input_dims[2] * input_dims[3] * sizeof(float));
  99     if (!tf_model->input_tensor){
 100         return DNN_ERROR;
 101     }
 102     input->data = (float *)TF_TensorData(tf_model->input_tensor);
 103
 104     // Output operation should be named 'y'
 105     tf_model->output.oper = TF_GraphOperationByName(tf_model->graph, "y");
 106     if (!tf_model->output.oper){
 107         return DNN_ERROR;
 108     }
 109     tf_model->output.index = 0;
 110
 111     if (tf_model->session){
 112         TF_CloseSession(tf_model->session, tf_model->status);
 113         TF_DeleteSession(tf_model->session, tf_model->status);
 114     }
 115
 116     sess_opts = TF_NewSessionOptions();
 117     tf_model->session = TF_NewSession(tf_model->graph, sess_opts, tf_model->status);
 118     TF_DeleteSessionOptions(sess_opts);
 119     if (TF_GetCode(tf_model->status) != TF_OK)
 120     {
 121         return DNN_ERROR;
 122     }
 123
 124     // Run initialization operation with name "init" if it is present in graph
 125     if (init_op){
 126         TF_SessionRun(tf_model->session, NULL,
 127                       NULL, NULL, 0,
 128                       NULL, NULL, 0,
 129                       &init_op, 1, NULL, tf_model->status);
 130         if (TF_GetCode(tf_model->status) != TF_OK)
 131         {
 132             return DNN_ERROR;
 133         }
 134     }
 135
 136     // Execute network to get output height, width and number of channels
 137     TF_SessionRun(tf_model->session, NULL,
 138                   &tf_model->input, &tf_model->input_tensor, 1,
 139                   &tf_model->output, &output_tensor, 1,
 140                   NULL, 0, NULL, tf_model->status);
 141     if (TF_GetCode(tf_model->status) != TF_OK){
 142         return DNN_ERROR;
 143     }
 144     else{
 145         output->height = TF_Dim(output_tensor, 1);
 146         output->width = TF_Dim(output_tensor, 2);
 147         output->channels = TF_Dim(output_tensor, 3);
 148         output->data = av_malloc(output->height * output->width * output->channels * sizeof(float));
 149         if (!output->data){
 150             return DNN_ERROR;
 151         }
 152         tf_model->output_data = output;
 153         TF_DeleteTensor(output_tensor);
 154     }
 155
 156     return DNN_SUCCESS;
 157 }
 158
 159 DNNModel *ff_dnn_load_model_tf(const char *model_filename)
 160 {
 161     DNNModel *model = NULL;
 162     TFModel *tf_model = NULL;
 163     TF_Buffer *graph_def;
 164     TF_ImportGraphDefOptions *graph_opts;
 165
 166     model = av_malloc(sizeof(DNNModel));
 167     if (!model){
 168         return NULL;
 169     }
 170
 171     tf_model = av_malloc(sizeof(TFModel));
 172     if (!tf_model){
 173         av_freep(&model);
 174         return NULL;
 175     }
 176     tf_model->session = NULL;
 177     tf_model->input_tensor = NULL;
 178     tf_model->output_data = NULL;
 179
 180     graph_def = read_graph(model_filename);
 181     if (!graph_def){
 182         av_freep(&tf_model);
 183         av_freep(&model);
 184         return NULL;
 185     }
 186     tf_model->graph = TF_NewGraph();
 187     tf_model->status = TF_NewStatus();
 188     graph_opts = TF_NewImportGraphDefOptions();
 189     TF_GraphImportGraphDef(tf_model->graph, graph_def, graph_opts, tf_model->status);
 190     TF_DeleteImportGraphDefOptions(graph_opts);
 191     TF_DeleteBuffer(graph_def);
 192     if (TF_GetCode(tf_model->status) != TF_OK){
 193         TF_DeleteGraph(tf_model->graph);
 194         TF_DeleteStatus(tf_model->status);
 195         av_freep(&tf_model);
 196         av_freep(&model);
 197         return NULL;
 198     }
 199
 200     model->model = (void *)tf_model;
 201     model->set_input_output = &set_input_output_tf;
 202
 203     return model;
 204 }
 205
 206 static TF_Operation *add_pad_op(TFModel *tf_model, TF_Operation *input_op, int32_t pad)
 207 {
 208     TF_OperationDescription *op_desc;
 209     TF_Operation *op;
 210     TF_Tensor *tensor;
 211     TF_Output input;
 212     int32_t *pads;
 213     int64_t pads_shape[] = {4, 2};
 214
 215     op_desc = TF_NewOperation(tf_model->graph, "Const", "pads");
 216     TF_SetAttrType(op_desc, "dtype", TF_INT32);
 217     tensor = TF_AllocateTensor(TF_INT32, pads_shape, 2, 4 * 2 * sizeof(int32_t));
 218     pads = (int32_t *)TF_TensorData(tensor);
 219     pads[0] = 0;   pads[1] = 0;
 220     pads[2] = pad; pads[3] = pad;
 221     pads[4] = pad; pads[5] = pad;
 222     pads[6] = 0;   pads[7] = 0;
 223     TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
 224     if (TF_GetCode(tf_model->status) != TF_OK){
 225         return NULL;
 226     }
 227     op = TF_FinishOperation(op_desc, tf_model->status);
 228     if (TF_GetCode(tf_model->status) != TF_OK){
 229         return NULL;
 230     }
 231
 232     op_desc = TF_NewOperation(tf_model->graph, "MirrorPad", "mirror_pad");
 233     input.oper = input_op;
 234     input.index = 0;
 235     TF_AddInput(op_desc, input);
 236     input.oper = op;
 237     TF_AddInput(op_desc, input);
 238     TF_SetAttrType(op_desc, "T", TF_FLOAT);
 239     TF_SetAttrType(op_desc, "Tpaddings", TF_INT32);
 240     TF_SetAttrString(op_desc, "mode", "SYMMETRIC", 9);
 241     op = TF_FinishOperation(op_desc, tf_model->status);
 242     if (TF_GetCode(tf_model->status) != TF_OK){
 243         return NULL;
 244     }
 245
 246     return op;
 247 }
 248
 249 static TF_Operation *add_const_op(TFModel *tf_model, const float *values, const int64_t *dims, int dims_len, const char *name)
 250 {
 251     int dim;
 252     TF_OperationDescription *op_desc;
 253     TF_Tensor *tensor;
 254     size_t len;
 255
 256     op_desc = TF_NewOperation(tf_model->graph, "Const", name);
 257     TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
 258     len = sizeof(float);
 259     for (dim = 0; dim < dims_len; ++dim){
 260         len *= dims[dim];
 261     }
 262     tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, len);
 263     memcpy(TF_TensorData(tensor), values, len);
 264     TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
 265     if (TF_GetCode(tf_model->status) != TF_OK){
 266         return NULL;
 267     }
 268
 269     return TF_FinishOperation(op_desc, tf_model->status);
 270 }
 271
 272 static TF_Operation* add_conv_layers(TFModel *tf_model, const float **consts, const int64_t **consts_dims,
 273                                      const int *consts_dims_len, const char **activations,
 274                                      TF_Operation *input_op, int layers_num)
 275 {
 276     int i;
 277     TF_OperationDescription *op_desc;
 278     TF_Operation *op;
 279     TF_Operation *transpose_op;
 280     TF_Output input;
 281     int64_t strides[] = {1, 1, 1, 1};
 282     int32_t *transpose_perm;
 283     TF_Tensor *tensor;
 284     int64_t transpose_perm_shape[] = {4};
 285     #define NAME_BUFF_SIZE 256
 286     char name_buffer[NAME_BUFF_SIZE];
 287
 288     op_desc = TF_NewOperation(tf_model->graph, "Const", "transpose_perm");
 289     TF_SetAttrType(op_desc, "dtype", TF_INT32);
 290     tensor = TF_AllocateTensor(TF_INT32, transpose_perm_shape, 1, 4 * sizeof(int32_t));
 291     transpose_perm = (int32_t *)TF_TensorData(tensor);
 292     transpose_perm[0] = 1;
 293     transpose_perm[1] = 2;
 294     transpose_perm[2] = 3;
 295     transpose_perm[3] = 0;
 296     TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
 297     if (TF_GetCode(tf_model->status) != TF_OK){
 298         return NULL;
 299     }
 300     transpose_op = TF_FinishOperation(op_desc, tf_model->status);
 301     if (TF_GetCode(tf_model->status) != TF_OK){
 302         return NULL;
 303     }
 304
 305     input.index = 0;
 306     for (i = 0; i < layers_num; ++i){
 307         snprintf(name_buffer, NAME_BUFF_SIZE, "conv_kernel%d", i);
 308         op = add_const_op(tf_model, consts[i << 1], consts_dims[i << 1], consts_dims_len[i << 1], name_buffer);
 309         if (TF_GetCode(tf_model->status) != TF_OK || op == NULL){
 310             return NULL;
 311         }
 312
 313         snprintf(name_buffer, NAME_BUFF_SIZE, "transpose%d", i);
 314         op_desc = TF_NewOperation(tf_model->graph, "Transpose", name_buffer);
 315         input.oper = op;
 316         TF_AddInput(op_desc, input);
 317         input.oper = transpose_op;
 318         TF_AddInput(op_desc, input);
 319         TF_SetAttrType(op_desc, "T", TF_FLOAT);
 320         TF_SetAttrType(op_desc, "Tperm", TF_INT32);
 321         op = TF_FinishOperation(op_desc, tf_model->status);
 322         if (TF_GetCode(tf_model->status) != TF_OK){
 323             return NULL;
 324         }
 325
 326         snprintf(name_buffer, NAME_BUFF_SIZE, "conv2d%d", i);
 327         op_desc = TF_NewOperation(tf_model->graph, "Conv2D", name_buffer);
 328         input.oper = input_op;
 329         TF_AddInput(op_desc, input);
 330         input.oper = op;
 331         TF_AddInput(op_desc, input);
 332         TF_SetAttrType(op_desc, "T", TF_FLOAT);
 333         TF_SetAttrIntList(op_desc, "strides", strides, 4);
 334         TF_SetAttrString(op_desc, "padding", "VALID", 5);
 335         input_op = TF_FinishOperation(op_desc, tf_model->status);
 336         if (TF_GetCode(tf_model->status) != TF_OK){
 337             return NULL;
 338         }
 339
 340         snprintf(name_buffer, NAME_BUFF_SIZE, "conv_biases%d", i);
 341         op = add_const_op(tf_model, consts[(i << 1) + 1], consts_dims[(i << 1) + 1], consts_dims_len[(i << 1) + 1], name_buffer);
 342         if (TF_GetCode(tf_model->status) != TF_OK || op == NULL){
 343             return NULL;
 344         }
 345
 346         snprintf(name_buffer, NAME_BUFF_SIZE, "bias_add%d", i);
 347         op_desc = TF_NewOperation(tf_model->graph, "BiasAdd", name_buffer);
 348         input.oper = input_op;
 349         TF_AddInput(op_desc, input);
 350         input.oper = op;
 351         TF_AddInput(op_desc, input);
 352         TF_SetAttrType(op_desc, "T", TF_FLOAT);
 353         input_op = TF_FinishOperation(op_desc, tf_model->status);
 354         if (TF_GetCode(tf_model->status) != TF_OK){
 355             return NULL;
 356         }
 357
 358         snprintf(name_buffer, NAME_BUFF_SIZE, "activation%d", i);
 359         op_desc = TF_NewOperation(tf_model->graph, activations[i], name_buffer);
 360         input.oper = input_op;
 361         TF_AddInput(op_desc, input);
 362         TF_SetAttrType(op_desc, "T", TF_FLOAT);
 363         input_op = TF_FinishOperation(op_desc, tf_model->status);
 364         if (TF_GetCode(tf_model->status) != TF_OK){
 365             return NULL;
 366         }
 367     }
 368
 369     return input_op;
 370 }
 371
 372 DNNModel *ff_dnn_load_default_model_tf(DNNDefaultModel model_type)
 373 {
 374     DNNModel *model = NULL;
 375     TFModel *tf_model = NULL;
 376     TF_OperationDescription *op_desc;
 377     TF_Operation *op;
 378     TF_Output input;
 379     static const int64_t input_shape[] = {1, -1, -1, 1};
 380     static const char tanh[] = "Tanh";
 381     static const char sigmoid[] = "Sigmoid";
 382     static const char relu[] = "Relu";
 383
 384     static const float *srcnn_consts[] = {
 385         srcnn_conv1_kernel,
 386         srcnn_conv1_bias,
 387         srcnn_conv2_kernel,
 388         srcnn_conv2_bias,
 389         srcnn_conv3_kernel,
 390         srcnn_conv3_bias
 391     };
 392     static const long int *srcnn_consts_dims[] = {
 393         srcnn_conv1_kernel_dims,
 394         srcnn_conv1_bias_dims,
 395         srcnn_conv2_kernel_dims,
 396         srcnn_conv2_bias_dims,
 397         srcnn_conv3_kernel_dims,
 398         srcnn_conv3_bias_dims
 399     };
 400     static const int srcnn_consts_dims_len[] = {
 401         4,
 402         1,
 403         4,
 404         1,
 405         4,
 406         1
 407     };
 408     static const char *srcnn_activations[] = {
 409         relu,
 410         relu,
 411         relu
 412     };
 413
 414     static const float *espcn_consts[] = {
 415         espcn_conv1_kernel,
 416         espcn_conv1_bias,
 417         espcn_conv2_kernel,
 418         espcn_conv2_bias,
 419         espcn_conv3_kernel,
 420         espcn_conv3_bias
 421     };
 422     static const long int *espcn_consts_dims[] = {
 423         espcn_conv1_kernel_dims,
 424         espcn_conv1_bias_dims,
 425         espcn_conv2_kernel_dims,
 426         espcn_conv2_bias_dims,
 427         espcn_conv3_kernel_dims,
 428         espcn_conv3_bias_dims
 429     };
 430     static const int espcn_consts_dims_len[] = {
 431         4,
 432         1,
 433         4,
 434         1,
 435         4,
 436         1
 437     };
 438     static const char *espcn_activations[] = {
 439         tanh,
 440         tanh,
 441         sigmoid
 442     };
 443
 444     input.index = 0;
 445
 446     model = av_malloc(sizeof(DNNModel));
 447     if (!model){
 448         return NULL;
 449     }
 450
 451     tf_model = av_malloc(sizeof(TFModel));
 452     if (!tf_model){
 453         av_freep(&model);
 454         return NULL;
 455     }
 456     tf_model->session = NULL;
 457     tf_model->input_tensor = NULL;
 458     tf_model->output_data = NULL;
 459
 460     tf_model->graph = TF_NewGraph();
 461     tf_model->status = TF_NewStatus();
 462
 463     #define CLEANUP_ON_ERROR(tf_model, model) { \
 464         TF_DeleteGraph(tf_model->graph); \
 465         TF_DeleteStatus(tf_model->status); \
 466         av_freep(&tf_model); \
 467         av_freep(&model); \
 468         return NULL; \
 469     }
 470
 471     op_desc = TF_NewOperation(tf_model->graph, "Placeholder", "x");
 472     TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
 473     TF_SetAttrShape(op_desc, "shape", input_shape, 4);
 474     op = TF_FinishOperation(op_desc, tf_model->status);
 475     if (TF_GetCode(tf_model->status) != TF_OK){
 476         CLEANUP_ON_ERROR(tf_model, model);
 477     }
 478
 479     switch (model_type){
 480     case DNN_SRCNN:
 481         op = add_pad_op(tf_model, op, 6);
 482         if (!op){
 483             CLEANUP_ON_ERROR(tf_model, model);
 484         }
 485         op = add_conv_layers(tf_model, srcnn_consts,
 486                              srcnn_consts_dims, srcnn_consts_dims_len,
 487                              srcnn_activations, op, 3);
 488         if (!op){
 489             CLEANUP_ON_ERROR(tf_model, model);
 490         }
 491         break;
 492     case DNN_ESPCN:
 493         op = add_pad_op(tf_model, op, 4);
 494         if (!op){
 495             CLEANUP_ON_ERROR(tf_model, model);
 496         }
 497         op = add_conv_layers(tf_model, espcn_consts,
 498                              espcn_consts_dims, espcn_consts_dims_len,
 499                              espcn_activations, op, 3);
 500         if (!op){
 501             CLEANUP_ON_ERROR(tf_model, model);
 502         }
 503
 504         op_desc = TF_NewOperation(tf_model->graph, "DepthToSpace", "depth_to_space");
 505         input.oper = op;
 506         TF_AddInput(op_desc, input);
 507         TF_SetAttrType(op_desc, "T", TF_FLOAT);
 508         TF_SetAttrInt(op_desc, "block_size", 2);
 509         op = TF_FinishOperation(op_desc, tf_model->status);
 510         if (TF_GetCode(tf_model->status) != TF_OK){
 511             CLEANUP_ON_ERROR(tf_model, model);
 512         }
 513         break;
 514     default:
 515         CLEANUP_ON_ERROR(tf_model, model);
 516     }
 517
 518     op_desc = TF_NewOperation(tf_model->graph, "Identity", "y");
 519     input.oper = op;
 520     TF_AddInput(op_desc, input);
 521     TF_FinishOperation(op_desc, tf_model->status);
 522     if (TF_GetCode(tf_model->status) != TF_OK){
 523         CLEANUP_ON_ERROR(tf_model, model);
 524     }
 525
 526     model->model = (void *)tf_model;
 527     model->set_input_output = &set_input_output_tf;
 528
 529     return model;
 530 }
 531
 532 DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model)
 533 {
 534     TFModel *tf_model = (TFModel *)model->model;
 535     TF_Tensor *output_tensor;
 536
 537     TF_SessionRun(tf_model->session, NULL,
 538                   &tf_model->input, &tf_model->input_tensor, 1,
 539                   &tf_model->output, &output_tensor, 1,
 540                   NULL, 0, NULL, tf_model->status);
 541
 542     if (TF_GetCode(tf_model->status) != TF_OK){
 543         return DNN_ERROR;
 544     }
 545     else{
 546         memcpy(tf_model->output_data->data, TF_TensorData(output_tensor),
 547                tf_model->output_data->height * tf_model->output_data->width *
 548                tf_model->output_data->channels * sizeof(float));
 549         TF_DeleteTensor(output_tensor);
 550
 551         return DNN_SUCCESS;
 552     }
 553 }
 554
 555 void ff_dnn_free_model_tf(DNNModel **model)
 556 {
 557     TFModel *tf_model;
 558
 559     if (*model){
 560         tf_model = (TFModel *)(*model)->model;
 561         if (tf_model->graph){
 562             TF_DeleteGraph(tf_model->graph);
 563         }
 564         if (tf_model->session){
 565             TF_CloseSession(tf_model->session, tf_model->status);
 566             TF_DeleteSession(tf_model->session, tf_model->status);
 567         }
 568         if (tf_model->status){
 569             TF_DeleteStatus(tf_model->status);
 570         }
 571         if (tf_model->input_tensor){
 572             TF_DeleteTensor(tf_model->input_tensor);
 573         }
 574         if (tf_model->output_data){
 575             av_freep(&(tf_model->output_data->data));
 576         }
 577         av_freep(&tf_model);
 578         av_freep(model);
 579     }
 580 }