]> git.sesse.net Git - ffmpeg/blob - libavfilter/dnn_backend_native.c
avfilter/setpts: add FR shorthand for FRAME_RATE
[ffmpeg] / libavfilter / dnn_backend_native.c
1 /*
2  * Copyright (c) 2018 Sergey Lavrushkin
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 /**
22  * @file
23  * DNN native backend implementation.
24  */
25
#include <limits.h>
#include <stdint.h>

#include "dnn_backend_native.h"
#include "dnn_srcnn.h"
#include "dnn_espcn.h"
#include "libavformat/avio.h"
30
// Kinds of layers supported by the native backend.
typedef enum {INPUT, CONV, DEPTH_TO_SPACE} LayerType;

// Activation functions applied after a convolution (see convolve()).
typedef enum {RELU, TANH, SIGMOID} ActivationFunc;

// One network layer: its kind, the buffer holding its output
// (allocated in set_input_output_native), and kind-specific parameters.
typedef struct Layer{
    LayerType type;
    float* output;
    void* params;
} Layer;

// Parameters of a CONV layer. The kernel is stored as
// output_num x kernel_size x kernel_size x input_num floats
// (this layout follows the indexing used in convolve()).
typedef struct ConvolutionalParams{
    int32_t input_num, output_num, kernel_size;
    ActivationFunc activation;
    float* kernel;
    float* biases;
} ConvolutionalParams;

// Dimensions of the network input, filled in by set_input_output_native.
typedef struct InputParams{
    int height, width, channels;
} InputParams;

// Parameters of a DEPTH_TO_SPACE layer: per-axis upscaling factor.
typedef struct DepthToSpaceParams{
    int block_size;
} DepthToSpaceParams;

// Represents simple feed-forward convolutional network.
typedef struct ConvolutionalNetwork{
    Layer* layers;
    int32_t layers_num;
} ConvolutionalNetwork;
61
62 static DNNReturnType set_input_output_native(void* model, DNNData* input, DNNData* output)
63 {
64     ConvolutionalNetwork* network = (ConvolutionalNetwork*)model;
65     InputParams* input_params;
66     ConvolutionalParams* conv_params;
67     DepthToSpaceParams* depth_to_space_params;
68     int cur_width, cur_height, cur_channels;
69     int32_t layer;
70
71     if (network->layers_num <= 0 || network->layers[0].type != INPUT){
72         return DNN_ERROR;
73     }
74     else{
75         input_params = (InputParams*)network->layers[0].params;
76         input_params->width = cur_width = input->width;
77         input_params->height = cur_height = input->height;
78         input_params->channels = cur_channels = input->channels;
79         if (input->data){
80             av_freep(&input->data);
81         }
82         network->layers[0].output = input->data = av_malloc(cur_height * cur_width * cur_channels * sizeof(float));
83         if (!network->layers[0].output){
84             return DNN_ERROR;
85         }
86     }
87
88     for (layer = 1; layer < network->layers_num; ++layer){
89         switch (network->layers[layer].type){
90         case CONV:
91             conv_params = (ConvolutionalParams*)network->layers[layer].params;
92             if (conv_params->input_num != cur_channels){
93                 return DNN_ERROR;
94             }
95             cur_channels = conv_params->output_num;
96             break;
97         case DEPTH_TO_SPACE:
98             depth_to_space_params = (DepthToSpaceParams*)network->layers[layer].params;
99             if (cur_channels % (depth_to_space_params->block_size * depth_to_space_params->block_size) != 0){
100                 return DNN_ERROR;
101             }
102             cur_channels = cur_channels / (depth_to_space_params->block_size * depth_to_space_params->block_size);
103             cur_height *= depth_to_space_params->block_size;
104             cur_width *= depth_to_space_params->block_size;
105             break;
106         default:
107             return DNN_ERROR;
108         }
109         if (network->layers[layer].output){
110             av_freep(&network->layers[layer].output);
111         }
112         network->layers[layer].output = av_malloc(cur_height * cur_width * cur_channels * sizeof(float));
113         if (!network->layers[layer].output){
114             return DNN_ERROR;
115         }
116     }
117
118     output->data = network->layers[network->layers_num - 1].output;
119     output->height = cur_height;
120     output->width = cur_width;
121     output->channels = cur_channels;
122
123     return DNN_SUCCESS;
124 }
125
126 // Loads model and its parameters that are stored in a binary file with following structure:
127 // layers_num,layer_type,layer_parameterss,layer_type,layer_parameters...
128 // For CONV layer: activation_function, input_num, output_num, kernel_size, kernel, biases
129 // For DEPTH_TO_SPACE layer: block_size
130 DNNModel* ff_dnn_load_model_native(const char* model_filename)
131 {
132     DNNModel* model = NULL;
133     ConvolutionalNetwork* network = NULL;
134     AVIOContext* model_file_context;
135     int file_size, dnn_size, kernel_size, i;
136     int32_t layer;
137     LayerType layer_type;
138     ConvolutionalParams* conv_params;
139     DepthToSpaceParams* depth_to_space_params;
140
141     model = av_malloc(sizeof(DNNModel));
142     if (!model){
143         return NULL;
144     }
145
146     if (avio_open(&model_file_context, model_filename, AVIO_FLAG_READ) < 0){
147         av_freep(&model);
148         return NULL;
149     }
150     file_size = avio_size(model_file_context);
151
152     network = av_malloc(sizeof(ConvolutionalNetwork));
153     if (!network){
154         avio_closep(&model_file_context);
155         av_freep(&model);
156         return NULL;
157     }
158     model->model = (void*)network;
159
160     network->layers_num = 1 + (int32_t)avio_rl32(model_file_context);
161     dnn_size = 4;
162
163     network->layers = av_malloc(network->layers_num * sizeof(Layer));
164     if (!network->layers){
165         av_freep(&network);
166         avio_closep(&model_file_context);
167         av_freep(&model);
168         return NULL;
169     }
170
171     for (layer = 0; layer < network->layers_num; ++layer){
172         network->layers[layer].output = NULL;
173         network->layers[layer].params = NULL;
174     }
175     network->layers[0].type = INPUT;
176     network->layers[0].params = av_malloc(sizeof(InputParams));
177     if (!network->layers[0].params){
178         avio_closep(&model_file_context);
179         ff_dnn_free_model_native(&model);
180         return NULL;
181     }
182
183     for (layer = 1; layer < network->layers_num; ++layer){
184         layer_type = (int32_t)avio_rl32(model_file_context);
185         dnn_size += 4;
186         switch (layer_type){
187         case CONV:
188             conv_params = av_malloc(sizeof(ConvolutionalParams));
189             if (!conv_params){
190                 avio_closep(&model_file_context);
191                 ff_dnn_free_model_native(&model);
192                 return NULL;
193             }
194             conv_params->activation = (int32_t)avio_rl32(model_file_context);
195             conv_params->input_num = (int32_t)avio_rl32(model_file_context);
196             conv_params->output_num = (int32_t)avio_rl32(model_file_context);
197             conv_params->kernel_size = (int32_t)avio_rl32(model_file_context);
198             kernel_size = conv_params->input_num * conv_params->output_num *
199                           conv_params->kernel_size * conv_params->kernel_size;
200             dnn_size += 16 + (kernel_size + conv_params->output_num << 2);
201             if (dnn_size > file_size || conv_params->input_num <= 0 ||
202                 conv_params->output_num <= 0 || conv_params->kernel_size <= 0){
203                 avio_closep(&model_file_context);
204                 ff_dnn_free_model_native(&model);
205                 return NULL;
206             }
207             conv_params->kernel = av_malloc(kernel_size * sizeof(float));
208             conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
209             if (!conv_params->kernel || !conv_params->biases){
210                 avio_closep(&model_file_context);
211                 ff_dnn_free_model_native(&model);
212                 return NULL;
213             }
214             for (i = 0; i < kernel_size; ++i){
215                 conv_params->kernel[i] = av_int2float(avio_rl32(model_file_context));
216             }
217             for (i = 0; i < conv_params->output_num; ++i){
218                 conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
219             }
220             network->layers[layer].type = CONV;
221             network->layers[layer].params = conv_params;
222             break;
223         case DEPTH_TO_SPACE:
224             depth_to_space_params = av_malloc(sizeof(DepthToSpaceParams));
225             if (!depth_to_space_params){
226                 avio_closep(&model_file_context);
227                 ff_dnn_free_model_native(&model);
228                 return NULL;
229             }
230             depth_to_space_params->block_size = (int32_t)avio_rl32(model_file_context);
231             dnn_size += 4;
232             network->layers[layer].type = DEPTH_TO_SPACE;
233             network->layers[layer].params = depth_to_space_params;
234             break;
235         default:
236             avio_closep(&model_file_context);
237             ff_dnn_free_model_native(&model);
238             return NULL;
239         }
240     }
241
242     avio_closep(&model_file_context);
243
244     if (dnn_size != file_size){
245         ff_dnn_free_model_native(&model);
246         return NULL;
247     }
248
249     model->set_input_output = &set_input_output_native;
250
251     return model;
252 }
253
254 static int set_up_conv_layer(Layer* layer, const float* kernel, const float* biases, ActivationFunc activation,
255                              int32_t input_num, int32_t output_num, int32_t size)
256 {
257     ConvolutionalParams* conv_params;
258     int kernel_size;
259
260     conv_params = av_malloc(sizeof(ConvolutionalParams));
261     if (!conv_params){
262         return DNN_ERROR;
263     }
264     conv_params->activation = activation;
265     conv_params->input_num = input_num;
266     conv_params->output_num = output_num;
267     conv_params->kernel_size = size;
268     kernel_size = input_num * output_num * size * size;
269     conv_params->kernel = av_malloc(kernel_size * sizeof(float));
270     conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
271     if (!conv_params->kernel || !conv_params->biases){
272         av_freep(&conv_params->kernel);
273         av_freep(&conv_params->biases);
274         av_freep(&conv_params);
275         return DNN_ERROR;
276     }
277     memcpy(conv_params->kernel, kernel, kernel_size * sizeof(float));
278     memcpy(conv_params->biases, biases, output_num * sizeof(float));
279     layer->type = CONV;
280     layer->params = conv_params;
281
282     return DNN_SUCCESS;
283 }
284
285 DNNModel* ff_dnn_load_default_model_native(DNNDefaultModel model_type)
286 {
287     DNNModel* model = NULL;
288     ConvolutionalNetwork* network = NULL;
289     DepthToSpaceParams* depth_to_space_params;
290     int32_t layer;
291
292     model = av_malloc(sizeof(DNNModel));
293     if (!model){
294         return NULL;
295     }
296
297     network = av_malloc(sizeof(ConvolutionalNetwork));
298     if (!network){
299         av_freep(&model);
300         return NULL;
301     }
302     model->model = (void*)network;
303
304     switch (model_type){
305     case DNN_SRCNN:
306         network->layers_num = 4;
307         break;
308     case DNN_ESPCN:
309         network->layers_num = 5;
310         break;
311     default:
312         av_freep(&network);
313         av_freep(&model);
314         return NULL;
315     }
316
317     network->layers = av_malloc(network->layers_num * sizeof(Layer));
318     if (!network->layers){
319         av_freep(&network);
320         av_freep(&model);
321         return NULL;
322     }
323
324     for (layer = 0; layer < network->layers_num; ++layer){
325         network->layers[layer].output = NULL;
326         network->layers[layer].params = NULL;
327     }
328     network->layers[0].type = INPUT;
329     network->layers[0].params = av_malloc(sizeof(InputParams));
330     if (!network->layers[0].params){
331         ff_dnn_free_model_native(&model);
332         return NULL;
333     }
334
335     switch (model_type){
336     case DNN_SRCNN:
337         if (set_up_conv_layer(network->layers + 1, srcnn_conv1_kernel, srcnn_conv1_biases, RELU, 1, 64, 9) != DNN_SUCCESS ||
338             set_up_conv_layer(network->layers + 2, srcnn_conv2_kernel, srcnn_conv2_biases, RELU, 64, 32, 1) != DNN_SUCCESS ||
339             set_up_conv_layer(network->layers + 3, srcnn_conv3_kernel, srcnn_conv3_biases, RELU, 32, 1, 5) != DNN_SUCCESS){
340             ff_dnn_free_model_native(&model);
341             return NULL;
342         }
343         break;
344     case DNN_ESPCN:
345         if (set_up_conv_layer(network->layers + 1, espcn_conv1_kernel, espcn_conv1_biases, TANH, 1, 64, 5) != DNN_SUCCESS ||
346             set_up_conv_layer(network->layers + 2, espcn_conv2_kernel, espcn_conv2_biases, TANH, 64, 32, 3) != DNN_SUCCESS ||
347             set_up_conv_layer(network->layers + 3, espcn_conv3_kernel, espcn_conv3_biases, SIGMOID, 32, 4, 3) != DNN_SUCCESS){
348             ff_dnn_free_model_native(&model);
349             return NULL;
350         }
351         network->layers[4].type = DEPTH_TO_SPACE;
352         depth_to_space_params = av_malloc(sizeof(DepthToSpaceParams));
353         if (!depth_to_space_params){
354             ff_dnn_free_model_native(&model);
355             return NULL;
356         }
357         depth_to_space_params->block_size = 2;
358         network->layers[4].params = depth_to_space_params;
359     }
360
361     model->set_input_output = &set_input_output_native;
362
363     return model;
364 }
365
366 #define CLAMP_TO_EDGE(x, w) ((x) < 0 ? 0 : ((x) >= (w) ? (w - 1) : (x)))
367
368 static void convolve(const float* input, float* output, const ConvolutionalParams* conv_params, int width, int height)
369 {
370     int y, x, n_filter, ch, kernel_y, kernel_x;
371     int radius = conv_params->kernel_size >> 1;
372     int src_linesize = width * conv_params->input_num;
373     int filter_linesize = conv_params->kernel_size * conv_params->input_num;
374     int filter_size = conv_params->kernel_size * filter_linesize;
375
376     for (y = 0; y < height; ++y){
377         for (x = 0; x < width; ++x){
378             for (n_filter = 0; n_filter < conv_params->output_num; ++n_filter){
379                 output[n_filter] = conv_params->biases[n_filter];
380                 for (ch = 0; ch < conv_params->input_num; ++ch){
381                     for (kernel_y = 0; kernel_y < conv_params->kernel_size; ++kernel_y){
382                         for (kernel_x = 0; kernel_x < conv_params->kernel_size; ++kernel_x){
383                             output[n_filter] += input[CLAMP_TO_EDGE(y + kernel_y - radius, height) * src_linesize +
384                                                       CLAMP_TO_EDGE(x + kernel_x - radius, width) * conv_params->input_num + ch] *
385                                                 conv_params->kernel[n_filter * filter_size + kernel_y * filter_linesize +
386                                                                     kernel_x * conv_params->input_num + ch];
387                         }
388                     }
389                 }
390                 switch (conv_params->activation){
391                 case RELU:
392                     output[n_filter] = FFMAX(output[n_filter], 0.0);
393                     break;
394                 case TANH:
395                     output[n_filter] = 2.0f  / (1.0f + exp(-2.0f * output[n_filter])) - 1.0f;
396                     break;
397                 case SIGMOID:
398                     output[n_filter] = 1.0f / (1.0f + exp(-output[n_filter]));
399                 }
400             }
401             output += conv_params->output_num;
402         }
403     }
404 }
405
/**
 * Rearranges an interleaved height x width x channels image so that each
 * block_size*block_size group of channels becomes a block_size x block_size
 * spatial block: output is (height*block_size) x (width*block_size) x
 * (channels / block_size^2). channels must be divisible by block_size^2.
 */
static void depth_to_space(const float* input, float* output, int block_size, int width, int height, int channels)
{
    const int out_channels = channels / (block_size * block_size);
    const int block_rows = width * channels;             // block_size output rows
    const int row_stride = block_rows / block_size;      // one output row
    const int pixel_stride = out_channels * block_size;  // one input pixel, expanded
    const float* src = input;
    int y, x, by, bx, c;

    for (y = 0; y < height; ++y){
        float* dst = output + y * block_rows;
        for (x = 0; x < width; ++x){
            for (by = 0; by < block_size; ++by){
                for (bx = 0; bx < block_size; ++bx){
                    float* cell = dst + by * row_stride + x * pixel_stride + bx * out_channels;
                    for (c = 0; c < out_channels; ++c){
                        cell[c] = src[c];
                    }
                    src += out_channels;
                }
            }
        }
    }
}
428
429 DNNReturnType ff_dnn_execute_model_native(const DNNModel* model)
430 {
431     ConvolutionalNetwork* network = (ConvolutionalNetwork*)model->model;
432     int cur_width, cur_height, cur_channels;
433     int32_t layer;
434     InputParams* input_params;
435     ConvolutionalParams* conv_params;
436     DepthToSpaceParams* depth_to_space_params;
437
438     if (network->layers_num <= 0 || network->layers[0].type != INPUT || !network->layers[0].output){
439         return DNN_ERROR;
440     }
441     else{
442         input_params = (InputParams*)network->layers[0].params;
443         cur_width = input_params->width;
444         cur_height = input_params->height;
445         cur_channels = input_params->channels;
446     }
447
448     for (layer = 1; layer < network->layers_num; ++layer){
449         if (!network->layers[layer].output){
450             return DNN_ERROR;
451         }
452         switch (network->layers[layer].type){
453         case CONV:
454             conv_params = (ConvolutionalParams*)network->layers[layer].params;
455             convolve(network->layers[layer - 1].output, network->layers[layer].output, conv_params, cur_width, cur_height);
456             cur_channels = conv_params->output_num;
457             break;
458         case DEPTH_TO_SPACE:
459             depth_to_space_params = (DepthToSpaceParams*)network->layers[layer].params;
460             depth_to_space(network->layers[layer - 1].output, network->layers[layer].output,
461                            depth_to_space_params->block_size, cur_width, cur_height, cur_channels);
462             cur_height *= depth_to_space_params->block_size;
463             cur_width *= depth_to_space_params->block_size;
464             cur_channels /= depth_to_space_params->block_size * depth_to_space_params->block_size;
465             break;
466         case INPUT:
467             return DNN_ERROR;
468         }
469     }
470
471     return DNN_SUCCESS;
472 }
473
474 void ff_dnn_free_model_native(DNNModel** model)
475 {
476     ConvolutionalNetwork* network;
477     ConvolutionalParams* conv_params;
478     int32_t layer;
479
480     if (*model)
481     {
482         network = (ConvolutionalNetwork*)(*model)->model;
483         for (layer = 0; layer < network->layers_num; ++layer){
484             av_freep(&network->layers[layer].output);
485             if (network->layers[layer].type == CONV){
486                 conv_params = (ConvolutionalParams*)network->layers[layer].params;
487                 av_freep(&conv_params->kernel);
488                 av_freep(&conv_params->biases);
489             }
490             av_freep(&network->layers[layer].params);
491         }
492         av_freep(network);
493         av_freep(model);
494     }
495 }