/*
 * Copyright (c) 2018 Sergey Lavrushkin
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * DNN native backend implementation.
 */

#include "dnn_backend_native.h"
#include "libavutil/avassert.h"
#include "dnn_backend_native_layer_pad.h"
#include "dnn_backend_native_layer_conv2d.h"

static DNNReturnType set_input_output_native(void *model, DNNInputData *input, const char *input_name, const char **output_names, uint32_t nb_output)
{
    ConvolutionalNetwork *network = (ConvolutionalNetwork *)model;

    if (network->layers_num <= 0 || network->operands_num <= 0)
        return DNN_ERROR;

    av_assert0(input->dt == DNN_FLOAT);

    /**
     * For now, assume network->operands[0] is the input operand.
     */
    network->operands[0].dims[0] = 1;
    network->operands[0].dims[1] = input->height;
    network->operands[0].dims[2] = input->width;
    network->operands[0].dims[3] = input->channels;
    network->operands[0].type = DOT_INPUT;
    network->operands[0].data_type = DNN_FLOAT;
    network->operands[0].isNHWC = 1;

    av_freep(&network->operands[0].data);
    network->operands[0].length = calculate_operand_data_length(&network->operands[0]);
    network->operands[0].data = av_malloc(network->operands[0].length);
    if (!network->operands[0].data)
        return DNN_ERROR;

    input->data = network->operands[0].data;
    return DNN_SUCCESS;
}
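/*
 * Note on usage: after this call, input->data aliases network->operands[0].data;
 * the caller is expected to write input->height * input->width * input->channels
 * floats into it before running ff_dnn_execute_model_native(). Calling this
 * function again is safe: the previous input buffer is released via av_freep()
 * before a new one is allocated.
 */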

// Loads a model and its parameters stored in a binary file with the following
// layout, matching what the parser below expects:
//   header string "FFMPEGDNNNATIVE", major version, minor version,
//   one block per layer (layer_type, layer parameters, input/output operand indexes),
//   one block per operand (operand index, name, type, data type, 4 dims),
//   and, in the last 8 bytes of the file, layers_num and operands_num.
// CONV layer parameters: dilation, padding_method, activation, input_num,
//   output_num, kernel_size, kernel weights, biases.
// DEPTH_TO_SPACE layer parameters: block_size.
// MIRROR_PAD layer parameters: mode, paddings[4][2].
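/*
 * Illustrative sketch (not a real dump): a hypothetical minimal model with one
 * CONV layer and two operands would be serialized roughly as this sequence of
 * little-endian fields, matching the reads in ff_dnn_load_model_native() below:
 *
 *   "FFMPEGDNNNATIVE"                        header string, no trailing '\0'
 *   major_version (0), minor_version         int32 each
 *   CONV                                     layer_type
 *   dilation, padding_method, activation,
 *   input_num, output_num, kernel_size       conv parameters, int32 each
 *   kernel..., biases...                     floats: input_num*output_num*kernel_size^2, then output_num
 *   input_operand_index, output_operand_index
 *   index, name_len, name, type, data_type, dims[4]    first operand
 *   index, name_len, name, type, data_type, dims[4]    second operand
 *   layers_num (1), operands_num (2)         final 8 bytes of the file
 */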
DNNModel *ff_dnn_load_model_native(const char *model_filename)
{
    DNNModel *model = NULL;
    char header_expected[] = "FFMPEGDNNNATIVE";
    char *buf;
    size_t size;
    int version, header_size, major_version_expected = 0;
    ConvolutionalNetwork *network = NULL;
    AVIOContext *model_file_context;
    int file_size, dnn_size, kernel_size, i;
    int32_t layer;
    DNNLayerType layer_type;
    ConvolutionalParams *conv_params;
    DepthToSpaceParams *depth_to_space_params;
    LayerPadParams *pad_params;

    model = av_malloc(sizeof(DNNModel));
    if (!model){
        return NULL;
    }

    if (avio_open(&model_file_context, model_filename, AVIO_FLAG_READ) < 0){
        av_freep(&model);
        return NULL;
    }
    file_size = avio_size(model_file_context);

    /**
     * check file header with string and version
     */
    size = sizeof(header_expected);
    buf = av_malloc(size);
    if (!buf) {
        avio_closep(&model_file_context);
        av_freep(&model);
        return NULL;
    }

    // size - 1 to skip the ending '\0' which is not saved in file
    avio_get_str(model_file_context, size - 1, buf, size);
    dnn_size = size - 1;
    if (strncmp(buf, header_expected, size) != 0) {
        av_freep(&buf);
        avio_closep(&model_file_context);
        av_freep(&model);
        return NULL;
    }
    av_freep(&buf);

    version = (int32_t)avio_rl32(model_file_context);
    dnn_size += 4;
    if (version != major_version_expected) {
        avio_closep(&model_file_context);
        av_freep(&model);
        return NULL;
    }

    // currently no need to check minor version
    version = (int32_t)avio_rl32(model_file_context);
    dnn_size += 4;
    header_size = dnn_size;

    network = av_mallocz(sizeof(ConvolutionalNetwork));
    if (!network){
        avio_closep(&model_file_context);
        av_freep(&model);
        return NULL;
    }
    model->model = (void *)network;

    avio_seek(model_file_context, file_size - 8, SEEK_SET);
    network->layers_num = (int32_t)avio_rl32(model_file_context);
    network->operands_num = (int32_t)avio_rl32(model_file_context);
    dnn_size += 8;
    avio_seek(model_file_context, header_size, SEEK_SET);

    network->layers = av_mallocz(network->layers_num * sizeof(Layer));
    if (!network->layers){
        avio_closep(&model_file_context);
        ff_dnn_free_model_native(&model);
        return NULL;
    }

    network->operands = av_mallocz(network->operands_num * sizeof(DnnOperand));
    if (!network->operands){
        avio_closep(&model_file_context);
        ff_dnn_free_model_native(&model);
        return NULL;
    }

    for (layer = 0; layer < network->layers_num; ++layer){
        layer_type = (int32_t)avio_rl32(model_file_context);
        dnn_size += 4;
        switch (layer_type){
        case CONV:
            conv_params = av_malloc(sizeof(ConvolutionalParams));
            if (!conv_params){
                avio_closep(&model_file_context);
                ff_dnn_free_model_native(&model);
                return NULL;
            }
            conv_params->dilation = (int32_t)avio_rl32(model_file_context);
            conv_params->padding_method = (int32_t)avio_rl32(model_file_context);
            conv_params->activation = (int32_t)avio_rl32(model_file_context);
            conv_params->input_num = (int32_t)avio_rl32(model_file_context);
            conv_params->output_num = (int32_t)avio_rl32(model_file_context);
            conv_params->kernel_size = (int32_t)avio_rl32(model_file_context);
            kernel_size = conv_params->input_num * conv_params->output_num *
                          conv_params->kernel_size * conv_params->kernel_size;
            dnn_size += 24 + ((kernel_size + conv_params->output_num) << 2);
            if (dnn_size > file_size || conv_params->input_num <= 0 ||
                conv_params->output_num <= 0 || conv_params->kernel_size <= 0){
                avio_closep(&model_file_context);
                av_freep(&conv_params);
                ff_dnn_free_model_native(&model);
                return NULL;
            }
            conv_params->kernel = av_malloc(kernel_size * sizeof(float));
            conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
            if (!conv_params->kernel || !conv_params->biases){
                avio_closep(&model_file_context);
                av_freep(&conv_params->kernel);
                av_freep(&conv_params->biases);
                av_freep(&conv_params);
                ff_dnn_free_model_native(&model);
                return NULL;
            }
            for (i = 0; i < kernel_size; ++i){
                conv_params->kernel[i] = av_int2float(avio_rl32(model_file_context));
            }
            for (i = 0; i < conv_params->output_num; ++i){
                conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
            }
            network->layers[layer].input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
            network->layers[layer].output_operand_index = (int32_t)avio_rl32(model_file_context);
            dnn_size += 8;
            network->layers[layer].type = CONV;
            network->layers[layer].params = conv_params;
            break;
        case DEPTH_TO_SPACE:
            depth_to_space_params = av_malloc(sizeof(DepthToSpaceParams));
            if (!depth_to_space_params){
                avio_closep(&model_file_context);
                ff_dnn_free_model_native(&model);
                return NULL;
            }
            depth_to_space_params->block_size = (int32_t)avio_rl32(model_file_context);
            dnn_size += 4;
            network->layers[layer].input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
            network->layers[layer].output_operand_index = (int32_t)avio_rl32(model_file_context);
            dnn_size += 8;
            network->layers[layer].type = DEPTH_TO_SPACE;
            network->layers[layer].params = depth_to_space_params;
            break;
        case MIRROR_PAD:
            pad_params = av_malloc(sizeof(LayerPadParams));
            if (!pad_params){
                avio_closep(&model_file_context);
                ff_dnn_free_model_native(&model);
                return NULL;
            }
            pad_params->mode = (int32_t)avio_rl32(model_file_context);
            dnn_size += 4;
            for (i = 0; i < 4; ++i) {
                pad_params->paddings[i][0] = avio_rl32(model_file_context);
                pad_params->paddings[i][1] = avio_rl32(model_file_context);
                dnn_size += 8;
            }
            network->layers[layer].input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
            network->layers[layer].output_operand_index = (int32_t)avio_rl32(model_file_context);
            dnn_size += 8;
            network->layers[layer].type = MIRROR_PAD;
            network->layers[layer].params = pad_params;
            break;
        default:
            avio_closep(&model_file_context);
            ff_dnn_free_model_native(&model);
            return NULL;
        }
    }

    for (int32_t i = 0; i < network->operands_num; ++i){
        DnnOperand *oprd;
        int32_t name_len;
        int32_t operand_index = (int32_t)avio_rl32(model_file_context);
        dnn_size += 4;

        oprd = &network->operands[operand_index];
        name_len = (int32_t)avio_rl32(model_file_context);
        dnn_size += 4;

        avio_get_str(model_file_context, name_len, oprd->name, sizeof(oprd->name));
        dnn_size += name_len;

        oprd->type = (int32_t)avio_rl32(model_file_context);
        dnn_size += 4;

        oprd->data_type = (int32_t)avio_rl32(model_file_context);
        dnn_size += 4;

        for (int32_t dim = 0; dim < 4; ++dim) {
            oprd->dims[dim] = (int32_t)avio_rl32(model_file_context);
            dnn_size += 4;
        }

        oprd->isNHWC = 1;
    }

    avio_closep(&model_file_context);

    if (dnn_size != file_size){
        ff_dnn_free_model_native(&model);
        return NULL;
    }

    model->set_input_output = &set_input_output_native;

    return model;
}

static int depth_to_space(DnnOperand *operands, const int32_t *input_operand_indexes, int32_t output_operand_index, int block_size)
{
    float *output;
    int32_t input_operand_index = input_operand_indexes[0];
    int number = operands[input_operand_index].dims[0];
    int height = operands[input_operand_index].dims[1];
    int width = operands[input_operand_index].dims[2];
    int channels = operands[input_operand_index].dims[3];
    const float *input = operands[input_operand_index].data;

    int y, x, by, bx, ch;
    int new_channels = channels / (block_size * block_size);
    int output_linesize = width * channels;
    int by_linesize = output_linesize / block_size;
    int x_linesize = new_channels * block_size;

    DnnOperand *output_operand = &operands[output_operand_index];
    output_operand->dims[0] = number;
    output_operand->dims[1] = height * block_size;
    output_operand->dims[2] = width * block_size;
    output_operand->dims[3] = new_channels;
    output_operand->length = calculate_operand_data_length(output_operand);
    output_operand->data = av_realloc(output_operand->data, output_operand->length);
    if (!output_operand->data)
        return -1;
    output = output_operand->data;

    for (y = 0; y < height; ++y){
        for (x = 0; x < width; ++x){
            for (by = 0; by < block_size; ++by){
                for (bx = 0; bx < block_size; ++bx){
                    for (ch = 0; ch < new_channels; ++ch){
                        output[by * by_linesize + x * x_linesize + bx * new_channels + ch] = input[ch];
                    }
                    input += new_channels;
                }
            }
        }
        output += output_linesize;
    }
    return 0;
}
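/*
 * Worked example (illustrative): with block_size = 2 and a 1 x H x W x 4 NHWC
 * input, new_channels is 1 and the output has shape 1 x 2H x 2W x 1. The four
 * channel values of input pixel (y, x) land in a 2x2 spatial block:
 *
 *   channel 0 -> output (2y,   2x)      channel 1 -> output (2y,   2x+1)
 *   channel 2 -> output (2y+1, 2x)      channel 3 -> output (2y+1, 2x+1)
 *
 * which is exactly the addressing done above through by_linesize (one output
 * row, here 2W elements) and x_linesize (block_size * new_channels, here 2).
 */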

DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *outputs, uint32_t nb_output)
{
    ConvolutionalNetwork *network = (ConvolutionalNetwork *)model->model;
    int32_t layer;
    ConvolutionalParams *conv_params;
    DepthToSpaceParams *depth_to_space_params;
    LayerPadParams *pad_params;

    if (network->layers_num <= 0 || network->operands_num <= 0)
        return DNN_ERROR;
    if (!network->operands[0].data)
        return DNN_ERROR;

    for (layer = 0; layer < network->layers_num; ++layer){
        switch (network->layers[layer].type){
        case CONV:
            conv_params = (ConvolutionalParams *)network->layers[layer].params;
            convolve(network->operands, network->layers[layer].input_operand_indexes,
                     network->layers[layer].output_operand_index, conv_params);
            break;
        case DEPTH_TO_SPACE:
            depth_to_space_params = (DepthToSpaceParams *)network->layers[layer].params;
            depth_to_space(network->operands, network->layers[layer].input_operand_indexes,
                           network->layers[layer].output_operand_index, depth_to_space_params->block_size);
            break;
        case MIRROR_PAD:
            pad_params = (LayerPadParams *)network->layers[layer].params;
            dnn_execute_layer_pad(network->operands, network->layers[layer].input_operand_indexes,
                                  network->layers[layer].output_operand_index, pad_params);
            break;
        case INPUT:
            return DNN_ERROR;
        }
    }

    // native mode does not support multiple outputs yet
    if (nb_output > 1)
        return DNN_ERROR;

    /**
     * For now, assume network->operands[network->operands_num - 1] is the output operand.
     */
    outputs[0].data = network->operands[network->operands_num - 1].data;
    outputs[0].height = network->operands[network->operands_num - 1].dims[1];
    outputs[0].width = network->operands[network->operands_num - 1].dims[2];
    outputs[0].channels = network->operands[network->operands_num - 1].dims[3];

    return DNN_SUCCESS;
}

int32_t calculate_operand_data_length(DnnOperand* operand)
{
    // currently, only DNN_FLOAT is supported
    return operand->dims[0] * operand->dims[1] * operand->dims[2] * operand->dims[3] * sizeof(float);
}
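/*
 * Example: an operand with dims {1, 4, 4, 3} holds 1 * 4 * 4 * 3 = 48 floats,
 * so the returned length is 48 * sizeof(float) = 192 bytes.
 */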

void ff_dnn_free_model_native(DNNModel **model)
{
    ConvolutionalNetwork *network;
    ConvolutionalParams *conv_params;
    int32_t layer;

    if (*model)
    {
        network = (ConvolutionalNetwork *)(*model)->model;
        for (layer = 0; layer < network->layers_num; ++layer){
            if (network->layers[layer].type == CONV){
                conv_params = (ConvolutionalParams *)network->layers[layer].params;
                av_freep(&conv_params->kernel);
                av_freep(&conv_params->biases);
            }
            av_freep(&network->layers[layer].params);
        }
        av_freep(&network->layers);

        for (uint32_t operand = 0; operand < network->operands_num; ++operand)
            av_freep(&network->operands[operand].data);
        av_freep(&network->operands);

        av_freep(&network);
        av_freep(model);
    }
}
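
/*
 * End-to-end usage, as a rough sketch; "srcnn.model", "x", "y", h, w, c,
 * frame_in and frame_out are placeholders, not names defined by this backend:
 *
 *   DNNModel *model = ff_dnn_load_model_native("srcnn.model");
 *   DNNInputData input = { .dt = DNN_FLOAT, .height = h, .width = w, .channels = c };
 *   DNNData output;
 *   const char *output_name = "y";
 *
 *   if (model && model->set_input_output(model->model, &input, "x", &output_name, 1) == DNN_SUCCESS) {
 *       memcpy(input.data, frame_in, h * w * c * sizeof(float));
 *       if (ff_dnn_execute_model_native(model, &output, 1) == DNN_SUCCESS)
 *           memcpy(frame_out, output.data,
 *                  output.height * output.width * output.channels * sizeof(float));
 *   }
 *   ff_dnn_free_model_native(&model);
 */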