git.sesse.net Git - ffmpeg/blob - libavfilter/dnn/dnn_backend_native.c

   1 /*
   2  * Copyright (c) 2018 Sergey Lavrushkin
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with FFmpeg; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 /**
  22  * @file
  23  * DNN native backend implementation.
  24  */
  25
  26 #include "dnn_backend_native.h"
  27 #include "libavutil/avassert.h"
  28 #include "dnn_backend_native_layer_conv2d.h"
  29 #include "dnn_backend_native_layers.h"
  30
  31 static const AVClass dnn_native_class = {
  32     .class_name = "dnn_native",
  33     .item_name  = av_default_item_name,
  34     .option     = NULL,
  35     .version    = LIBAVUTIL_VERSION_INT,
  36     .category   = AV_CLASS_CATEGORY_FILTER,
  37 };
  38
  39 static DNNReturnType get_input_native(void *model, DNNData *input, const char *input_name)
  40 {
  41     NativeModel *native_model = (NativeModel *)model;
  42     NativeContext *ctx = &native_model->ctx;
  43
  44     for (int i = 0; i < native_model->operands_num; ++i) {
  45         DnnOperand *oprd = &native_model->operands[i];
  46         if (strcmp(oprd->name, input_name) == 0) {
  47             if (oprd->type != DOT_INPUT) {
  48                 av_log(ctx, AV_LOG_ERROR, "Found \"%s\" in model, but it is not input node\n", input_name);
  49                 return DNN_ERROR;
  50             }
  51             input->dt = oprd->data_type;
  52             av_assert0(oprd->dims[0] == 1);
  53             input->height = oprd->dims[1];
  54             input->width = oprd->dims[2];
  55             input->channels = oprd->dims[3];
  56             return DNN_SUCCESS;
  57         }
  58     }
  59
  60     // do not find the input operand
  61     av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model\n", input_name);
  62     return DNN_ERROR;
  63 }
  64
  65 static DNNReturnType set_input_native(void *model, DNNData *input, const char *input_name)
  66 {
  67     NativeModel *native_model = (NativeModel *)model;
  68     NativeContext *ctx = &native_model->ctx;
  69     DnnOperand *oprd = NULL;
  70
  71     if (native_model->layers_num <= 0 || native_model->operands_num <= 0) {
  72         av_log(ctx, AV_LOG_ERROR, "No operands or layers in model\n");
  73         return DNN_ERROR;
  74     }
  75
  76     /* inputs */
  77     for (int i = 0; i < native_model->operands_num; ++i) {
  78         oprd = &native_model->operands[i];
  79         if (strcmp(oprd->name, input_name) == 0) {
  80             if (oprd->type != DOT_INPUT) {
  81                 av_log(ctx, AV_LOG_ERROR, "Found \"%s\" in model, but it is not input node\n", input_name);
  82                 return DNN_ERROR;
  83             }
  84             break;
  85         }
  86         oprd = NULL;
  87     }
  88     if (!oprd) {
  89         av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model\n", input_name);
  90         return DNN_ERROR;
  91     }
  92
  93     oprd->dims[0] = 1;
  94     oprd->dims[1] = input->height;
  95     oprd->dims[2] = input->width;
  96     oprd->dims[3] = input->channels;
  97
  98     av_freep(&oprd->data);
  99     oprd->length = calculate_operand_data_length(oprd);
 100     if (oprd->length <= 0) {
 101         av_log(ctx, AV_LOG_ERROR, "The input data length overflow\n");
 102         return DNN_ERROR;
 103     }
 104     oprd->data = av_malloc(oprd->length);
 105     if (!oprd->data) {
 106         av_log(ctx, AV_LOG_ERROR, "Failed to malloc memory for input data\n");
 107         return DNN_ERROR;
 108     }
 109
 110     input->data = oprd->data;
 111
 112     return DNN_SUCCESS;
 113 }
 114
 115 // Loads model and its parameters that are stored in a binary file with following structure:
 116 // layers_num,layer_type,layer_parameterss,layer_type,layer_parameters...
 117 // For CONV layer: activation_function, input_num, output_num, kernel_size, kernel, biases
 118 // For DEPTH_TO_SPACE layer: block_size
 119 DNNModel *ff_dnn_load_model_native(const char *model_filename, const char *options)
 120 {
 121     DNNModel *model = NULL;
 122     char header_expected[] = "FFMPEGDNNNATIVE";
 123     char *buf;
 124     size_t size;
 125     int version, header_size, major_version_expected = 1;
 126     NativeModel *native_model = NULL;
 127     AVIOContext *model_file_context;
 128     int file_size, dnn_size, parsed_size;
 129     int32_t layer;
 130     DNNLayerType layer_type;
 131
 132     if (avio_open(&model_file_context, model_filename, AVIO_FLAG_READ) < 0){
 133         return NULL;
 134     }
 135     file_size = avio_size(model_file_context);
 136
 137     model = av_mallocz(sizeof(DNNModel));
 138     if (!model){
 139         goto fail;
 140     }
 141
 142     /**
 143      * check file header with string and version
 144      */
 145     size = sizeof(header_expected);
 146     buf = av_malloc(size);
 147     if (!buf) {
 148         goto fail;
 149     }
 150
 151     // size - 1 to skip the ending '\0' which is not saved in file
 152     avio_get_str(model_file_context, size - 1, buf, size);
 153     dnn_size = size - 1;
 154     if (strncmp(buf, header_expected, size) != 0) {
 155         av_freep(&buf);
 156         goto fail;
 157     }
 158     av_freep(&buf);
 159
 160     version = (int32_t)avio_rl32(model_file_context);
 161     dnn_size += 4;
 162     if (version != major_version_expected) {
 163         goto fail;
 164     }
 165
 166     // currently no need to check minor version
 167     version = (int32_t)avio_rl32(model_file_context);
 168     dnn_size += 4;
 169     header_size = dnn_size;
 170
 171     native_model = av_mallocz(sizeof(NativeModel));
 172     if (!native_model){
 173         goto fail;
 174     }
 175
 176     native_model->ctx.class = &dnn_native_class;
 177     model->model = (void *)native_model;
 178
 179     avio_seek(model_file_context, file_size - 8, SEEK_SET);
 180     native_model->layers_num = (int32_t)avio_rl32(model_file_context);
 181     native_model->operands_num = (int32_t)avio_rl32(model_file_context);
 182     dnn_size += 8;
 183     avio_seek(model_file_context, header_size, SEEK_SET);
 184
 185     native_model->layers = av_mallocz(native_model->layers_num * sizeof(Layer));
 186     if (!native_model->layers){
 187         goto fail;
 188     }
 189
 190     native_model->operands = av_mallocz(native_model->operands_num * sizeof(DnnOperand));
 191     if (!native_model->operands){
 192         goto fail;
 193     }
 194
 195     for (layer = 0; layer < native_model->layers_num; ++layer){
 196         layer_type = (int32_t)avio_rl32(model_file_context);
 197         dnn_size += 4;
 198
 199         if (layer_type >= DLT_COUNT) {
 200             goto fail;
 201         }
 202
 203         native_model->layers[layer].type = layer_type;
 204         parsed_size = layer_funcs[layer_type].pf_load(&native_model->layers[layer], model_file_context, file_size, native_model->operands_num);
 205         if (!parsed_size) {
 206             goto fail;
 207         }
 208         dnn_size += parsed_size;
 209     }
 210
 211     for (int32_t i = 0; i < native_model->operands_num; ++i){
 212         DnnOperand *oprd;
 213         int32_t name_len;
 214         int32_t operand_index = (int32_t)avio_rl32(model_file_context);
 215         dnn_size += 4;
 216
 217         if (operand_index >= native_model->operands_num) {
 218             goto fail;
 219         }
 220
 221         oprd = &native_model->operands[operand_index];
 222         name_len = (int32_t)avio_rl32(model_file_context);
 223         dnn_size += 4;
 224
 225         avio_get_str(model_file_context, name_len, oprd->name, sizeof(oprd->name));
 226         dnn_size += name_len;
 227
 228         oprd->type = (int32_t)avio_rl32(model_file_context);
 229         dnn_size += 4;
 230
 231         oprd->data_type = (int32_t)avio_rl32(model_file_context);
 232         dnn_size += 4;
 233
 234         for (int32_t dim = 0; dim < 4; ++dim) {
 235             oprd->dims[dim] = (int32_t)avio_rl32(model_file_context);
 236             dnn_size += 4;
 237         }
 238
 239         oprd->isNHWC = 1;
 240     }
 241
 242     avio_closep(&model_file_context);
 243
 244     if (dnn_size != file_size){
 245         ff_dnn_free_model_native(&model);
 246         return NULL;
 247     }
 248
 249     model->set_input = &set_input_native;
 250     model->get_input = &get_input_native;
 251     model->options = options;
 252
 253     return model;
 254
 255 fail:
 256     ff_dnn_free_model_native(&model);
 257     avio_closep(&model_file_context);
 258     return NULL;
 259 }
 260
 261 DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *outputs, const char **output_names, uint32_t nb_output)
 262 {
 263     NativeModel *native_model = (NativeModel *)model->model;
 264     NativeContext *ctx = &native_model->ctx;
 265     int32_t layer;
 266
 267     if (native_model->layers_num <= 0 || native_model->operands_num <= 0) {
 268         av_log(ctx, AV_LOG_ERROR, "No operands or layers in model\n");
 269         return DNN_ERROR;
 270     }
 271     if (!native_model->operands[0].data) {
 272         av_log(ctx, AV_LOG_ERROR, "Empty model input data\n");
 273         return DNN_ERROR;
 274     }
 275
 276     for (layer = 0; layer < native_model->layers_num; ++layer){
 277         DNNLayerType layer_type = native_model->layers[layer].type;
 278         if (layer_funcs[layer_type].pf_exec(native_model->operands,
 279                                             native_model->layers[layer].input_operand_indexes,
 280                                             native_model->layers[layer].output_operand_index,
 281                                             native_model->layers[layer].params,
 282                                             &native_model->ctx) == DNN_ERROR) {
 283             av_log(ctx, AV_LOG_ERROR, "Failed to execuet model\n");
 284             return DNN_ERROR;
 285         }
 286     }
 287
 288     for (uint32_t i = 0; i < nb_output; ++i) {
 289         DnnOperand *oprd = NULL;
 290         const char *output_name = output_names[i];
 291         for (int j = 0; j < native_model->operands_num; ++j) {
 292             if (strcmp(native_model->operands[j].name, output_name) == 0) {
 293                 oprd = &native_model->operands[j];
 294                 break;
 295             }
 296         }
 297
 298         if (oprd == NULL) {
 299             av_log(ctx, AV_LOG_ERROR, "Could not find output in model\n");
 300             return DNN_ERROR;
 301         }
 302
 303         outputs[i].data = oprd->data;
 304         outputs[i].height = oprd->dims[1];
 305         outputs[i].width = oprd->dims[2];
 306         outputs[i].channels = oprd->dims[3];
 307         outputs[i].dt = oprd->data_type;
 308     }
 309
 310     return DNN_SUCCESS;
 311 }
 312
 313 int32_t calculate_operand_dims_count(const DnnOperand *oprd)
 314 {
 315     int32_t result = 1;
 316     for (int i = 0; i < 4; ++i)
 317         result *= oprd->dims[i];
 318
 319     return result;
 320 }
 321
 322 int32_t calculate_operand_data_length(const DnnOperand* oprd)
 323 {
 324     // currently, we just support DNN_FLOAT
 325     uint64_t len = sizeof(float);
 326     for (int i = 0; i < 4; i++) {
 327         len *= oprd->dims[i];
 328         if (len > INT32_MAX)
 329             return 0;
 330     }
 331     return len;
 332 }
 333
 334 void ff_dnn_free_model_native(DNNModel **model)
 335 {
 336     NativeModel *native_model;
 337     ConvolutionalParams *conv_params;
 338     int32_t layer;
 339
 340     if (*model)
 341     {
 342         if ((*model)->model) {
 343             native_model = (NativeModel *)(*model)->model;
 344             if (native_model->layers) {
 345                 for (layer = 0; layer < native_model->layers_num; ++layer){
 346                     if (native_model->layers[layer].type == DLT_CONV2D){
 347                         conv_params = (ConvolutionalParams *)native_model->layers[layer].params;
 348                         av_freep(&conv_params->kernel);
 349                         av_freep(&conv_params->biases);
 350                     }
 351                     av_freep(&native_model->layers[layer].params);
 352                 }
 353                 av_freep(&native_model->layers);
 354             }
 355
 356             if (native_model->operands) {
 357                 for (uint32_t operand = 0; operand < native_model->operands_num; ++operand)
 358                     av_freep(&native_model->operands[operand].data);
 359                 av_freep(&native_model->operands);
 360             }
 361
 362             av_freep(&native_model);
 363         }
 364         av_freep(model);
 365     }
 366 }