git.sesse.net Git - ffmpeg/blob - libavfilter/dnn/dnn_backend_native.h

   1 /*
   2  * Copyright (c) 2018 Sergey Lavrushkin
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with FFmpeg; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 /**
  22  * @file
  23  * DNN inference functions interface for native backend.
  24  */
  25
  26
  27 #ifndef AVFILTER_DNN_DNN_BACKEND_NATIVE_H
  28 #define AVFILTER_DNN_DNN_BACKEND_NATIVE_H
  29
  30 #include "../dnn_interface.h"
  31 #include "libavformat/avio.h"
  32
  33 typedef enum {INPUT, CONV, DEPTH_TO_SPACE, MIRROR_PAD} DNNLayerType;
  34
  35 typedef enum {DOT_INPUT = 1, DOT_OUTPUT = 2, DOT_INTERMEDIATE = DOT_INPUT | DOT_INPUT} DNNOperandType;
  36
  37 typedef struct Layer{
  38     DNNLayerType type;
  39     /**
  40      * a layer can have multiple inputs and one output.
  41      * 4 is just a big enough number for input operands (increase it if necessary),
  42      * do not use 'int32_t *input_operand_indexes', so we don't worry about mem leaks.
  43      */
  44     int32_t input_operand_indexes[4];
  45     int32_t output_operand_index;
  46     void *params;
  47 } Layer;
  48
  49 typedef struct DnnOperand{
  50     /**
  51      * there are two memory layouts, NHWC or NCHW, so we use dims,
  52      * dims[0] is Number.
  53      */
  54     int32_t dims[4];
  55
  56     /**
  57      * input/output/intermediate operand of the network
  58      */
  59     DNNOperandType type;
  60
  61     /**
  62      * support different kinds of data type such as float, half float, int8 etc,
  63      * first support float now.
  64      */
  65     DNNDataType data_type;
  66
  67     /**
  68      * NHWC if 1, otherwise NCHW.
  69      * let's first support NHWC only, this flag is for extensive usage.
  70      */
  71     int8_t isNHWC;
  72
  73     /**
  74      * to avoid possible memory leak, do not use char *name
  75      */
  76     char name[128];
  77
  78     /**
  79      * data pointer with data length in bytes.
  80      * usedNumbersLeft is only valid for intermediate operand,
  81      * it means how many layers still depend on this operand,
  82      * todo: the memory can be reused when usedNumbersLeft is zero.
  83      */
  84     void *data;
  85     int32_t length;
  86     int32_t usedNumbersLeft;
  87 }DnnOperand;
  88
  89 typedef struct InputParams{
  90     int height, width, channels;
  91 } InputParams;
  92
  93 // Represents simple feed-forward convolutional network.
  94 typedef struct ConvolutionalNetwork{
  95     Layer *layers;
  96     int32_t layers_num;
  97     DnnOperand *operands;
  98     int32_t operands_num;
  99 } ConvolutionalNetwork;
 100
 101 DNNModel *ff_dnn_load_model_native(const char *model_filename);
 102
 103 DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *outputs, uint32_t nb_output);
 104
 105 void ff_dnn_free_model_native(DNNModel **model);
 106
 107 int32_t calculate_operand_data_length(DnnOperand *operand);
 108
 109 #endif