git.sesse.net Git - ffmpeg/blob - libavfilter/dnn/dnn_backend_native.h

   1 /*
   2  * Copyright (c) 2018 Sergey Lavrushkin
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with FFmpeg; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 /**
  22  * @file
  23  * DNN inference functions interface for native backend.
  24  */
  25
  26
  27 #ifndef AVFILTER_DNN_DNN_BACKEND_NATIVE_H
  28 #define AVFILTER_DNN_DNN_BACKEND_NATIVE_H
  29
  30 #include "../dnn_interface.h"
  31 #include "libavformat/avio.h"
  32 #include "libavutil/opt.h"
  33
  34 /**
  35  * the enum value of DNNLayerType should not be changed,
  36  * the same values are used in convert_from_tensorflow.py
  37  * and, it is used to index the layer execution/load function pointer.
  38  */
  39 typedef enum {
  40     DLT_INPUT = 0,
  41     DLT_CONV2D = 1,
  42     DLT_DEPTH_TO_SPACE = 2,
  43     DLT_MIRROR_PAD = 3,
  44     DLT_MAXIMUM = 4,
  45     DLT_MATH_BINARY = 5,
  46     DLT_MATH_UNARY = 6,
  47     DLT_AVG_POOL = 7,
  48     DLT_DENSE = 8,
  49     DLT_COUNT
  50 } DNNLayerType;
  51
  52 typedef enum {DOT_INPUT = 1, DOT_OUTPUT = 2, DOT_INTERMEDIATE = DOT_INPUT | DOT_OUTPUT} DNNOperandType;
  53 typedef enum {VALID, SAME, SAME_CLAMP_TO_EDGE} DNNPaddingParam;
  54 typedef enum {RELU, TANH, SIGMOID, NONE, LEAKY_RELU} DNNActivationFunc;
  55
  56 typedef struct Layer{
  57     DNNLayerType type;
  58     /**
  59      * a layer can have multiple inputs and one output.
  60      * 4 is just a big enough number for input operands (increase it if necessary),
  61      * do not use 'int32_t *input_operand_indexes', so we don't worry about mem leaks.
  62      */
  63     int32_t input_operand_indexes[4];
  64     int32_t output_operand_index;
  65     void *params;
  66 } Layer;
  67
  68 typedef struct DnnOperand{
  69     /**
  70      * there are two memory layouts, NHWC or NCHW, so we use dims,
  71      * dims[0] is Number.
  72      */
  73     int32_t dims[4];
  74
  75     /**
  76      * input/output/intermediate operand of the network
  77      */
  78     DNNOperandType type;
  79
  80     /**
  81      * support different kinds of data type such as float, half float, int8 etc,
  82      * first support float now.
  83      */
  84     DNNDataType data_type;
  85
  86     /**
  87      * NHWC if 1, otherwise NCHW.
  88      * let's first support NHWC only, this flag is for extensive usage.
  89      */
  90     int8_t isNHWC;
  91
  92     /**
  93      * to avoid possible memory leak, do not use char *name
  94      */
  95     char name[128];
  96
  97     /**
  98      * data pointer with data length in bytes.
  99      * usedNumbersLeft is only valid for intermediate operand,
 100      * it means how many layers still depend on this operand,
 101      * todo: the memory can be reused when usedNumbersLeft is zero.
 102      */
 103     void *data;
 104     int32_t length;
 105     int32_t usedNumbersLeft;
 106 }DnnOperand;
 107
 108 typedef struct InputParams{
 109     int height, width, channels;
 110 } InputParams;
 111
 112 typedef struct NativeOptions{
 113     uint32_t conv2d_threads;
 114 } NativeOptions;
 115
 116 typedef struct NativeContext {
 117     const AVClass *class;
 118     NativeOptions options;
 119 } NativeContext;
 120
 121 // Represents simple feed-forward convolutional network.
 122 typedef struct NativeModel{
 123     NativeContext ctx;
 124     DNNModel *model;
 125     Layer *layers;
 126     int32_t layers_num;
 127     DnnOperand *operands;
 128     int32_t operands_num;
 129 } NativeModel;
 130
 131 DNNModel *ff_dnn_load_model_native(const char *model_filename, const char *options, AVFilterContext *filter_ctx);
 132
 133 DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame,
 134                                           const char **output_names, uint32_t nb_output, AVFrame *out_frame);
 135
 136 void ff_dnn_free_model_native(DNNModel **model);
 137
 138 // NOTE: User must check for error (return value <= 0) to handle
 139 // case like integer overflow.
 140 int32_t ff_calculate_operand_data_length(const DnnOperand *oprd);
 141 int32_t ff_calculate_operand_dims_count(const DnnOperand *oprd);
 142 #endif