2 * Copyright (c) 2018 Sergey Lavrushkin
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * DNN inference functions interface for native backend.
27 #ifndef AVFILTER_DNN_DNN_BACKEND_NATIVE_H
28 #define AVFILTER_DNN_DNN_BACKEND_NATIVE_H
30 #include "../dnn_interface.h"
31 #include "libavformat/avio.h"
32 #include "libavutil/opt.h"
35 * the enum values of DNNLayerType must not be changed:
36 * the same values are used in convert_from_tensorflow.py,
37 * and they are also used to index the layer execution/load function pointers.
42 DLT_DEPTH_TO_SPACE = 2,
/**
 * Role of an operand within the network.
 * DOT_INPUT (1) and DOT_OUTPUT (2) occupy separate bits, and
 * DOT_INTERMEDIATE is defined as their bitwise OR, i.e. 3.
 */
52 typedef enum {DOT_INPUT = 1, DOT_OUTPUT = 2, DOT_INTERMEDIATE = DOT_INPUT | DOT_OUTPUT} DNNOperandType;
/**
 * Padding mode selector.
 * No explicit values are given, so VALID = 0, SAME = 1 and
 * SAME_CLAMP_TO_EDGE = 2.
 * NOTE(review): how each mode actually pads is decided by code outside
 * this header -- confirm against the layer implementation.
 */
53 typedef enum {VALID, SAME, SAME_CLAMP_TO_EDGE} DNNPaddingParam;
/**
 * Activation function selector.
 * Values are implicit: RELU = 0, TANH = 1, SIGMOID = 2, NONE = 3,
 * LEAKY_RELU = 4.
 * Note that NONE is not 0, so a zero-initialized DNNActivationFunc
 * compares equal to RELU rather than NONE.
 */
54 typedef enum {RELU, TANH, SIGMOID, NONE, LEAKY_RELU} DNNActivationFunc;
59 * a layer can have multiple inputs and one output.
60 * 4 is just a large enough number of input operands (increase it if necessary);
61 * 'int32_t *input_operand_indexes' is deliberately not used, so there are no memory leaks to worry about.
63 int32_t input_operand_indexes[4];
64 int32_t output_operand_index;
68 typedef struct DnnOperand{
70 * there are two memory layouts, NHWC or NCHW, so we use dims,
76 * input/output/intermediate operand of the network
81 * different data types such as float, half float, int8, etc. are to be supported;
82 * only float is supported for now.
84 DNNDataType data_type;
87 * NHWC if 1, otherwise NCHW.
88 * only NHWC is supported at first; this flag exists for future extension.
93 * to avoid a possible memory leak, do not use char *name
98 * data pointer with data length in bytes.
99 * usedNumbersLeft is only valid for intermediate operands;
100 * it counts how many layers still depend on this operand.
101 * todo: the memory can be reused when usedNumbersLeft is zero.
105 int32_t usedNumbersLeft;
108 typedef struct InputParams{
109 int height, width, channels;
112 typedef struct NativeOptions{
113 uint32_t conv2d_threads;
116 typedef struct NativeContext {
117 const AVClass *class;
118 NativeOptions options;
121 // Represents a simple feed-forward convolutional network.
122 typedef struct NativeModel{
127 DnnOperand *operands;
128 int32_t operands_num;
/**
 * Load a model for the native backend (declaration only; the definition
 * is not part of this header).
 *
 * @param model_filename name of the model file
 *                       (presumably a path -- confirm in the implementation)
 * @param options        option string; NOTE(review): its format is not
 *                       visible from this header
 * @param filter_ctx     an AVFilterContext supplied by the caller
 * @return pointer to a DNNModel; NOTE(review): presumably NULL on
 *         failure -- confirm in the implementation
 */
131 DNNModel *ff_dnn_load_model_native(const char *model_filename, const char *options, AVFilterContext *filter_ctx);
/**
 * Execute a model with the native backend (declaration only; the
 * definition is not part of this header).
 *
 * @param model        model to run; it is not modified through this pointer
 * @param input_name   name of the model input
 *                     (presumably the operand fed from in_frame -- confirm)
 * @param in_frame     input frame
 * @param output_names array of output names
 *                     (nb_output presumably gives its length -- confirm)
 * @param nb_output    number of requested outputs
 * @param out_frame    output frame
 * @return a DNNReturnType status code
 *         (presumably declared in ../dnn_interface.h -- confirm)
 */
133 DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame,
134 const char **output_names, uint32_t nb_output, AVFrame *out_frame);
/**
 * Free a model of the native backend (declaration only; the definition
 * is not part of this header).
 *
 * @param model address of the model pointer; NOTE(review): presumably
 *              *model is reset to NULL after freeing -- confirm in the
 *              implementation
 */
136 void ff_dnn_free_model_native(DNNModel **model);
// Declared here only. NOTE(review): presumably returns the operand's data
// length in bytes -- confirm in the implementation.
138 // NOTE: The caller must check for an error (return value <= 0) to handle
139 // cases such as integer overflow.
140 int32_t ff_calculate_operand_data_length(const DnnOperand *oprd);
// Declared here only; takes a read-only operand and returns an int32_t.
// NOTE(review): presumably the element count implied by the operand's dims.
// It is not visible from this header whether the error convention
// (return value <= 0) documented for ff_calculate_operand_data_length
// also applies here -- confirm in the implementation.
141 int32_t ff_calculate_operand_dims_count(const DnnOperand *oprd);