#include "../dnn_interface.h"
#include "libavformat/avio.h"
+#include "libavutil/opt.h"
-typedef enum {INPUT, CONV, DEPTH_TO_SPACE} DNNLayerType;
-
+/**
+ * The enum values of DNNLayerType must not be changed:
+ * the same values are used in convert_from_tensorflow.py,
+ * and they are used to index the layer execution/load
+ * function pointers.
+ */
+typedef enum {
+ DLT_INPUT = 0,
+ DLT_CONV2D = 1,
+ DLT_DEPTH_TO_SPACE = 2,
+ DLT_MIRROR_PAD = 3,
+ DLT_MAXIMUM = 4,
+ DLT_MATH_BINARY = 5,
+ DLT_MATH_UNARY = 6,
+ DLT_AVG_POOL = 7,
+ DLT_DENSE = 8,
+ DLT_COUNT
+} DNNLayerType;
+
+typedef enum {DOT_INPUT = 1, DOT_OUTPUT = 2, DOT_INTERMEDIATE = DOT_INPUT | DOT_OUTPUT} DNNOperandType;
+typedef enum {VALID, SAME, SAME_CLAMP_TO_EDGE} DNNPaddingParam;
typedef enum {RELU, TANH, SIGMOID, NONE, LEAKY_RELU} DNNActivationFunc;
-typedef enum {VALID, SAME, SAME_CLAMP_TO_EDGE} DNNConvPaddingParam;
-
typedef struct Layer{
DNNLayerType type;
- float *output;
+    /**
+     * A layer can have multiple inputs and one output.
+     * 4 is simply a number large enough for the input operands
+     * (increase it if necessary). A fixed-size array is used instead
+     * of 'int32_t *input_operand_indexes' to avoid memory-leak concerns.
+     */
+ int32_t input_operand_indexes[4];
+ int32_t output_operand_index;
void *params;
} Layer;
-typedef struct ConvolutionalParams{
- int32_t input_num, output_num, kernel_size;
- DNNActivationFunc activation;
- DNNConvPaddingParam padding_method;
- int32_t dilation;
- float *kernel;
- float *biases;
-} ConvolutionalParams;
+typedef struct DnnOperand{
+    /**
+     * There are two possible memory layouts, NHWC or NCHW, so generic
+     * dims are used; dims[0] is N (the batch/Number dimension).
+     */
+ int32_t dims[4];
+
+ /**
+ * input/output/intermediate operand of the network
+ */
+ DNNOperandType type;
+
+    /**
+     * Different data types such as float, half float, int8 etc.
+     * may be supported; only float is supported for now.
+     */
+ DNNDataType data_type;
+
+    /**
+     * NHWC if 1, otherwise NCHW.
+     * Only NHWC is supported for now; this flag is reserved
+     * for future extension.
+     */
+ int8_t isNHWC;
+
+    /**
+     * A fixed-size buffer is used instead of 'char *name'
+     * to avoid possible memory leaks.
+     */
+ char name[128];
+
+    /**
+     * Data pointer with the data length in bytes.
+     * usedNumbersLeft is only valid for an intermediate operand;
+     * it tells how many layers still depend on this operand.
+     * TODO: the memory can be reused once usedNumbersLeft reaches zero.
+     */
+ void *data;
+ int32_t length;
+ int32_t usedNumbersLeft;
+}DnnOperand;
typedef struct InputParams{
int height, width, channels;
} InputParams;
-typedef struct DepthToSpaceParams{
- int block_size;
-} DepthToSpaceParams;
+typedef struct NativeOptions{
+ uint32_t conv2d_threads;
+} NativeOptions;
+
+typedef struct NativeContext {
+ const AVClass *class;
+ NativeOptions options;
+} NativeContext;
// Represents simple feed-forward convolutional network.
-typedef struct ConvolutionalNetwork{
+typedef struct NativeModel{
+ NativeContext ctx;
+ DNNModel *model;
Layer *layers;
int32_t layers_num;
-} ConvolutionalNetwork;
+ DnnOperand *operands;
+ int32_t operands_num;
+} NativeModel;
-DNNModel *ff_dnn_load_model_native(const char *model_filename);
+DNNModel *ff_dnn_load_model_native(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx);
-DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *outputs, uint32_t nb_output);
+DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame,
+ const char **output_names, uint32_t nb_output, AVFrame *out_frame);
void ff_dnn_free_model_native(DNNModel **model);
+// NOTE: The caller must check for errors (return value <= 0) to handle
+// cases like integer overflow.
+int32_t ff_calculate_operand_data_length(const DnnOperand *oprd);
+int32_t ff_calculate_operand_dims_count(const DnnOperand *oprd);
#endif