X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavfilter%2Fdnn%2Fdnn_backend_native.c;h=4fc3ba2044b5fdc02ed5e44dc52bfa0b6b4b2a00;hb=18befac5da2c71aeb9922b6fd5551502f4c5a913;hp=65a56704d386c1d919b129325d892df0ca9f2f68;hpb=0f7a99e37ae52f9ecdc4c81195c14b03f5be3dfd;p=ffmpeg

diff --git a/libavfilter/dnn/dnn_backend_native.c b/libavfilter/dnn/dnn_backend_native.c
index 65a56704d38..4fc3ba2044b 100644
--- a/libavfilter/dnn/dnn_backend_native.c
+++ b/libavfilter/dnn/dnn_backend_native.c
@@ -27,16 +27,39 @@
 #include "libavutil/avassert.h"
 #include "dnn_backend_native_layer_conv2d.h"
 #include "dnn_backend_native_layers.h"
+#include "dnn_io_proc.h"
+
+#define OFFSET(x) offsetof(NativeContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM
+static const AVOption dnn_native_options[] = {
+    { "conv2d_threads", "threads num for conv2d layer", OFFSET(options.conv2d_threads), AV_OPT_TYPE_INT, { .i64 = 0 }, INT_MIN, INT_MAX, FLAGS },
+    { NULL },
+};
+
+const AVClass dnn_native_class = {
+    .class_name = "dnn_native",
+    .item_name  = av_default_item_name,
+    .option     = dnn_native_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+    .category   = AV_CLASS_CATEGORY_FILTER,
+};
+
+static DNNReturnType execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame,
+                                          const char **output_names, uint32_t nb_output, AVFrame *out_frame,
+                                          int do_ioproc);
 
 static DNNReturnType get_input_native(void *model, DNNData *input, const char *input_name)
 {
     NativeModel *native_model = (NativeModel *)model;
+    NativeContext *ctx = &native_model->ctx;
 
     for (int i = 0; i < native_model->operands_num; ++i) {
         DnnOperand *oprd = &native_model->operands[i];
         if (strcmp(oprd->name, input_name) == 0) {
-            if (oprd->type != DOT_INPUT)
+            if (oprd->type != DOT_INPUT) {
+                av_log(ctx, AV_LOG_ERROR, "Found \"%s\" in model, but it is not input node\n", input_name);
                 return DNN_ERROR;
+            }
             input->dt = oprd->data_type;
             av_assert0(oprd->dims[0] == 1);
             input->height = oprd->dims[1];
@@ -47,54 +70,49 @@ static DNNReturnType get_input_native(void *model, DNNData *input, const char *i
     }
 
     // do not find the input operand
+    av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model\n", input_name);
     return DNN_ERROR;
 }
 
-static DNNReturnType set_input_native(void *model, DNNData *input, const char *input_name)
+static DNNReturnType get_output_native(void *model, const char *input_name, int input_width, int input_height,
+                                       const char *output_name, int *output_width, int *output_height)
 {
+    DNNReturnType ret;
     NativeModel *native_model = (NativeModel *)model;
-    DnnOperand *oprd = NULL;
+    NativeContext *ctx = &native_model->ctx;
+    AVFrame *in_frame = av_frame_alloc();
+    AVFrame *out_frame = NULL;
 
-    if (native_model->layers_num <= 0 || native_model->operands_num <= 0)
+    if (!in_frame) {
+        av_log(ctx, AV_LOG_ERROR, "Could not allocate memory for input frame\n");
         return DNN_ERROR;
-
-    /* inputs */
-    for (int i = 0; i < native_model->operands_num; ++i) {
-        oprd = &native_model->operands[i];
-        if (strcmp(oprd->name, input_name) == 0) {
-            if (oprd->type != DOT_INPUT)
-                return DNN_ERROR;
-            break;
-        }
-        oprd = NULL;
     }
 
-    if (!oprd)
-        return DNN_ERROR;
-
-    oprd->dims[0] = 1;
-    oprd->dims[1] = input->height;
-    oprd->dims[2] = input->width;
-    oprd->dims[3] = input->channels;
+    out_frame = av_frame_alloc();
 
-    av_freep(&oprd->data);
-    oprd->length = calculate_operand_data_length(oprd);
-    if (oprd->length <= 0)
-        return DNN_ERROR;
-    oprd->data = av_malloc(oprd->length);
-    if (!oprd->data)
+    if (!out_frame) {
+        av_log(ctx, AV_LOG_ERROR, "Could not allocate memory for output frame\n");
+        av_frame_free(&in_frame);
         return DNN_ERROR;
+    }
 
-    input->data = oprd->data;
+    in_frame->width = input_width;
+    in_frame->height = input_height;
 
-    return DNN_SUCCESS;
+    ret = execute_model_native(native_model->model, input_name, in_frame, &output_name, 1, out_frame, 0);
+    *output_width = out_frame->width;
+    *output_height = out_frame->height;
+
+    av_frame_free(&out_frame);
+    av_frame_free(&in_frame);
+    return ret;
 }
 
 // Loads model and its parameters that are stored in a binary file with following structure:
 // layers_num,layer_type,layer_parameterss,layer_type,layer_parameters...
 // For CONV layer: activation_function, input_num, output_num, kernel_size, kernel, biases
 // For DEPTH_TO_SPACE layer: block_size
-DNNModel *ff_dnn_load_model_native(const char *model_filename, const char *options)
+DNNModel *ff_dnn_load_model_native(const char *model_filename, const char *options, void *userdata)
 {
     DNNModel *model = NULL;
     char header_expected[] = "FFMPEGDNNNATIVE";
@@ -150,7 +168,20 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename, const char *optio
     if (!native_model){
         goto fail;
     }
+
+    native_model->ctx.class = &dnn_native_class;
+    model->options = options;
+    if (av_opt_set_from_string(&native_model->ctx, model->options, NULL, "=", "&") < 0)
+        goto fail;
     model->model = (void *)native_model;
+    native_model->model = model;
+
+#if !HAVE_PTHREAD_CANCEL
+    if (native_model->ctx.options.conv2d_threads > 1){
+        av_log(&native_model->ctx, AV_LOG_WARNING, "'conv2d_threads' option was set but it is not supported "
+                       "on this build (pthread support is required)\n");
+    }
+#endif
 
     avio_seek(model_file_context, file_size - 8, SEEK_SET);
     native_model->layers_num = (int32_t)avio_rl32(model_file_context);
@@ -222,9 +253,9 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename, const char *optio
         return NULL;
     }
 
-    model->set_input = &set_input_native;
     model->get_input = &get_input_native;
-    model->options = options;
+    model->get_output = &get_output_native;
+    model->userdata = userdata;
 
     return model;
 
@@ -234,22 +265,82 @@ fail:
     return NULL;
 }
 
-DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *outputs, const char **output_names, uint32_t nb_output)
+static DNNReturnType execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame,
+                                          const char **output_names, uint32_t nb_output, AVFrame *out_frame,
+                                          int do_ioproc)
 {
     NativeModel *native_model = (NativeModel *)model->model;
+    NativeContext *ctx = &native_model->ctx;
     int32_t layer;
+    DNNData input, output;
+    DnnOperand *oprd = NULL;
 
-    if (native_model->layers_num <= 0 || native_model->operands_num <= 0)
+    if (native_model->layers_num <= 0 || native_model->operands_num <= 0) {
+        av_log(ctx, AV_LOG_ERROR, "No operands or layers in model\n");
         return DNN_ERROR;
-    if (!native_model->operands[0].data)
+    }
+
+    for (int i = 0; i < native_model->operands_num; ++i) {
+        oprd = &native_model->operands[i];
+        if (strcmp(oprd->name, input_name) == 0) {
+            if (oprd->type != DOT_INPUT) {
+                av_log(ctx, AV_LOG_ERROR, "Found \"%s\" in model, but it is not input node\n", input_name);
+                return DNN_ERROR;
+            }
+            break;
+        }
+        oprd = NULL;
+    }
+    if (!oprd) {
+        av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model\n", input_name);
+        return DNN_ERROR;
+    }
+
+    oprd->dims[1] = in_frame->height;
+    oprd->dims[2] = in_frame->width;
+
+    av_freep(&oprd->data);
+    oprd->length = calculate_operand_data_length(oprd);
+    if (oprd->length <= 0) {
+        av_log(ctx, AV_LOG_ERROR, "The input data length overflow\n");
+        return DNN_ERROR;
+    }
+    oprd->data = av_malloc(oprd->length);
+    if (!oprd->data) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to malloc memory for input data\n");
+        return DNN_ERROR;
+    }
+
+    input.height = oprd->dims[1];
+    input.width = oprd->dims[2];
+    input.channels = oprd->dims[3];
+    input.data = oprd->data;
+    input.dt = oprd->data_type;
+    if (do_ioproc) {
+        if (native_model->model->pre_proc != NULL) {
+            native_model->model->pre_proc(in_frame, &input, native_model->model->userdata);
+        } else {
+            proc_from_frame_to_dnn(in_frame, &input, ctx);
+        }
+    }
+
+    if (nb_output != 1) {
+        // currently, the filter does not need multiple outputs,
+        // so we just postpone the support until we really need it.
+        av_log(ctx, AV_LOG_ERROR, "do not support multiple outputs\n");
         return DNN_ERROR;
+    }
 
     for (layer = 0; layer < native_model->layers_num; ++layer){
         DNNLayerType layer_type = native_model->layers[layer].type;
-        layer_funcs[layer_type].pf_exec(native_model->operands,
-                                        native_model->layers[layer].input_operand_indexes,
-                                        native_model->layers[layer].output_operand_index,
-                                        native_model->layers[layer].params);
+        if (layer_funcs[layer_type].pf_exec(native_model->operands,
+                                            native_model->layers[layer].input_operand_indexes,
+                                            native_model->layers[layer].output_operand_index,
+                                            native_model->layers[layer].params,
+                                            &native_model->ctx) == DNN_ERROR) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to execute model\n");
+            return DNN_ERROR;
+        }
     }
 
     for (uint32_t i = 0; i < nb_output; ++i) {
@@ -262,19 +353,51 @@ DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output
             }
         }
 
-        if (oprd == NULL)
+        if (oprd == NULL) {
+            av_log(ctx, AV_LOG_ERROR, "Could not find output in model\n");
             return DNN_ERROR;
+        }
 
-        outputs[i].data = oprd->data;
-        outputs[i].height = oprd->dims[1];
-        outputs[i].width = oprd->dims[2];
-        outputs[i].channels = oprd->dims[3];
-        outputs[i].dt = oprd->data_type;
+        output.data = oprd->data;
+        output.height = oprd->dims[1];
+        output.width = oprd->dims[2];
+        output.channels = oprd->dims[3];
+        output.dt = oprd->data_type;
+
+        if (do_ioproc) {
+            if (native_model->model->post_proc != NULL) {
+                native_model->model->post_proc(out_frame, &output, native_model->model->userdata);
+            } else {
+                proc_from_dnn_to_frame(out_frame, &output, ctx);
+            }
+        } else {
+            out_frame->width = output.width;
+            out_frame->height = output.height;
+        }
     }
 
     return DNN_SUCCESS;
 }
 
+DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame,
+                                          const char **output_names, uint32_t nb_output, AVFrame *out_frame)
+{
+    NativeModel *native_model = (NativeModel *)model->model;
+    NativeContext *ctx = &native_model->ctx;
+
+    if (!in_frame) {
+        av_log(ctx, AV_LOG_ERROR, "in frame is NULL when executing model.\n");
+        return DNN_ERROR;
+    }
+
+    if (!out_frame) {
+        av_log(ctx, AV_LOG_ERROR, "out frame is NULL when executing model.\n");
+        return DNN_ERROR;
+    }
+
+    return execute_model_native(model, input_name, in_frame, output_names, nb_output, out_frame, 1);
+}
+
 int32_t calculate_operand_dims_count(const DnnOperand *oprd)
 {
     int32_t result = 1;