avcodec: postpone removal of deprecated libopenh264 wrapper options

[ffmpeg] / libavfilter / dnn / dnn_backend_native_layer_conv2d.c
diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c

index 5c313454f73868389f532d45b01572d9260472a3..7a60aa6a4b783ffc8249c31d247ba95f73b2d7c6 100644 (file)
--- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
@@ -26,22 +26,24 @@
  #define CLAMP_TO_EDGE(x, w) ((x) < 0 ? 0 : ((x) >= (w) ? (w - 1) : (x)))
  
  //struct to pass parameters
-typedef struct thread_common_param{
+typedef struct ThreadCommonParam{
      DnnOperand *operands;
      const int32_t *input_operand_indexes;
      int32_t output_operand_index;
      const void *parameters;
      NativeContext *ctx;
      float *output_data;
-    int thread_num;
-} thread_common_param;
+} ThreadCommonParam;
  
-typedef struct thread_param{
-    thread_common_param *thread_common_param;
-    int thread_index;
-} thread_param;
+typedef struct ThreadParam{
+    ThreadCommonParam *thread_common_param;
+    int thread_start, thread_end;
+#if HAVE_PTHREAD_CANCEL
+    pthread_t thread;
+#endif
+} ThreadParam;
  
-int dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num)
+int ff_dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num)
  {
      ConvolutionalParams *conv_params;
      int kernel_size;
@@ -71,7 +73,7 @@ int dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int fil
          return 0;
      }
  
-    conv_params->kernel = av_malloc(kernel_size * sizeof(float));
+    conv_params->kernel = av_malloc_array(kernel_size, sizeof(*conv_params->kernel));
      if (!conv_params->kernel) {
          av_freep(&conv_params);
          return 0;
@@ -82,7 +84,7 @@ int dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int fil
  
      conv_params->biases = NULL;
      if (conv_params->has_bias) {
-        conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
+        conv_params->biases = av_malloc_array(conv_params->output_num, sizeof(*conv_params->biases));
          if (!conv_params->biases){
              av_freep(&conv_params->kernel);
              av_freep(&conv_params);
@@ -109,15 +111,15 @@ int dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int fil
  static void * dnn_execute_layer_conv2d_thread(void *threadarg)
  {
      //pass parameters
-    thread_param *thread_param = (struct thread_param *)threadarg;
-    thread_common_param *thread_common_param = thread_param->thread_common_param;
+    ThreadParam *thread_param = threadarg;
+    ThreadCommonParam *thread_common_param = thread_param->thread_common_param;
      DnnOperand *operands = thread_common_param->operands;
      int32_t input_operand_index = thread_common_param->input_operand_indexes[0];
      int height = operands[input_operand_index].dims[1];
      int width = operands[input_operand_index].dims[2];
      int channel = operands[input_operand_index].dims[3];
      const float *input = operands[input_operand_index].data;
-    const ConvolutionalParams *conv_params = (const ConvolutionalParams *)(thread_common_param->parameters);
+    const ConvolutionalParams *conv_params = thread_common_param->parameters;
  
      int radius = conv_params->kernel_size >> 1;
      int src_linesize = width * conv_params->input_num;
@@ -125,16 +127,12 @@ static void * dnn_execute_layer_conv2d_thread(void *threadarg)
      int filter_size = conv_params->kernel_size * filter_linesize;
      int pad_size = (conv_params->padding_method == VALID) ? (conv_params->kernel_size - 1) / 2 * conv_params->dilation : 0;
  
-    int thread_stride = (height - pad_size * 2) / thread_common_param->thread_num;
-    int thread_start = thread_stride * thread_param->thread_index + pad_size;
-    int thread_end = (thread_param->thread_index == thread_common_param->thread_num - 1) ? (height - pad_size) : (thread_start + thread_stride);
-
      float *output = thread_common_param->output_data;
-    output += (conv_params->output_num) * (width - 2 * pad_size) * (thread_start - pad_size);
+    output += (conv_params->output_num) * (width - 2 * pad_size) * (thread_param->thread_start - pad_size);
  
      av_assert0(channel == conv_params->input_num);
  
-    for (int y = thread_start; y < thread_end; ++y) {
+    for (int y = thread_param->thread_start; y < thread_param->thread_end; ++y) {
          for (int x = pad_size; x < width - pad_size; ++x) {
              for (int n_filter = 0; n_filter < conv_params->output_num; ++n_filter) {
                  if (conv_params->has_bias)
@@ -182,39 +180,45 @@ static void * dnn_execute_layer_conv2d_thread(void *threadarg)
              output += conv_params->output_num;
          }
      }
-    return (void *)DNN_SUCCESS;
+    return NULL;
  }
  
  
-int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_indexes,
-                             int32_t output_operand_index, const void *parameters, NativeContext *ctx)
+int ff_dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_indexes,
+                                int32_t output_operand_index, const void *parameters, NativeContext *ctx)
  {
+#if HAVE_PTHREAD_CANCEL
      int thread_num = (ctx->options.conv2d_threads <= 0 || ctx->options.conv2d_threads > av_cpu_count())
          ? (av_cpu_count() + 1) : (ctx->options.conv2d_threads);
-#if HAVE_PTHREAD_CANCEL
-    pthread_t *thread_id = av_malloc(thread_num * sizeof(pthread_t));
+    int ret = DNN_SUCCESS, thread_stride;
+    ThreadParam *thread_param;
+#else
+    ThreadParam thread_param = { 0 };
  #endif
-    thread_param **thread_param = av_malloc(thread_num * sizeof(*thread_param));
-    thread_common_param thread_common_param;
-    const ConvolutionalParams *conv_params = (const ConvolutionalParams *)(parameters);
+    ThreadCommonParam thread_common_param;
+    const ConvolutionalParams *conv_params = parameters;
+    int height = operands[input_operand_indexes[0]].dims[1];
+    int width = operands[input_operand_indexes[0]].dims[2];
      int pad_size = (conv_params->padding_method == VALID) ? (conv_params->kernel_size - 1) / 2 * conv_params->dilation : 0;
      DnnOperand *output_operand = &operands[output_operand_index];
+    void *tmp;
  
      output_operand->dims[0] = operands[input_operand_indexes[0]].dims[0];
-    output_operand->dims[1] = operands[input_operand_indexes[0]].dims[1] - pad_size * 2;
-    output_operand->dims[2] = operands[input_operand_indexes[0]].dims[2] - pad_size * 2;
+    output_operand->dims[1] = height - pad_size * 2;
+    output_operand->dims[2] = width - pad_size * 2;
      output_operand->dims[3] = conv_params->output_num;
      output_operand->data_type = operands[input_operand_indexes[0]].data_type;
-    output_operand->length = calculate_operand_data_length(output_operand);
+    output_operand->length = ff_calculate_operand_data_length(output_operand);
      if (output_operand->length <= 0) {
          av_log(ctx, AV_LOG_ERROR, "The output data length overflow\n");
          return DNN_ERROR;
      }
-    output_operand->data = av_realloc(output_operand->data, output_operand->length);
-    if (!output_operand->data) {
+    tmp = av_realloc(output_operand->data, output_operand->length);
+    if (!tmp) {
          av_log(ctx, AV_LOG_ERROR, "Failed to reallocate memory for output\n");
          return DNN_ERROR;
      }
+    output_operand->data = tmp;
      thread_common_param.output_data = output_operand->data;
      thread_common_param.operands = operands;
      thread_common_param.input_operand_indexes = input_operand_indexes;
@@ -223,36 +227,37 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
      thread_common_param.ctx = ctx;
  
  #if HAVE_PTHREAD_CANCEL
-    thread_common_param.thread_num = thread_num;
-
+    thread_param = av_malloc_array(thread_num, sizeof(*thread_param));
+    if (!thread_param)
+        return DNN_ERROR;
+    thread_stride = (height - pad_size * 2) / thread_num;
      //create threads
      for (int i = 0; i < thread_num; i++){
-        thread_param[i] = av_malloc(sizeof(**thread_param));
-        thread_param[i]->thread_common_param = &thread_common_param;
-        thread_param[i]->thread_index = i;
-        pthread_create(&thread_id[i], NULL, dnn_execute_layer_conv2d_thread, (void *)thread_param[i]);
+        thread_param[i].thread_common_param = &thread_common_param;
+        thread_param[i].thread_start = thread_stride * i + pad_size;
+        thread_param[i].thread_end = (i == thread_num - 1) ? (height - pad_size) : (thread_param[i].thread_start + thread_stride);
+        if (pthread_create(&thread_param[i].thread, NULL,
+                           dnn_execute_layer_conv2d_thread, &thread_param[i])) {
+            thread_num = i;
+            ret = DNN_ERROR;
+            break;
+        }
      }
  
-    //join threads, res gets function return
      for (int i = 0; i < thread_num; i++){
-        pthread_join(thread_id[i], NULL);
+        pthread_join(thread_param[i].thread, NULL);
      }
  
      //release memory
-    av_free(thread_id);
+    av_freep(&thread_param);
  
-    for (int i = 0; i < thread_num; i++){
-        av_free(thread_param[i]);
-    }
+    return ret;
  #else
-    thread_common_param.thread_num = 1;
-    thread_param[0] = av_malloc(sizeof(thread_param));
-    thread_param[0]->thread_common_param = &thread_common_param;
-    thread_param[0]->thread_index = 0;
-    dnn_execute_layer_conv2d_thread((void *)thread_param[0]);
-    av_free(thread_param[0]);
-#endif
+    thread_param.thread_common_param = &thread_common_param;
+    thread_param.thread_start = pad_size;
+    thread_param.thread_end = height - pad_size;
+    dnn_execute_layer_conv2d_thread(&thread_param);
  
-    av_free(thread_param);
      return DNN_SUCCESS;
+#endif
  }