conv_params->input_num = (int32_t)avio_rl32(model_file_context);
conv_params->output_num = (int32_t)avio_rl32(model_file_context);
conv_params->kernel_size = (int32_t)avio_rl32(model_file_context);
+ conv_params->has_bias = (int32_t)avio_rl32(model_file_context);
+ dnn_size += 28;
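+ /* 28 bytes = 7 uint32 fields at 4 bytes each: dilation, padding, activation, input_num, output_num, kernel_size, has_bias */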
+
kernel_size = conv_params->input_num * conv_params->output_num *
- conv_params->kernel_size * conv_params->kernel_size;
- dnn_size += 24 + (kernel_size + conv_params->output_num << 2);
+ conv_params->kernel_size * conv_params->kernel_size;
+ dnn_size += kernel_size * 4;
+ if (conv_params->has_bias)
+ dnn_size += conv_params->output_num * 4;
+
if (dnn_size > file_size || conv_params->input_num <= 0 ||
conv_params->output_num <= 0 || conv_params->kernel_size <= 0){
av_freep(&conv_params);
return 0;
}
+
conv_params->kernel = av_malloc(kernel_size * sizeof(float));
- conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
- if (!conv_params->kernel || !conv_params->biases){
- av_freep(&conv_params->kernel);
- av_freep(&conv_params->biases);
+ if (!conv_params->kernel) {
av_freep(&conv_params);
return 0;
}
- for (int i = 0; i < kernel_size; ++i){
+ for (int i = 0; i < kernel_size; ++i) {
conv_params->kernel[i] = av_int2float(avio_rl32(model_file_context));
}
- for (int i = 0; i < conv_params->output_num; ++i){
- conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
+
+ conv_params->biases = NULL;
+ if (conv_params->has_bias) {
+ conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
+ if (!conv_params->biases) {
+ av_freep(&conv_params->kernel);
+ av_freep(&conv_params);
+ return 0;
+ }
+ for (int i = 0; i < conv_params->output_num; ++i) {
+ conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
+ }
}
layer->params = conv_params;
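
For reference, the record layout this parser consumes is easiest to see from the writer side. Below is a minimal illustrative serializer with the field order taken from `dump_simple_conv2d_to_file` further down; `write_conv2d_record` is a hypothetical helper, not part of the patch:

```python
import numpy as np

def write_conv2d_record(f, op_code, kernel, biases=None):
    # sketch only: kernel is [out_channels, height, width, in_channels],
    # matching the converter's transpose; f is opened in binary write mode
    out_channels, filter_height, filter_width, in_channels = kernel.shape
    has_bias = 1 if biases is not None else 0
    dilation, padding, activation = 1, 0, 0  # example codes only
    # the op code plus the 7 uint32 fields the parser counts as 28 bytes
    np.array([op_code, dilation, padding, activation,
              in_channels, out_channels, filter_height, has_bias],
             dtype=np.uint32).tofile(f)
    kernel.astype(np.float32).tofile(f)      # kernel_size floats
    if has_bias:
        biases.astype(np.float32).tofile(f)  # output_num floats
```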
for (int y = pad_size; y < height - pad_size; ++y) {
for (int x = pad_size; x < width - pad_size; ++x) {
for (int n_filter = 0; n_filter < conv_params->output_num; ++n_filter) {
- output[n_filter] = conv_params->biases[n_filter];
+ if (conv_params->has_bias)
+ output[n_filter] = conv_params->biases[n_filter];
+ else
+ output[n_filter] = 0.f;
for (int ch = 0; ch < conv_params->input_num; ++ch) {
for (int kernel_y = 0; kernel_y < conv_params->kernel_size; ++kernel_y) {
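
The execution change above only affects how each filter's accumulator is seeded: from its bias when `has_bias` is set, from zero otherwise. A NumPy analogue, purely illustrative (`init_accumulators` is a made-up name):

```python
import numpy as np

def init_accumulators(output_num, biases=None):
    # mirrors the C code: output[n_filter] starts at biases[n_filter]
    # when has_bias is set, at 0.f otherwise
    if biases is not None:
        return np.asarray(biases, dtype=np.float32).copy()
    return np.zeros(output_num, dtype=np.float32)
```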
return knode, bnode, dnode, anode
- def dump_conv2d_to_file(self, node, f):
+ def dump_complex_conv2d_to_file(self, node, f):
assert(node.op == 'Conv2D')
self.layer_number = self.layer_number + 1
self.converted_nodes.add(node.name)
kernel = kernel.reshape(filter_height, filter_width, in_channels, out_channels)
kernel = np.transpose(kernel, [3, 0, 1, 2])
- np.array([self.op2code[node.op], dilation, padding, self.conv_activations[activation], in_channels, out_channels, filter_height], dtype=np.uint32).tofile(f)
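+ # the scoped conv2d block always carries a bias node (bnode), so has_bias is always 1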
+ has_bias = 1
+ np.array([self.op2code[node.op], dilation, padding, self.conv_activations[activation], in_channels, out_channels, filter_height, has_bias], dtype=np.uint32).tofile(f)
kernel.tofile(f)
btensor = bnode.attr['value'].tensor
np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
+ def dump_simple_conv2d_to_file(self, node, f):
+ assert(node.op == 'Conv2D')
+ self.layer_number = self.layer_number + 1
+ self.converted_nodes.add(node.name)
+
+ node0 = self.name_node_dict[node.input[0]]
+ node1 = self.name_node_dict[node.input[1]]
+ if node0.op == 'Const':
+ knode = node0
+ input_name = node.input[1]
+ else:
+ knode = node1
+ input_name = node.input[0]
+
+ ktensor = knode.attr['value'].tensor
+ filter_height = ktensor.tensor_shape.dim[0].size
+ filter_width = ktensor.tensor_shape.dim[1].size
+ in_channels = ktensor.tensor_shape.dim[2].size
+ out_channels = ktensor.tensor_shape.dim[3].size
+ kernel = np.frombuffer(ktensor.tensor_content, dtype=np.float32)
+ kernel = kernel.reshape(filter_height, filter_width, in_channels, out_channels)
+ kernel = np.transpose(kernel, [3, 0, 1, 2])
+
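+ # a direct tf.nn.conv2d call has no fused bias, so record has_bias = 0 and write no bias data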
+ has_bias = 0
+ dilation = 1
+ padding = node.attr['padding'].s.decode("utf-8")
+ np.array([self.op2code[node.op], dilation, self.conv_paddings[padding], self.conv_activations['None'],
+ in_channels, out_channels, filter_height, has_bias], dtype=np.uint32).tofile(f)
+ kernel.tofile(f)
+
+ input_operand_index = self.add_operand(input_name, Operand.IOTYPE_INPUT)
+ output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
+ np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
+
+
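To make the "simple" case concrete: the graph below (an illustrative TF1-style snippet, assuming `tensorflow.compat.v1`) produces a bare Conv2D node with a Const kernel in one of its two input slots and no enclosing scope, which is exactly what `dump_simple_conv2d_to_file` handles:

```python
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

x = tf.placeholder(tf.float32, [1, 8, 8, 3], name='x')
w = tf.constant(np.ones((3, 3, 3, 4), dtype=np.float32))  # HWIO kernel
# a bare Conv2D node: constant kernel, no BiasAdd, no enclosing scope
y = tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='VALID')
```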
def dump_depth2space_to_file(self, node, f):
assert(node.op == 'DepthToSpace')
self.layer_number = self.layer_number + 1
scope_name = TFConverter.get_scope_name(node.name)
if scope_name in self.conv2d_scope_names:
if node.op == 'Conv2D':
- self.dump_conv2d_to_file(node, f)
+ self.dump_complex_conv2d_to_file(node, f)
continue
- if node.op == 'DepthToSpace':
+ if node.op == 'Conv2D':
+ self.dump_simple_conv2d_to_file(node, f)
+ elif node.op == 'DepthToSpace':
self.dump_depth2space_to_file(node, f)
elif node.op == 'MirrorPad':
self.dump_mirrorpad_to_file(node, f)
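
The dispatch above splits on the scope name: a Conv2D whose scope owns a kernel Const goes to the complex writer (bias mandatory), everything else goes to the new simple writer (no bias). A rough sketch of the split, assuming `get_scope_name` strips the last '/'-separated component of the node name (`scope_of` is a hypothetical stand-in):

```python
def scope_of(name):
    # assumed behavior of TFConverter.get_scope_name: drop the last path
    # component; '' means the op was created at graph top level
    return name.rsplit('/', 1)[0] if '/' in name else ''

assert scope_of('conv2d/Conv2D') == 'conv2d'  # complex path, if 'conv2d/kernel' exists
assert scope_of('Conv2D_1') == ''             # simple path
```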
def generate_conv2d_scope_info(self):
- # conv2d is a sub block in graph, get the scope name
+ # in most cases, conv2d is a sub-block in the graph; get its scope name
for node in self.nodes:
if node.op == 'Conv2D':
scope = TFConverter.get_scope_name(node.name)
+ # for the case where tf.nn.conv2d is called directly
+ if scope == '':
+ continue
+ # for the case where tf.nn.conv2d is called within a scope (the scope has no kernel node)
+ if scope + '/kernel' not in self.name_node_dict:
+ continue
self.conv2d_scope_names.add(scope)
# get the input name to the conv2d sub block