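Enable checkpoint restore from file.

When FLAGS.pretrained_model_checkpoint_path is set, the session is restored
from the latest checkpoint found there instead of running the variable
initializer. This change also migrates the summary ops to the tf.summary API
(tf.summary.scalar, tf.summary.image, tf.summary.merge_all,
tf.summary.FileWriter), splits the input image summary into 'before'
(channels 0:3) and 'after' (channels 3:6) slices, writes summaries every
100 steps, and replaces xrange with range.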

[voxel-flow] / voxel_flow_train.py
diff --git a/voxel_flow_train.py b/voxel_flow_train.py

index dfe87b198e63a3286bb674361b7608b4509a74d6..e1fa96f2c3102f226148b18283eb1f5855cec4eb 100755 (executable)
--- a/voxel_flow_train.py
+++ b/voxel_flow_train.py
@@ -68,26 +68,38 @@ def train(dataset_frame1, dataset_frame2, dataset_frame3):
  
      # Create summaries
      summaries = tf.get_collection(tf.GraphKeys.SUMMARIES)
-    summaries.append(tf.scalar_summary('total_loss', total_loss))
-    summaries.append(tf.scalar_summary('reproduction_loss', reproduction_loss))
-    # summaries.append(tf.scalar_summary('prior_loss', prior_loss))
-    summaries.append(tf.image_summary('Input Image', input_placeholder, 3))
-    summaries.append(tf.image_summary('Output Image', prediction, 3))
-    summaries.append(tf.image_summary('Target Image', target_placeholder, 3))
+    summaries.append(tf.summary.scalar('total_loss', total_loss))
+    summaries.append(tf.summary.scalar('reproduction_loss', reproduction_loss))
+    # summaries.append(tf.summary.scalar('prior_loss', prior_loss))
+    summaries.append(tf.summary.image('Input Image (before)', input_placeholder[:, :, :, 0:3], 3));
+    summaries.append(tf.summary.image('Input Image (after)', input_placeholder[:, :, :, 3:6], 3));
+    summaries.append(tf.summary.image('Output Image', prediction, 3))
+    summaries.append(tf.summary.image('Target Image', target_placeholder, 3))
  
      # Create a saver.
      saver = tf.train.Saver(tf.all_variables())
  
      # Build the summary operation from the last tower summaries.
-    summary_op = tf.merge_all_summaries()
+    summary_op = tf.summary.merge_all()
  
-    # Build an initialization operation to run below.
-    init = tf.initialize_all_variables()
-    sess = tf.Session()
-    sess.run(init)
+    # Restore checkpoint from file.
+    if FLAGS.pretrained_model_checkpoint_path:
+      sess = tf.Session()
+      assert tf.gfile.Exists(FLAGS.pretrained_model_checkpoint_path)
+      ckpt = tf.train.get_checkpoint_state(
+               FLAGS.pretrained_model_checkpoint_path)
+      restorer = tf.train.Saver()
+      restorer.restore(sess, ckpt.model_checkpoint_path)
+      print('%s: Pre-trained model restored from %s' %
+        (datetime.now(), ckpt.model_checkpoint_path))
+    else:
+      # Build an initialization operation to run below.
+      init = tf.initialize_all_variables()
+      sess = tf.Session()
+      sess.run(init)
  
      # Summary Writter
-    summary_writer = tf.train.SummaryWriter(
+    summary_writer = tf.summary.FileWriter(
        FLAGS.train_dir,
        graph=sess.graph)
  
@@ -118,7 +130,7 @@ def train(dataset_frame1, dataset_frame2, dataset_frame3):
      # load_fn_frame3 = partial(dataset_frame3.process_func)
      # p_queue_frame3 = PrefetchQueue(load_fn_frame3, data_list_frame3, FLAGS.batch_size, shuffle=False, num_workers=num_workers)
  
-    for step in xrange(0, FLAGS.max_steps):
+    for step in range(0, FLAGS.max_steps):
        batch_idx = step % epoch_num
        
        batch_data_list_frame1 = data_list_frame1[int(batch_idx * FLAGS.batch_size) : int((batch_idx + 1) * FLAGS.batch_size)]
@@ -149,11 +161,13 @@ def train(dataset_frame1, dataset_frame2, dataset_frame3):
          shuffle(data_list_frame3)
          print('Epoch Number: %d' % int(step / epoch_num))
        
-      # Output Summary 
        if step % 10 == 0:
-        # summary_str = sess.run(summary_op, feed_dict = feed_dict)
-        # summary_writer.add_summary(summary_str, step)
-             print("Loss at step %d: %f" % (step, loss_value))
+        print("Loss at step %d: %f" % (step, loss_value))
+
+      if step % 100 == 0:
+        # Output Summary 
+        summary_str = sess.run(summary_op, feed_dict = feed_dict)
+        summary_writer.add_summary(summary_str, step)
  
        if step % 500 == 0:
          # Run a batch of images