anatolix
diff --git a/‎README.md
+11-1 b/‎README.md
+11-1
diff --git a/‎demo.ipynb
+31-29 b/‎demo.ipynb
+31-29
diff --git a/‎demo_image.py
+3-3 b/‎demo_image.py
+3-3
diff --git a/‎model.py
+69-31 b/‎model.py
+69-31
diff --git a/‎readme/tr_results.png
57.2 KB b/‎readme/tr_results.png
57.2 KB
diff --git a/‎training/generate_hdf5.py
+27-23 b/‎training/generate_hdf5.py
+27-23
diff --git a/‎training/inspect_dataset.ipynb
+16-16 b/‎training/inspect_dataset.ipynb
+16-16
@@ -57,7 +57,17 @@ any image file as an input.
   Download and compile the tool [dataset_transformer](https://github.com/michalfaber/rmpe_dataset_transformer). 
     Use this tool to create final datasets `dataset/train_dataset.h5` `dataset/val_dataset.h5`  
 - You can verify the datasets `inspect_dataset.ipynb` 
-- Start training `python train_pose.py` (TODO) 
+- Start training `python train_pose.py` 
+
+NOTE:
+I trained the model from scratch for 3.5 days on a single GPU 1070 but didn't obtain satisfactory results.
+38 epochs is about 200000 iterations in caffe. 
+I noticed that reducing the learning rate after step 136106 (as in the original caffe model) was probably too early
+because the learning process slowed down.
+ 
+<div align="center">
+<img src="https://github.com/michalfaber/keras_Realtime_Multi-Person_Pose_Estimation/blob/master/readme/tr_results.png" width="450" height="563">
+</div>
 
 ## Related repository
 - CVPR'16, [Convolutional Pose Machines](https://github.com/shihenw/convolutional-pose-machines-release).
 
@@ -6,7 +6,7 @@
 import util
 from config_reader import config_reader
 from scipy.ndimage.filters import gaussian_filter
-from model import get_model
+from model import get_testing_model
 
 
 keras_weights_file = "model/keras/model.h5"
@@ -44,7 +44,7 @@ def process (input_image, params, model_params):
         imageToTest_padded, pad = util.padRightDownCorner(imageToTest, model_params['stride'],
                                                           model_params['padValue'])
 
-        input_img = np.transpose(np.float32(imageToTest_padded[:,:,:,np.newaxis]), (3,0,1,2))/256 - 0.5; # required shape (1, width, height, channels)
+        input_img = np.transpose(np.float32(imageToTest_padded[:,:,:,np.newaxis]), (3,0,1,2)) # required shape (1, width, height, channels)
 
         output_blobs = model.predict(input_img)
 
@@ -238,7 +238,7 @@ def process (input_image, params, model_params):
     print('start processing...')
 
     # load model
-    model = get_model()
+    model = get_testing_model()
     model.load_weights(keras_weights_file)
 
     # load config
 
@@ -5,6 +5,7 @@
 from keras.layers.pooling import MaxPooling2D
 from keras.layers.merge import Multiply
 from keras.regularizers import l2
+from keras.initializers import random_normal,constant
 
 def relu(x): return Activation('relu')(x)
 
@@ -14,7 +15,9 @@ def conv(x, nf, ks, name, weight_decay):
 
     x = Conv2D(nf, (ks, ks), padding='same', name=name,
                kernel_regularizer=kernel_reg,
-               bias_regularizer=bias_reg)(x)
+               bias_regularizer=bias_reg,
+               kernel_initializer=random_normal(stddev=0.01),
+               bias_initializer=constant(0.0))(x)
     return x
 
 def pooling(x, ks, st, name):
@@ -62,7 +65,7 @@ def vgg_block(x, weight_decay):
     return x
 
 
-def stage1_block(x, x1, x2, num_p, branch, weight_decay):
+def stage1_block(x, num_p, branch, weight_decay):
     # Block 1
     x = conv(x, 128, 3, "Mconv1_stage1_L%d" % branch, (weight_decay, 0))
     x = relu(x)
@@ -74,17 +77,10 @@ def stage1_block(x, x1, x2, num_p, branch, weight_decay):
     x = relu(x)
     x = conv(x, num_p, 1, "Mconv5_stage1_L%d" % branch, (weight_decay, 0))
 
-    w_name = "weight_stage1_L%d" % branch
-    if num_p == 38:
-        w = Multiply(name=w_name)([x, x1]) # vec_weight
-
-    else:
-        w = Multiply(name=w_name)([x, x2])  # vec_heat
-
-    return x, w
+    return x
 
 
-def stageT_block(x, x1, x2, num_p, stage, branch, weight_decay):
+def stageT_block(x, num_p, stage, branch, weight_decay):
     # Block 1
     x = conv(x, 128, 7, "Mconv1_stage%d_L%d" % (stage, branch), (weight_decay, 0))
     x = relu(x)
@@ -100,17 +96,20 @@ def stageT_block(x, x1, x2, num_p, stage, branch, weight_decay):
     x = relu(x)
     x = conv(x, num_p, 1, "Mconv7_stage%d_L%d" % (stage, branch), (weight_decay, 0))
 
+    return x
+
+
+def apply_mask(x, mask1, mask2, num_p, stage, branch):
     w_name = "weight_stage%d_L%d" % (stage, branch)
     if num_p == 38:
-        w = Multiply(name=w_name)([x, x1]) # vec_weight
+        w = Multiply(name=w_name)([x, mask1]) # vec_weight
 
     else:
-        w = Multiply(name=w_name)([x, x2])  # vec_heat
+        w = Multiply(name=w_name)([x, mask2])  # vec_heat
+    return w
 
-    return x, w
 
-
-def get_model(training=True, weight_decay=None):
+def get_training_model(weight_decay):
 
     stages = 6
     np_branch1 = 38
@@ -131,38 +130,77 @@ def get_model(training=True, weight_decay=None):
     inputs.append(vec_weight_input)
     inputs.append(heat_weight_input)
 
-    img_normalized = Lambda(lambda x: x / 127.5 - 1.0)(img_input)
+    img_normalized = Lambda(lambda x: x / 256 - 0.5)(img_input)
 
     # VGG
     stage0_out = vgg_block(img_normalized, weight_decay)
 
-    # stage 1
-    stage1_branch1_out,w1 = stage1_block(stage0_out, vec_weight_input,
-                                         heat_weight_input, np_branch1, 1, weight_decay)
-    stage1_branch2_out,w2 = stage1_block(stage0_out, vec_weight_input,
-                                         heat_weight_input, np_branch2, 2, weight_decay)
+    # stage 1 - branch 1 (PAF)
+    stage1_branch1_out = stage1_block(stage0_out, np_branch1, 1, weight_decay)
+    w1 = apply_mask(stage1_branch1_out, vec_weight_input, heat_weight_input, np_branch1, 1, 1)
+
+    # stage 1 - branch 2 (confidence maps)
+    stage1_branch2_out = stage1_block(stage0_out, np_branch2, 2, weight_decay)
+    w2 = apply_mask(stage1_branch2_out, vec_weight_input, heat_weight_input, np_branch2, 1, 2)
+
     x = Concatenate()([stage1_branch1_out, stage1_branch2_out, stage0_out])
 
     outputs.append(w1)
     outputs.append(w2)
 
-    # stage t >= 2
-    #stageT_branch1_out = None
-    #stageT_branch2_out = None
+    # stage sn >= 2
     for sn in range(2, stages + 1):
-        stageT_branch1_out, w1 = stageT_block(x, vec_weight_input,
-                                         heat_weight_input, np_branch1, sn, 1, weight_decay)
-        stageT_branch2_out, w2 = stageT_block(x, vec_weight_input,
-                                         heat_weight_input, np_branch2, sn, 2, weight_decay)
+        # stage SN - branch 1 (PAF)
+        stageT_branch1_out = stageT_block(x, np_branch1, sn, 1, weight_decay)
+        w1 = apply_mask(stageT_branch1_out, vec_weight_input, heat_weight_input, np_branch1, sn, 1)
+
+        # stage SN - branch 2 (confidence maps)
+        stageT_branch2_out = stageT_block(x, np_branch2, sn, 2, weight_decay)
+        w2 = apply_mask(stageT_branch2_out, vec_weight_input, heat_weight_input, np_branch2, sn, 2)
 
         outputs.append(w1)
         outputs.append(w2)
 
         if (sn < stages):
             x = Concatenate()([stageT_branch1_out, stageT_branch2_out, stage0_out])
 
-    #outputs.insert(0, stageT_branch1_out)
-    #outputs.insert(1, stageT_branch2_out)
     model = Model(inputs=inputs, outputs=outputs)
 
+    return model
+
+
+def get_testing_model():
+    stages = 6
+    np_branch1 = 38
+    np_branch2 = 19
+
+    img_input_shape = (None, None, 3)
+
+    img_input = Input(shape=img_input_shape)
+
+    img_normalized = Lambda(lambda x: x / 256 - 0.5)(img_input) # [-0.5, 0.5]
+
+    # VGG
+    stage0_out = vgg_block(img_normalized, None)
+
+    # stage 1 - branch 1 (PAF)
+    stage1_branch1_out = stage1_block(stage0_out, np_branch1, 1, None)
+
+    # stage 1 - branch 2 (confidence maps)
+    stage1_branch2_out = stage1_block(stage0_out, np_branch2, 2, None)
+
+    x = Concatenate()([stage1_branch1_out, stage1_branch2_out, stage0_out])
+
+    # stage t >= 2
+    stageT_branch1_out = None
+    stageT_branch2_out = None
+    for sn in range(2, stages + 1):
+        stageT_branch1_out = stageT_block(x, np_branch1, sn, 1, None)
+        stageT_branch2_out = stageT_block(x, np_branch2, sn, 2, None)
+
+        if (sn < stages):
+            x = Concatenate()([stageT_branch1_out, stageT_branch2_out, stage0_out])
+
+    model = Model(inputs=[img_input], outputs=[stageT_branch1_out, stageT_branch2_out])
+
     return model
@@ -17,21 +17,23 @@
 val_img_dir = os.path.join(dataset_dir, "val2017")
 val_mask_dir = os.path.join(dataset_dir, "valmask2017")
 
-# datasets = [
-#     (tr_anno_path, tr_img_dir, tr_mask_dir, "COCO"),
-#     (val_anno_path, val_img_dir, val_mask_dir, "COCO")
-# ]
-
 datasets = [
+    (tr_anno_path, tr_img_dir, tr_mask_dir, "COCO"),
     (val_anno_path, val_img_dir, val_mask_dir, "COCO")
 ]
 
+#datasets = [
+#    (val_anno_path, val_img_dir, val_mask_dir, "COCO")
+#]
+
 joint_all = []
 tr_hdf5_path = os.path.join(dataset_dir, "train_pre_dataset.h5")
 val_hdf5_path = os.path.join(dataset_dir, "val_pre_dataset.h5")
 
 val_size = 2645 # size of validation set
 
+#val_size = 300
+
 def process():
     count = 0
     for _, ds in enumerate(datasets):
@@ -55,12 +57,8 @@ def process():
 
             print("Image ID ", img_id)
 
-            if i < val_size:
-                isValidation = 1
-            else:
-                isValidation = 0
-
             persons = []
+            prev_center = []
 
             for p in range(numPeople):
 
@@ -76,19 +74,18 @@ def process():
                 person_center = [img_anns[p]["bbox"][0] + img_anns[p]["bbox"][2] / 2,
                                  img_anns[p]["bbox"][1] + img_anns[p]["bbox"][3] / 2]
 
-                # # skip this person if the distance to exiting person is too small
-                # person_center = np.array((img_anns[p]["bbox"][0] + img_anns[p]["bbox"][2] / 2,
-                #                  img_anns[p]["bbox"][1] + img_anns[p]["bbox"][3] / 2))
-                # flag = 0
-                #
-                # for pc in prev_center:
-                #     dist = cdist(np.expand_dims(pc[:2], axis=0), np.expand_dims(person_center, axis=0))[0]
-                #     if dist < pc[2]*0.3:
-                #         flag = 1
-                #         continue
-                #
-                # if flag == 1:
-                #     continue
+                # skip this person if the distance to an existing person is too small
+                flag = 0
+                for pc in prev_center:
+                    a = np.expand_dims(pc[:2], axis=0)
+                    b = np.expand_dims(person_center, axis=0)
+                    dist = cdist(a, b)[0]
+                    if dist < pc[2]*0.3:
+                        flag = 1
+                        continue
+
+                if flag == 1:
+                    continue
 
                 pers["objpos"] = person_center
                 pers["bbox"] = img_anns[p]["bbox"]
@@ -110,13 +107,20 @@ def process():
                 pers["scale_provided"] = img_anns[p]["bbox"][3] / 368
 
                 persons.append(pers)
+                prev_center.append(np.append(person_center, max(img_anns[p]["bbox"][2], img_anns[p]["bbox"][3])))
+
 
             if len(persons) > 0:
 
                 joint_all.append(dict())
 
                 joint_all[count]["dataset"] = dataset_type
 
+                if count < val_size:
+                    isValidation = 1
+                else:
+                    isValidation = 0
+
                 joint_all[count]["isValidation"] = isValidation
 
                 joint_all[count]["img_width"] = w