changed mechanism of generating samples

michalfaber · michalfaber · commit ac2514261aac · 2017-10-19T20:37:51.000+02:00
diff --git a/README.md b/README.md
@@ -44,20 +44,35 @@ any image file as an input.
 - `python demo_camera.py` to run the web demo.
 
 ## Training steps
+**UPDATE 10/2017:**
+
+**-Augmented samples are fetched from the [server](https://github.com/michalfaber/rmpe_dataset_server). The network never sees the same image twice,
+  which was a problem in the previous approach (tool rmpe_dataset_transformer).
+  This allows you to run augmentation locally or on separate node. 
+  You can start 2 instances, one serving training set and a second one serving validation set (on different port if locally)** 
+  
+**-Experimentally I've added image normalization as in vgg paper (images should be zero-centered by mean pixel subtraction)**
+
 - Install gsutil `curl https://sdk.cloud.google.com | bash`. This is a really helpful tool for downloading large datasets. 
 - Download the data set (~25 GB) `cd dataset; sh get_dataset.sh`,
 - Download [COCO official toolbox](https://github.com/pdollar/coco) in `dataset/coco/` . 
 - `cd coco/PythonAPI; sudo python setup.py install` to install pycocotools.
 - Go to the "training" folder `cd ../../../training`.
 - Generate masks `python generate_masks.py`. Note: set the parameter "mode" in generate_masks.py (validation or training) 
 - Create intermediate dataset `python generate_hdf5.py`. This tool creates a dataset in hdf5 format. The structure of this dataset is very similar to the 
-    original lmdb dataset where a sample is represented as an array: 6 x width x height (3 channels for image, 1 channel for metedata, 2 channels for masks)    
-    Note: set the parameters "datasets", "val_size" in generate_hdf5.py         
-- The resulting intermediate hdf5 dataset has to be transformed to the more keras friendly format with data and labels ready to use in python generator.
-  Download and compile the tool [dataset_transformer](https://github.com/michalfaber/rmpe_dataset_transformer). 
-    Use this tool to create final datasets `dataset/train_dataset.h5` `dataset/val_dataset.h5`  
-- You can verify the datasets `inspect_dataset.ipynb` 
-- Start training `python train_pose.py` 
+    original lmdb dataset where a sample is represented as an array: 5 x width x height (3 channels for image, 1 channel for metadata, 1 channel for miss masks)
+    For MPI dataset there are 6 channels with additional all masks.
+    Note: set the parameters `datasets` and `val_size` in `generate_hdf5.py`
+- Download and compile the dataset server [rmpe_dataset_server](https://github.com/michalfaber/rmpe_dataset_server).
+  This server generates augmented samples on the fly. Source samples are retrieved from previously generated hdf5 dataset file.                           
+- Start training data server in the first terminal session. 
+    `./rmpe_dataset_server ../../keras_Realtime_Multi-Person_Pose_Estimation/dataset/train_dataset.h5 5555`
+- Start validation data server in a second terminal session.
+    `./rmpe_dataset_server ../../keras_Realtime_Multi-Person_Pose_Estimation/dataset/val_dataset.h5 5556`
+- Optionally you can verify the datasets `inspect_dataset.ipynb`
+- Set the correct number of samples within `python train_pose.py` - variables "train_samples = ???" and "val_samples = ???".  
+ This number is used by keras to determine how many samples are in 1 epoch.
+- Train the model in a third terminal `python train_pose.py`
 
 NOTE:
 I trained the model from scratch for 3.5 days on a single GPU 1070 but didn't obtain satisfactory results.
diff --git a/demo_image.py b/demo_image.py
@@ -238,7 +238,10 @@ def process (input_image, params, model_params):
     print('start processing...')
 
     # load model
-    model = get_testing_model()
+
+    # authors of original model don't use
+    # vgg normalization (subtracting mean) on input images
+    model = get_testing_model(vgg_norm=False)
     model.load_weights(keras_weights_file)
 
     # load config
@@ -252,5 +255,7 @@ def process (input_image, params, model_params):
 
     cv2.imwrite(output, canvas)
 
+    cv2.destroyAllWindows()
+
 
 
diff --git a/model.py b/model.py
@@ -6,6 +6,7 @@
 from keras.layers.merge import Multiply
 from keras.regularizers import l2
 from keras.initializers import random_normal,constant
+import numpy as np
 
 def relu(x): return Activation('relu')(x)
 
@@ -109,7 +110,7 @@ def apply_mask(x, mask1, mask2, num_p, stage, branch):
     return w
 
 
-def get_training_model(weight_decay):
+def get_training_model(weight_decay, vgg_norm):
 
     stages = 6
     np_branch1 = 38
@@ -130,7 +131,11 @@ def get_training_model(weight_decay):
     inputs.append(vec_weight_input)
     inputs.append(heat_weight_input)
 
-    img_normalized = Lambda(lambda x: x / 256 - 0.5)(img_input)
+    if vgg_norm:
+        vgg_mean = np.array([103.939, 116.779, 123.68])  # BGR
+        img_normalized = Lambda(lambda x: x - vgg_mean)(img_input)
+    else:
+        img_normalized = Lambda(lambda x: x / 256 - 0.5)(img_input) # [-0.5, 0.5]
 
     # VGG
     stage0_out = vgg_block(img_normalized, weight_decay)
@@ -169,7 +174,7 @@ def get_training_model(weight_decay):
     return model
 
 
-def get_testing_model():
+def get_testing_model(vgg_norm=False):
     stages = 6
     np_branch1 = 38
     np_branch2 = 19
@@ -178,7 +183,11 @@ def get_testing_model():
 
     img_input = Input(shape=img_input_shape)
 
-    img_normalized = Lambda(lambda x: x / 256 - 0.5)(img_input) # [-0.5, 0.5]
+    if vgg_norm:
+        vgg_mean = np.array([103.939, 116.779, 123.68])  # BGR
+        img_normalized = Lambda(lambda x: x - vgg_mean)(img_input)
+    else:
+        img_normalized = Lambda(lambda x: x / 256 - 0.5)(img_input) # [-0.5, 0.5]
 
     # VGG
     stage0_out = vgg_block(img_normalized, None)
diff --git a/training/ds_generator_client.py b/training/ds_generator_client.py
@@ -0,0 +1,137 @@
+import numpy as np
+import zmq
+from ast import literal_eval as make_tuple
+
+import six
+if six.PY3:
+  buffer_ = memoryview
+else:
+  buffer_ = buffer  # noqa
+
+
+class DataGeneratorClient(object):
+
+    def __init__(self, host, port, hwm=20, batch_size=10):
+        """
+        :param host:
+        :param port:
+        :param hwm:, optional
+          The `ZeroMQ high-water mark (HWM)
+          <http://zguide.zeromq.org/page:all#High-Water-Marks>`_ on the
+          sending socket. Increasing this increases the buffer, which can be
+          useful if your data preprocessing times are very random.  However,
+          it will increase memory usage. There is no easy way to tell how
+          many batches will actually be queued with a particular HWM.
+          Defaults to 10. Be sure to set the corresponding HWM on the
+          receiving end as well.
+        :param batch_size:
+        :param shuffle:
+        :param seed:
+        """
+        self.host = host
+        self.port = port
+        self.hwm = hwm
+        self.socket = None
+
+        self.split_point = 38
+        self.vec_num = 38
+        self.heat_num = 19
+
+        self.batch_size = batch_size
+
+    def _recv_arrays(self):
+        """Receive a list of NumPy arrays.
+        Parameters
+        ----------
+        socket : :class:`zmq.Socket`
+        The socket to receive the arrays on.
+        Returns
+        -------
+        list
+        A list of :class:`numpy.ndarray` objects.
+        Raises
+        ------
+        StopIteration
+        If the first JSON object received contains the key `stop`,
+        signifying that the server has finished a single epoch.
+        """
+        headers = self.socket.recv_json()
+        if 'stop' in headers:
+            raise StopIteration
+        arrays = []
+
+        for header in headers:
+            data = self.socket.recv()
+            buf = buffer_(data)
+            array = np.frombuffer(buf, dtype=np.dtype(header['descr']))
+            array.shape = make_tuple(header['shape'])
+
+            if header['fortran_order']:
+                array.shape = header['shape'][::-1]
+                array = array.transpose()
+            arrays.append(array)
+
+        return arrays
+
+    def gen(self):
+        batches_x, batches_x1, batches_x2, batches_y1, batches_y2 = \
+            [None]*self.batch_size, [None]*self.batch_size, [None]*self.batch_size, \
+            [None]*self.batch_size, [None]*self.batch_size
+
+        sample_idx = 0
+
+        while True:
+            data_img, mask_img, label = tuple(self._recv_arrays())
+
+            # image
+            dta_img = np.transpose(data_img, (1, 2, 0))
+            batches_x[sample_idx]=dta_img[np.newaxis, ...]
+
+            # mask - the same for vec_weights, heat_weights
+            vec_weights = np.repeat(mask_img[:,:,np.newaxis], self.vec_num, axis=2)
+            heat_weights = np.repeat(mask_img[:,:,np.newaxis], self.heat_num, axis=2)
+
+            batches_x1[sample_idx]=vec_weights[np.newaxis, ...]
+            batches_x2[sample_idx]=heat_weights[np.newaxis, ...]
+
+            # label
+            vec_label = label[:self.split_point, :, :]
+            vec_label = np.transpose(vec_label, (1, 2, 0))
+            heat_label = label[self.split_point:, :, :]
+            heat_label = np.transpose(heat_label, (1, 2, 0))
+
+            batches_y1[sample_idx]=vec_label[np.newaxis, ...]
+            batches_y2[sample_idx]=heat_label[np.newaxis, ...]
+
+            sample_idx += 1
+
+            if sample_idx == self.batch_size:
+                sample_idx = 0
+
+                batch_x = np.concatenate(batches_x)
+                batch_x1 = np.concatenate(batches_x1)
+                batch_x2 = np.concatenate(batches_x2)
+                batch_y1 = np.concatenate(batches_y1)
+                batch_y2 = np.concatenate(batches_y2)
+
+                yield [batch_x, batch_x1,  batch_x2], \
+                       [batch_y1, batch_y2,
+                        batch_y1, batch_y2,
+                        batch_y1, batch_y2,
+                        batch_y1, batch_y2,
+                        batch_y1, batch_y2,
+                        batch_y1, batch_y2]
+
+    def start(self):
+        context = zmq.Context()
+        self.socket = context.socket(zmq.PULL)
+        self.socket.set_hwm(self.hwm)
+        self.socket.connect("tcp://{}:{}".format(self.host, self.port))
+
+    def stop(self):
+        if self.socket:
+            self.socket.__del__()
+
+    def restart(self):
+        self.stop()
+        self.start()
diff --git a/training/generate_hdf5.py b/training/generate_hdf5.py
@@ -22,16 +22,15 @@
     (val_anno_path, val_img_dir, val_mask_dir, "COCO")
 ]
 
-#datasets = [
-#    (val_anno_path, val_img_dir, val_mask_dir, "COCO")
-#]
+# datasets = [
+#     (val_anno_path, val_img_dir, val_mask_dir, "COCO")
+# ]
 
 joint_all = []
-tr_hdf5_path = os.path.join(dataset_dir, "train_pre_dataset.h5")
-val_hdf5_path = os.path.join(dataset_dir, "val_pre_dataset.h5")
+tr_hdf5_path = os.path.join(dataset_dir, "train_dataset.h5")
+val_hdf5_path = os.path.join(dataset_dir, "val_dataset.h5")
 
 val_size = 2645 # size of validation set
-
 #val_size = 300
 
 def process():
diff --git a/training/inspect_dataset.ipynb b/training/inspect_dataset.ipynb
diff --git a/training/train_pose.py b/training/train_pose.py
@@ -6,10 +6,10 @@
 sys.path.append("..")
 from model import get_training_model
 from ds_iterator import DataIterator
+from ds_generator_client import DataGeneratorClient
 from optimizers import MultiSGD
 from keras.callbacks import LearningRateScheduler, ModelCheckpoint, CSVLogger, TensorBoard
 from keras.layers.convolutional import Conv2D
-from keras.utils.data_utils import get_file
 from keras.applications.vgg19 import VGG19
 
 batch_size = 10
@@ -21,6 +21,9 @@
 stepsize = 136106 #68053   // after each stepsize iterations update learning rate: lr=lr*gamma
 max_iter = 200000 # 600000
 
+# True = start data generator client, False = use augmented dataset file (deprecated)
+use_client_gen = True
+
 WEIGHTS_BEST = "weights.best.h5"
 TRAINING_LOG = "training.csv"
 LOGS_DIR = "./logs"
@@ -30,7 +33,7 @@ def get_last_epoch():
     return max(data['epoch'].values)
 
 
-model = get_training_model(weight_decay)
+model = get_training_model(weight_decay, vgg_norm=True)
 
 from_vgg = dict()
 from_vgg['conv1_1'] = 'block1_conv1'
@@ -64,15 +67,28 @@ def get_last_epoch():
     last_epoch = 0
 
 # prepare generators
-train_di = DataIterator("../dataset/train_dataset.h5", data_shape=(3, 368, 368),
-                  mask_shape=(1, 46, 46),
-                  label_shape=(57, 46, 46),
-                  vec_num=38, heat_num=19, batch_size=batch_size, shuffle=True)
 
-val_di = DataIterator("../dataset/val_dataset.h5", data_shape=(3, 368, 368),
-                  mask_shape=(1, 46, 46),
-                  label_shape=(57, 46, 46),
-                  vec_num=38, heat_num=19, batch_size=batch_size, shuffle=True)
+if use_client_gen:
+    train_client = DataGeneratorClient(port=5555, host="localhost", hwm=160, batch_size=10)
+    train_client.start()
+    train_di = train_client.gen()
+    train_samples = 52597
+
+    val_client = DataGeneratorClient(port=5556, host="localhost", hwm=160, batch_size=10)
+    val_client.start()
+    val_di = val_client.gen()
+    val_samples = 2645
+else:
+    train_di = DataIterator("../dataset/train_dataset.h5", data_shape=(3, 368, 368),
+                      mask_shape=(1, 46, 46),
+                      label_shape=(57, 46, 46),
+                      vec_num=38, heat_num=19, batch_size=batch_size, shuffle=True)
+    train_samples=train_di.N
+    val_di = DataIterator("../dataset/val_dataset.h5", data_shape=(3, 368, 368),
+                      mask_shape=(1, 46, 46),
+                      label_shape=(57, 46, 46),
+                      vec_num=38, heat_num=19, batch_size=batch_size, shuffle=True)
+    val_samples=val_di.N
 
 # setup lr multipliers for conv layers
 lr_mult=dict()
@@ -131,7 +147,7 @@ def get_last_epoch():
 loss_weights["weight_stage6_L2"] = 1
 
 # learning rate schedule - equivalent of caffe lr_policy =  "step"
-iterations_per_epoch = train_di.N // batch_size
+iterations_per_epoch = train_samples // batch_size
 def step_decay(epoch):
     initial_lrate = base_lr
     steps = epoch * iterations_per_epoch
@@ -155,12 +171,12 @@ def step_decay(epoch):
 model.compile(loss=losses, loss_weights=loss_weights, optimizer=multisgd, metrics=["accuracy"])
 
 model.fit_generator(train_di,
-                    steps_per_epoch=train_di.N // batch_size,
+                    steps_per_epoch=train_samples // batch_size,
                     epochs=max_iter,
                     callbacks=callbacks_list,
                     validation_data=val_di,
-                    validation_steps=val_di.N // batch_size,
-                    use_multiprocessing=True,
+                    validation_steps=val_samples // batch_size,
+                    use_multiprocessing=False,
                     initial_epoch=last_epoch
                     )