
Commit 3eddd44

fixed training procedure

1 parent ac25142, commit 3eddd44

9 files changed: +85 −99 lines

.gitignore
+4 −1

```diff
@@ -11,4 +11,7 @@ dataset/annotations
 dataset/coco
 .idea
 __pycache__
-.ipynb_checkpoints
+.ipynb_checkpoints
+logs/
+training.csv
+weights.best.h5
```

README.md
+19 −14

```diff
@@ -44,15 +44,30 @@ any image file as an input.
 - `python demo_camera.py` to run the web demo.
 
 ## Training steps
-**UPDATE 10/2017:**
 
-**-Augmented samples are fetched from the [server](https://github.com/michalfaber/rmpe_dataset_server). The network never sees the same image twice
+**UPDATE 26/10/2017**
+
+**Fixed a problem with the training procedure.
+Here are my results after training for 5 epochs = 25000 iterations (1 epoch is ~5000 batches).
+The loss values are quite similar to those in the original training - [output.txt](https://github.com/ZheC/Realtime_Multi-Person_Pose_Estimation/blob/master/training/example_loss/output.txt)**
+
+<p align="center">
+<img src="https://github.com/michalfaber/keras_Realtime_Multi-Person_Pose_Estimation/blob/master/readme/losses.png" width="700">
+</p>
+
+**Results of running `demo_image --image sample_images/ski.jpg --model training/weights.best.h5` with weights trained for only 25000 iterations. Not too bad! Training on my single 1070 GPU took around 10 hours.**
+
+<p align="center">
+<img src="https://github.com/michalfaber/keras_Realtime_Multi-Person_Pose_Estimation/blob/master/readme/5ep_result.png" width="700">
+</p>
+
+**UPDATE 22/10/2017:**
+
+**Augmented samples are fetched from the [server](https://github.com/michalfaber/rmpe_dataset_server). The network never sees the same image twice,
 which was a problem in the previous approach (tool rmpe_dataset_transformer).
 This allows you to run augmentation locally or on a separate node.
 You can start 2 instances, one serving the training set and a second one serving the validation set (on a different port if running locally).**
 
-**-Experimentally I've added image normalization as in the vgg paper (images should be zero-centered by mean pixel subtraction)**
-
 - Install gsutil `curl https://sdk.cloud.google.com | bash`. This is a really helpful tool for downloading large datasets.
 - Download the data set (~25 GB) `cd dataset; sh get_dataset.sh`.
 - Download [COCO official toolbox](https://github.com/pdollar/coco) in `dataset/coco/`.
@@ -73,16 +88,6 @@ any image file as an input.
 - Set the correct number of samples within `python train_pose.py` - variables "train_samples = ???" and "val_samples = ???".
 This number is used by keras to determine how many samples are in 1 epoch.
 - Train the model in a third terminal `python train_pose.py`
-
-NOTE:
-I trained the model from scratch for 3.5 days on a single GPU 1070 but didn't obtain satisfactory results.
-38 epochs is about 200000 iterations in caffe.
-I noticed that reducing the learning rate after step 136106 (as in the original caffe model) was probably too early,
-because the learning process slowed down.
-
-<div align="center">
-<img src="https://github.com/michalfaber/keras_Realtime_Multi-Person_Pose_Estimation/blob/master/readme/tr_results.png" width="450" height="563">
-</div>
 
 ## Related repository
 - CVPR'16, [Convolutional Pose Machines](https://github.com/shihenw/convolutional-pose-machines-release).
```
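Since the README now asks you to set `train_samples` and `val_samples` by hand, a quick illustration of what those counts do may help. The snippet below is a minimal sketch, not the repository's `train_pose.py`: the toy model, random generator, and all numbers are placeholders, and it only shows how Keras 2's `fit_generator` derives epoch length from a sample count and batch size.

```python
# Minimal sketch (not the repository's train_pose.py): how the sample counts
# feed into Keras 2's fit_generator. All names and numbers are illustrative
# placeholders, not values from this commit.
import numpy as np
from keras.models import Sequential
from keras.layers import Dense

train_samples = 50000   # set from the count reported by the training data server
val_samples = 5000      # set from the count reported by the validation server
batch_size = 10

def batches():
    while True:  # Keras generators must yield forever
        yield np.random.rand(batch_size, 4), np.random.rand(batch_size, 1)

model = Sequential([Dense(1, input_dim=4)])
model.compile(optimizer='sgd', loss='mse')

# 1 epoch = train_samples // batch_size steps, e.g. 50000 // 10 = 5000 batches,
# consistent with the "1 epoch is ~5000 batches" note above.
model.fit_generator(batches(),
                    steps_per_epoch=train_samples // batch_size,
                    validation_data=batches(),
                    validation_steps=val_samples // batch_size,
                    epochs=1)
```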

caffe_to_keras.py
+2 −2

```diff
@@ -1,11 +1,11 @@
-from model import get_model
+from model import get_testing_model
 import numpy as np
 import os
 
 CAFFE_LAYERS_DIR = "model/caffe/layers"
 KERAS_MODEL_FILE = "model/keras/model.h5"
 
-m = get_model()
+m = get_testing_model()
 
 for layer in m.layers:
     layer_name = layer.name
```
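The switch to `get_testing_model()` makes sense for conversion: the testing graph, unlike the training one, presumably has no mask inputs or loss branches, so its layers line up with the Caffe weights. The sketch below is a hypothetical reconstruction of how such a conversion loop typically finishes; the per-layer `.npy` file names under `CAFFE_LAYERS_DIR` are assumptions for illustration, not the script's actual naming scheme.

```python
# Hypothetical sketch of the conversion flow around the lines changed above.
# The per-layer file names (W_<layer>.npy / b_<layer>.npy) are an assumption
# for illustration; the real script's naming may differ.
import os
import numpy as np
from model import get_testing_model

CAFFE_LAYERS_DIR = "model/caffe/layers"
KERAS_MODEL_FILE = "model/keras/model.h5"

m = get_testing_model()

for layer in m.layers:
    layer_name = layer.name
    w_path = os.path.join(CAFFE_LAYERS_DIR, "W_%s.npy" % layer_name)
    b_path = os.path.join(CAFFE_LAYERS_DIR, "b_%s.npy" % layer_name)
    if os.path.exists(w_path) and os.path.exists(b_path):
        # Keras conv/dense layers store weights as [kernel, bias]
        layer.set_weights([np.load(w_path), np.load(b_path)])

m.save_weights(KERAS_MODEL_FILE)
```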

demo.ipynb
+30 −35

```diff
@@ -2,11 +2,17 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Using TensorFlow backend.\n"
+     ]
+    }
+   ],
    "source": [
     "import keras\n",
     "from keras.models import Sequential\n",
@@ -30,9 +36,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 2,
    "metadata": {
-    "collapsed": false
+    "collapsed": true
    },
    "outputs": [],
    "source": [
@@ -134,11 +140,12 @@
    "cell_type": "code",
    "execution_count": 18,
    "metadata": {
-    "collapsed": false
+    "collapsed": true
    },
    "outputs": [],
    "source": [
-    "weights_path = \"model/keras/model.h5\"\n",
+    "weights_path = \"model/keras/model.h5\" # original weights converted from caffe\n",
+    "#weights_path = \"training/weights.best.h5\" # weights trained from scratch\n",
     "\n",
     "input_shape = (None,None,3)\n",
     "\n",
@@ -195,9 +202,7 @@
   {
    "cell_type": "code",
    "execution_count": 20,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -237,7 +242,7 @@
    "cell_type": "code",
    "execution_count": 21,
    "metadata": {
-    "collapsed": false
+    "collapsed": true
    },
    "outputs": [],
    "source": [
@@ -256,9 +261,7 @@
   {
    "cell_type": "code",
    "execution_count": 22,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -380,9 +383,7 @@
   {
    "cell_type": "code",
    "execution_count": 23,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -416,9 +417,7 @@
   {
    "cell_type": "code",
    "execution_count": 25,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -463,7 +462,7 @@
    "cell_type": "code",
    "execution_count": 26,
    "metadata": {
-    "collapsed": false
+    "collapsed": true
    },
    "outputs": [],
    "source": [
@@ -516,7 +515,7 @@
    "cell_type": "code",
    "execution_count": 28,
    "metadata": {
-    "collapsed": false
+    "collapsed": true
    },
    "outputs": [],
    "source": [
@@ -573,7 +572,7 @@
    "cell_type": "code",
    "execution_count": 29,
    "metadata": {
-    "collapsed": false
+    "collapsed": true
    },
    "outputs": [],
    "source": [
@@ -645,9 +644,7 @@
   {
    "cell_type": "code",
    "execution_count": 31,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -691,9 +688,7 @@
   {
    "cell_type": "code",
    "execution_count": 32,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -744,9 +739,9 @@
  "metadata": {
   "anaconda-cloud": {},
   "kernelspec": {
-   "display_name": "Python [conda env:python35]",
+   "display_name": "Python 3",
    "language": "python",
-   "name": "conda-env-python35-py"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -758,9 +753,9 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.5.2"
+   "version": "3.6.1"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 0
+ "nbformat_minor": 1
 }
```

demo_image.py
+3 −3

```diff
@@ -9,8 +9,6 @@
 from model import get_testing_model
 
 
-keras_weights_file = "model/keras/model.h5"
-
 # find connection in the specified sequence, center 29 is in the position 15
 limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \
            [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \
@@ -229,10 +227,12 @@ def process (input_image, params, model_params):
     parser = argparse.ArgumentParser()
     parser.add_argument('--image', type=str, required=True, help='input image')
     parser.add_argument('--output', type=str, default='result.png', help='output image')
+    parser.add_argument('--model', type=str, default='model/keras/model.h5', help='path to the weights file')
 
     args = parser.parse_args()
     input_image = args.image
     output = args.output
+    keras_weights_file = args.model
 
     tic = time.time()
     print('start processing...')
@@ -241,7 +241,7 @@ def process (input_image, params, model_params):
 
     # authors of original model don't use
     # vgg normalization (subtracting mean) on input images
-    model = get_testing_model(vgg_norm=False)
+    model = get_testing_model()
     model.load_weights(keras_weights_file)
 
     # load config
```
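With the new `--model` argument the demo no longer hard-codes the weights file: `python demo_image.py --image sample_images/ski.jpg --model training/weights.best.h5` runs inference with self-trained weights (the command quoted in the README above), while omitting `--model` falls back to the converted Caffe weights at `model/keras/model.h5`, the argparse default.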

model.py
+4 −13

```diff
@@ -6,7 +6,6 @@
 from keras.layers.merge import Multiply
 from keras.regularizers import l2
 from keras.initializers import random_normal,constant
-import numpy as np
 
 def relu(x): return Activation('relu')(x)
 
@@ -110,7 +109,7 @@ def apply_mask(x, mask1, mask2, num_p, stage, branch):
     return w
 
 
-def get_training_model(weight_decay, vgg_norm):
+def get_training_model(weight_decay):
 
     stages = 6
     np_branch1 = 38
@@ -131,11 +130,7 @@ def get_training_model(weight_decay, vgg_norm):
     inputs.append(vec_weight_input)
     inputs.append(heat_weight_input)
 
-    if vgg_norm:
-        vgg_mean = np.array([103.939, 116.779, 123.68]) # BGR
-        img_normalized = Lambda(lambda x: x - vgg_mean)(img_input)
-    else:
-        img_normalized = Lambda(lambda x: x / 256 - 0.5)(img_input) # [-0.5, 0.5]
+    img_normalized = Lambda(lambda x: x / 256 - 0.5)(img_input) # [-0.5, 0.5]
 
     # VGG
     stage0_out = vgg_block(img_normalized, weight_decay)
@@ -174,7 +169,7 @@ def get_training_model(weight_decay, vgg_norm):
     return model
 
 
-def get_testing_model(vgg_norm=False):
+def get_testing_model():
     stages = 6
     np_branch1 = 38
     np_branch2 = 19
@@ -183,11 +178,7 @@ def get_testing_model(vgg_norm=False):
 
     img_input = Input(shape=img_input_shape)
 
-    if vgg_norm:
-        vgg_mean = np.array([103.939, 116.779, 123.68]) # BGR
-        img_normalized = Lambda(lambda x: x - vgg_mean)(img_input)
-    else:
-        img_normalized = Lambda(lambda x: x / 256 - 0.5)(img_input) # [-0.5, 0.5]
+    img_normalized = Lambda(lambda x: x / 256 - 0.5)(img_input) # [-0.5, 0.5]
 
     # VGG
     stage0_out = vgg_block(img_normalized, None)
```
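With the `vgg_norm` branch gone, both the training and testing graphs always normalize pixels to [-0.5, 0.5]. Below is a self-contained sketch of just that preprocessing Lambda, standalone for illustration; in `model.py` its output feeds `vgg_block` instead of being the model output.

```python
import numpy as np
from keras.layers import Input, Lambda
from keras.models import Model

# The normalization kept by this commit: scale raw [0, 255] pixels to
# [-0.5, 0.5] instead of subtracting the VGG BGR mean (the removed branch).
img_input = Input(shape=(None, None, 3))
img_normalized = Lambda(lambda x: x / 256 - 0.5)(img_input)
m = Model(img_input, img_normalized)

x = np.random.randint(0, 256, size=(1, 8, 8, 3)).astype(np.float32)
out = m.predict(x)
print(out.min(), out.max())  # values fall within [-0.5, 0.49609375]
```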

readme/5ep_result.png
new image, 123 KB

readme/losses.png
new image, 77.8 KB
