init commit

foamliu · foamliu · commit 7e8d093c659d · 2018-04-28T15:53:27.000+08:00
diff --git a/README.md b/README.md
@@ -0,0 +1,55 @@
+# Car Recognition
+
+
+This repository performs car recognition by fine-tuning ResNet-152 on the Cars Dataset from Stanford.
+
+
+## Dependencies
+
+- [NumPy](http://docs.scipy.org/doc/numpy-1.10.1/user/install.html)
+- [Tensorflow](https://www.tensorflow.org/versions/r0.8/get_started/os_setup.html)
+- [Keras](https://keras.io/#installation)
+- [OpenCV](https://opencv-python-tutroals.readthedocs.io/en/latest/)
+
+## Dataset
+
+We use the Cars Dataset, which contains 16,185 images of 196 classes of cars. The data is split into 8,144 training images and 8,041 testing images, where each class has been split roughly in a 50-50 split.
+
+ ![image](https://github.com/foamliu/Car-Recognition/raw/master/images/random.png)
+
+You can get it from [Cars Dataset](https://ai.stanford.edu/~jkrause/cars/car_dataset.html). Make sure cars_train.tgz and cars_test.tgz are in the mart folder.
+
+## ImageNet Pretrained Models
+
+Download [ResNet-152](https://drive.google.com/file/d/0Byy2AcGyEVxfeXExMzNNOHpEODg/view?usp=sharing) into imagenet_models folder.
+
+## Usage
+
+### Data Pre-processing
+Extract the 8,144 training images and split them roughly by the 80:20 rule (6,549 for training, 1,595 for validation):
+```bash
+$ python pre-process.py
+```
+ When complete, folder structure looks like:
+
+ ![image](https://github.com/foamliu/Car-Recognition/raw/master/images/data.png)
+
+### Train
+```bash
+$ python train.py
+```
+ ![image](https://github.com/foamliu/Car-Recognition/raw/master/images/train.png)
+
+If you want to visualize during training, run in your terminal:
+```bash
+$ tensorboard --logdir path_to_current_dir/logs
+```
+
+### Analysis
+Use 8,041 testing images for result analysis.
+
+
+### Predict
+```bash
+$ python predict.py --i [image_path]
+```
diff --git a/Requirements.txt b/Requirements.txt
@@ -0,0 +1,4 @@
+numpy
+tensorflow-gpu
+keras
+pillow
diff --git a/app-what-car.py b/app-what-car.py
@@ -0,0 +1,32 @@
+#from resnet_50 import resnet50_model
+from resnet_152 import resnet152_model
+from keras.preprocessing.image import ImageDataGenerator
+
+IMG_WIDTH, IMG_HEIGHT = 227, 227
+TRAIN_DATA = 'mart/standford-cars-crop/train'
+VALID_DATA = 'mart/standford-cars-crop/valid'
+NUM_CLASSES = 196
+NB_TRAIN_SAMPLES = 6549
+NB_VALID_SAMPLES = 1595
+BATCH_SIZE = 16
+
+# build a classifier model
+#model = resnet50_model(IMG_HEIGHT, IMG_WIDTH, 3, NUM_CLASSES)
+model = resnet152_model(IMG_HEIGHT, IMG_WIDTH, 3, NUM_CLASSES)
+
+# prepare data augmentation configuration
+train_data_gen = ImageDataGenerator(rescale=1. / 255, zoom_range=0.2, rotation_range=20, width_shift_range=0.2, height_shift_range=0.2, horizontal_flip=True)
+valid_data_gen = ImageDataGenerator(rescale=1. / 255, zoom_range=0.2, rotation_range=20, width_shift_range=0.2, height_shift_range=0.2, horizontal_flip=True)
+
+train_generator = train_data_gen.flow_from_directory(TRAIN_DATA, (IMG_WIDTH, IMG_HEIGHT), batch_size=BATCH_SIZE, class_mode='categorical')
+valid_generator = valid_data_gen.flow_from_directory(VALID_DATA, (IMG_WIDTH, IMG_HEIGHT), batch_size=BATCH_SIZE, class_mode='categorical')
+
+# fine tune the model
+history = model.fit_generator(
+    train_generator,
+    steps_per_epoch=NB_TRAIN_SAMPLES // BATCH_SIZE,
+    validation_data=valid_generator,
+    validation_steps=NB_VALID_SAMPLES // BATCH_SIZE,
+    epochs=80)
+
+model.save_weights("model.h5")
diff --git a/custom_layers/__init__.py b/custom_layers/__init__.py
@@ -0,0 +1 @@
+# Python Package
diff --git a/custom_layers/scale_layer.py b/custom_layers/scale_layer.py
@@ -0,0 +1,71 @@
+from keras.layers.core import Layer
+from keras.engine import InputSpec
+from keras import backend as K
+try:
+    from keras import initializations
+except ImportError:
+    from keras import initializers as initializations
+
+class Scale(Layer):
+    '''Learns a set of weights and biases used for scaling the input data.
+    the output consists simply in an element-wise multiplication of the input
+    and a sum of a set of constants:
+
+        out = in * gamma + beta,
+
+    where 'gamma' and 'beta' are the weights and biases larned.
+
+    # Arguments
+        axis: integer, axis along which to normalize in mode 0. For instance,
+            if your input tensor has shape (samples, channels, rows, cols),
+            set axis to 1 to normalize per feature map (channels axis).
+        momentum: momentum in the computation of the
+            exponential average of the mean and standard deviation
+            of the data, for feature-wise normalization.
+        weights: Initialization weights.
+            List of 2 Numpy arrays, with shapes:
+            `[(input_shape,), (input_shape,)]`
+        beta_init: name of initialization function for shift parameter
+            (see [initializations](../initializations.md)), or alternatively,
+            Theano/TensorFlow function to use for weights initialization.
+            This parameter is only relevant if you don't pass a `weights` argument.
+        gamma_init: name of initialization function for scale parameter (see
+            [initializations](../initializations.md)), or alternatively,
+            Theano/TensorFlow function to use for weights initialization.
+            This parameter is only relevant if you don't pass a `weights` argument.
+    '''
+    def __init__(self, weights=None, axis=-1, momentum = 0.9, beta_init='zero', gamma_init='one', **kwargs):
+        self.momentum = momentum
+        self.axis = axis
+        self.beta_init = initializations.get(beta_init)
+        self.gamma_init = initializations.get(gamma_init)
+        self.initial_weights = weights
+        super(Scale, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+        self.input_spec = [InputSpec(shape=input_shape)]
+        shape = (int(input_shape[self.axis]),)
+
+        # Compatibility with TensorFlow >= 1.0.0
+        self.gamma = K.variable(self.gamma_init(shape), name='{}_gamma'.format(self.name))
+        self.beta = K.variable(self.beta_init(shape), name='{}_beta'.format(self.name))
+        #self.gamma = self.gamma_init(shape, name='{}_gamma'.format(self.name))
+        #self.beta = self.beta_init(shape, name='{}_beta'.format(self.name))
+        self.trainable_weights = [self.gamma, self.beta]
+
+        if self.initial_weights is not None:
+            self.set_weights(self.initial_weights)
+            del self.initial_weights
+
+    def call(self, x, mask=None):
+        input_shape = self.input_spec[0].shape
+        broadcast_shape = [1] * len(input_shape)
+        broadcast_shape[self.axis] = input_shape[self.axis]
+
+        out = K.reshape(self.gamma, broadcast_shape) * x + K.reshape(self.beta, broadcast_shape)
+        return out
+
+    def get_config(self):
+        config = {"momentum": self.momentum, "axis": self.axis}
+        base_config = super(Scale, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
diff --git a/resnet_152.py b/resnet_152.py
@@ -0,0 +1,207 @@
+# -*- coding: utf-8 -*-
+
+from keras.optimizers import SGD
+from keras.layers import Input, Dense, Conv2D, MaxPooling2D, AveragePooling2D, ZeroPadding2D, Flatten, merge, Activation
+from keras.layers.normalization import BatchNormalization
+from keras.models import Model
+from keras import backend as K
+
+from custom_layers.scale_layer import Scale
+
+from sklearn.metrics import log_loss
+
+import sys
+
+sys.setrecursionlimit(3000)
+
+
+def identity_block(input_tensor, kernel_size, filters, stage, block):
+    '''The identity_block is the block that has no conv layer at shortcut
+    # Arguments
+        input_tensor: input tensor
+        kernel_size: defualt 3, the kernel size of middle conv layer at main path
+        filters: list of integers, the nb_filters of 3 conv layer at main path
+        stage: integer, current stage label, used for generating layer names
+        block: 'a','b'..., current block label, used for generating layer names
+    '''
+    eps = 1.1e-5
+    nb_filter1, nb_filter2, nb_filter3 = filters
+    conv_name_base = 'res' + str(stage) + block + '_branch'
+    bn_name_base = 'bn' + str(stage) + block + '_branch'
+    scale_name_base = 'scale' + str(stage) + block + '_branch'
+
+    x = Conv2D(nb_filter1, (1, 1), name=conv_name_base + '2a', use_bias=False)(input_tensor)
+    x = BatchNormalization(epsilon=eps, axis=bn_axis, name=bn_name_base + '2a')(x)
+    x = Scale(axis=bn_axis, name=scale_name_base + '2a')(x)
+    x = Activation('relu', name=conv_name_base + '2a_relu')(x)
+
+    x = ZeroPadding2D((1, 1), name=conv_name_base + '2b_zeropadding')(x)
+    x = Conv2D(nb_filter2, (kernel_size, kernel_size),
+               name=conv_name_base + '2b', use_bias=False)(x)
+    x = BatchNormalization(epsilon=eps, axis=bn_axis, name=bn_name_base + '2b')(x)
+    x = Scale(axis=bn_axis, name=scale_name_base + '2b')(x)
+    x = Activation('relu', name=conv_name_base + '2b_relu')(x)
+
+    x = Conv2D(nb_filter3, (1, 1), name=conv_name_base + '2c', use_bias=False)(x)
+    x = BatchNormalization(epsilon=eps, axis=bn_axis, name=bn_name_base + '2c')(x)
+    x = Scale(axis=bn_axis, name=scale_name_base + '2c')(x)
+
+    x = merge([x, input_tensor], mode='sum', name='res' + str(stage) + block)
+    x = Activation('relu', name='res' + str(stage) + block + '_relu')(x)
+    return x
+
+
+def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)):
+    '''conv_block is the block that has a conv layer at shortcut
+    # Arguments
+        input_tensor: input tensor
+        kernel_size: defualt 3, the kernel size of middle conv layer at main path
+        filters: list of integers, the nb_filters of 3 conv layer at main path
+        stage: integer, current stage label, used for generating layer names
+        block: 'a','b'..., current block label, used for generating layer names
+    Note that from stage 3, the first conv layer at main path is with subsample=(2,2)
+    And the shortcut should have subsample=(2,2) as well
+    '''
+    eps = 1.1e-5
+    nb_filter1, nb_filter2, nb_filter3 = filters
+    conv_name_base = 'res' + str(stage) + block + '_branch'
+    bn_name_base = 'bn' + str(stage) + block + '_branch'
+    scale_name_base = 'scale' + str(stage) + block + '_branch'
+
+    x = Conv2D(nb_filter1, (1, 1), strides=strides,
+               name=conv_name_base + '2a', use_bias=False)(input_tensor)
+    x = BatchNormalization(epsilon=eps, axis=bn_axis, name=bn_name_base + '2a')(x)
+    x = Scale(axis=bn_axis, name=scale_name_base + '2a')(x)
+    x = Activation('relu', name=conv_name_base + '2a_relu')(x)
+
+    x = ZeroPadding2D((1, 1), name=conv_name_base + '2b_zeropadding')(x)
+    x = Conv2D(nb_filter2, (kernel_size, kernel_size),
+               name=conv_name_base + '2b', use_bias=False)(x)
+    x = BatchNormalization(epsilon=eps, axis=bn_axis, name=bn_name_base + '2b')(x)
+    x = Scale(axis=bn_axis, name=scale_name_base + '2b')(x)
+    x = Activation('relu', name=conv_name_base + '2b_relu')(x)
+
+    x = Conv2D(nb_filter3, (1, 1), name=conv_name_base + '2c', use_bias=False)(x)
+    x = BatchNormalization(epsilon=eps, axis=bn_axis, name=bn_name_base + '2c')(x)
+    x = Scale(axis=bn_axis, name=scale_name_base + '2c')(x)
+
+    shortcut = Conv2D(nb_filter3, (1, 1), strides=strides,
+                      name=conv_name_base + '1', use_bias=False)(input_tensor)
+    shortcut = BatchNormalization(epsilon=eps, axis=bn_axis, name=bn_name_base + '1')(shortcut)
+    shortcut = Scale(axis=bn_axis, name=scale_name_base + '1')(shortcut)
+
+    x = merge([x, shortcut], mode='sum', name='res' + str(stage) + block)
+    x = Activation('relu', name='res' + str(stage) + block + '_relu')(x)
+    return x
+
+
+def resnet152_model(img_rows, img_cols, color_type=1, num_classes=None):
+    """
+    Resnet 152 Model for Keras
+
+    Builds the network with a 1000-way softmax head, loads the ImageNet
+    pre-trained weights by layer name, then swaps the head for a new
+    `num_classes`-way softmax layer ('fc8') and compiles the result with SGD.
+
+    Model Schema and layer naming follow that of the original Caffe implementation
+    https://github.com/KaimingHe/deep-residual-networks
+
+    ImageNet Pretrained Weights
+    Theano: https://drive.google.com/file/d/0Byy2AcGyEVxfZHhUT3lWVWxRN28/view?usp=sharing
+    TensorFlow: https://drive.google.com/file/d/0Byy2AcGyEVxfeXExMzNNOHpEODg/view?usp=sharing
+
+    Parameters:
+      img_rows, img_cols - resolution of inputs
+      color_type - number of input channels: 1 for grayscale, 3 for color
+      num_classes - number of class labels for our classification task;
+        it is passed straight to the final Dense layer, so the default of
+        None has to be replaced by the caller
+
+    Returns:
+      The compiled Keras Model (categorical cross-entropy loss, accuracy metric).
+
+    Side effects:
+      Sets the module-level global `bn_axis`, and reads the weights file
+      from the relative `imagenet_models/` directory.
+    """
+    eps = 1.1e-5
+
+    # Handle Dimension Ordering for different backends
+    # bn_axis is the channel axis; it is published as a module global.
+    global bn_axis
+    if K.image_dim_ordering() == 'tf':
+        bn_axis = 3
+        img_input = Input(shape=(img_rows, img_cols, color_type), name='data')
+    else:
+        bn_axis = 1
+        img_input = Input(shape=(color_type, img_rows, img_cols), name='data')
+
+    # Stem: padded 7x7 stride-2 convolution followed by 3x3 stride-2 max pooling
+    x = ZeroPadding2D((3, 3), name='conv1_zeropadding')(img_input)
+    x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1', use_bias=False)(x)
+    x = BatchNormalization(epsilon=eps, axis=bn_axis, name='bn_conv1')(x)
+    x = Scale(axis=bn_axis, name='scale_conv1')(x)
+    x = Activation('relu', name='conv1_relu')(x)
+    x = MaxPooling2D((3, 3), strides=(2, 2), name='pool1')(x)
+
+    # Stage 2: 3 blocks; strides=(1, 1) because pool1 already downsampled
+    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
+    x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
+    x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
+
+    # Stage 3: 1 conv block + 7 identity blocks named b1..b7
+    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
+    for i in range(1, 8):
+        x = identity_block(x, 3, [128, 128, 512], stage=3, block='b' + str(i))
+
+    # Stage 4: 1 conv block + 35 identity blocks named b1..b35
+    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
+    for i in range(1, 36):
+        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b' + str(i))
+
+    # Stage 5: 3 blocks
+    x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
+    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
+    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')
+
+    # Original 1000-class head, needed so the pre-trained weights can be loaded
+    x_fc = AveragePooling2D((7, 7), name='avg_pool')(x)
+    x_fc = Flatten()(x_fc)
+    x_fc = Dense(1000, activation='softmax', name='fc1000')(x_fc)
+
+    model = Model(img_input, x_fc)
+
+    if K.image_dim_ordering() == 'th':
+        # Use pre-trained weights for Theano backend
+        weights_path = 'imagenet_models/resnet152_weights_th.h5'
+    else:
+        # Use pre-trained weights for Tensorflow backend
+        weights_path = 'imagenet_models/resnet152_weights_tf.h5'
+
+    # by_name=True matches weights to layers through the layer names above
+    model.load_weights(weights_path, by_name=True)
+
+    # Truncate and replace softmax layer for transfer learning
+    # Cannot use model.layers.pop() since model is not of Sequential() type
+    # The method below works since pre-trained weights are stored in layers but not in the model
+    x_newfc = AveragePooling2D((7, 7), name='avg_pool')(x)
+    x_newfc = Flatten()(x_newfc)
+    x_newfc = Dense(num_classes, activation='softmax', name='fc8')(x_newfc)
+
+    # Rebuild the model on the same backbone tensor `x` with the new head
+    model = Model(img_input, x_newfc)
+
+    # Learning rate is changed to 0.001
+    sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True)
+    model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
+
+    return model
+
+
+if __name__ == '__main__':
+    # Example to fine-tune on 3000 samples from Cifar10
+    # NOTE(review): load_cifar10_data is neither defined nor imported in this
+    # file, so running the module as a script raises NameError until a
+    # loader is provided.
+
+    img_rows, img_cols = 224, 224  # Resolution of inputs
+    channel = 3
+    num_classes = 10
+    batch_size = 8
+    epochs = 10
+
+    # Load Cifar10 data. Please implement your own load_data() module for your own dataset
+    X_train, Y_train, X_valid, Y_valid = load_cifar10_data(img_rows, img_cols)
+
+    # Load our model
+    model = resnet152_model(img_rows, img_cols, channel, num_classes)
+
+    # Start Fine-tuning
+    model.fit(X_train, Y_train,
+              batch_size=batch_size,
+              epochs=epochs,
+              shuffle=True,
+              verbose=1,
+              validation_data=(X_valid, Y_valid),
+              )
+
+    # Make predictions
+    predictions_valid = model.predict(X_valid, batch_size=batch_size, verbose=1)
+
+    # Cross-entropy loss score
+    # NOTE(review): the score is computed but never printed or stored.
+    score = log_loss(Y_valid, predictions_valid)

-Original file line number
+Diff line change
 +numpy
 +tensorflow-gpu
 +keras
 +pillow