-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathmodel.py
More file actions
133 lines (90 loc) · 3.77 KB
/
model.py
File metadata and controls
133 lines (90 loc) · 3.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
"""
Definition of the neural networks.
"""
# Public names exported by a star-import of this module.
__all__ = (
'get_training_model',
'get_detect_model',
'WINDOW_SHAPE',
)
import tensorflow as tf
import common
# Size of the fixed window the networks operate on; presumably
# (height, width) in pixels -- TODO confirm against callers.
WINDOW_SHAPE = (64, 128)
# Utility functions
def weight_variable(shape):
    """
    Create a `tf.Variable` of the given shape to be used as a weight tensor.

    The initial value is drawn from a truncated normal distribution with a
    standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """
    Create a `tf.Variable` of the given shape to be used as a bias tensor.

    Every element is initialised to the constant 0.1.
    """
    return tf.Variable(tf.constant(0.1, shape=shape))
def conv2d(x, W, stride=(1, 1), padding='SAME'):
    """
    Convolve `x` with the filters `W` using `tf.nn.conv2d`.

    `stride` is a pair giving the step along dimensions 1 and 2 of `x`; the
    first and last dimensions are always stepped by 1. `padding` is passed
    through to `tf.nn.conv2d` unchanged.
    """
    full_strides = [1, stride[0], stride[1], 1]
    return tf.nn.conv2d(x, W, strides=full_strides, padding=padding)
def max_pool(x, ksize=(2, 2), stride=(2, 2), padding='SAME'):
    """
    Max-pool `x` using `tf.nn.max_pool`.

    `ksize` and `stride` are pairs giving the pooling window size and step
    along dimensions 1 and 2 of `x`; the first and last dimensions use a size
    and step of 1. `padding` is passed through to `tf.nn.max_pool`; it
    defaults to 'SAME', and is exposed so that this helper is configurable in
    the same way as `conv2d`.
    """
    return tf.nn.max_pool(x,
                          ksize=[1, ksize[0], ksize[1], 1],
                          strides=[1, stride[0], stride[1], 1],
                          padding=padding)
def avg_pool(x, ksize=(2, 2), stride=(2, 2), padding='SAME'):
    """
    Average-pool `x` using `tf.nn.avg_pool`.

    `ksize` and `stride` are pairs giving the pooling window size and step
    along dimensions 1 and 2 of `x`; the first and last dimensions use a size
    and step of 1. `padding` is passed through to `tf.nn.avg_pool`; it
    defaults to 'SAME', and is exposed so that this helper is configurable in
    the same way as `conv2d`.
    """
    return tf.nn.avg_pool(x,
                          ksize=[1, ksize[0], ksize[1], 1],
                          strides=[1, stride[0], stride[1], 1],
                          padding=padding)
def convolutional_layers():
    """
    Build the stack of three convolution / ReLU / max-pool layers shared by
    the training and detection models.

    Returns a tuple `(x, output, variables)`:

    * `x` is a float32 placeholder of rank 3 with no fixed dimensions. A
      trailing channel dimension of size 1 is appended before the first
      convolution.
    * `output` is the result of the third pooling layer, with 128 channels.
    * `variables` lists the weight and bias variables in layer order:
      `[W1, b1, W2, b2, W3, b3]`.
    """
    inputs = tf.placeholder(tf.float32, [None, None, None])
    params = []

    # Layer 1: 5x5 convolution, 1 -> 48 channels, then 2x2 pooling.
    kernel = weight_variable([5, 5, 1, 48])
    bias = bias_variable([48])
    params += [kernel, bias]
    with_channel = tf.expand_dims(inputs, 3)
    activation = tf.nn.relu(conv2d(with_channel, kernel) + bias)
    pooled = max_pool(activation, ksize=(2, 2), stride=(2, 2))

    # Layer 2: 5x5 convolution, 48 -> 64 channels. Pooling only halves
    # dimension 1 here; dimension 2 keeps its size.
    kernel = weight_variable([5, 5, 48, 64])
    bias = bias_variable([64])
    params += [kernel, bias]
    activation = tf.nn.relu(conv2d(pooled, kernel) + bias)
    pooled = max_pool(activation, ksize=(2, 1), stride=(2, 1))

    # Layer 3: 5x5 convolution, 64 -> 128 channels, then 2x2 pooling.
    kernel = weight_variable([5, 5, 64, 128])
    bias = bias_variable([128])
    params += [kernel, bias]
    activation = tf.nn.relu(conv2d(pooled, kernel) + bias)
    pooled = max_pool(activation, ksize=(2, 2), stride=(2, 2))

    return inputs, pooled, params
def get_training_model():
    """
    Build the network used for training.

    The training model acts on a batch of 128x64 windows and outputs, for
    each window, a vector `v` of length `1 + 7 * len(common.CHARS)`.

    `v[0]` is the score for a plate being fully within the image and at the
    correct scale. `v[1 + i * len(common.CHARS) + c]` is the score for the
    `i`'th character being `c`. No activation is applied to the output layer
    here, so these are raw scores rather than normalised probabilities.

    Returns a tuple `(x, y, variables)`: the input placeholder, the output
    tensor, and the list of all weight and bias variables (convolutional
    layers first, then the two dense layers).
    """
    inputs, features, conv_params = convolutional_layers()

    # Number of values per window coming out of the convolutional layers.
    flat_size = 32 * 8 * 128
    # One presence score plus one score per character class for 7 characters.
    num_outputs = 1 + 7 * len(common.CHARS)

    # Densely connected layer
    fc1_weights = weight_variable([flat_size, 2048])
    fc1_bias = bias_variable([2048])
    flattened = tf.reshape(features, [-1, flat_size])
    hidden = tf.nn.relu(tf.matmul(flattened, fc1_weights) + fc1_bias)

    # Output layer
    fc2_weights = weight_variable([2048, num_outputs])
    fc2_bias = bias_variable([num_outputs])
    outputs = tf.matmul(hidden, fc2_weights) + fc2_bias

    dense_params = [fc1_weights, fc1_bias, fc2_weights, fc2_bias]
    return (inputs, outputs, conv_params + dense_params)
def get_detect_model():
    """
    Build the network used for detection.

    This is the same as the training model, except that it acts on an
    arbitrarily sized input. The two dense layers are expressed as
    convolutions: the first as an 8x32 'VALID' convolution over the output of
    `convolutional_layers()`, the second as a 1x1 convolution. The pooling
    layers reduce dimension 1 of the input by a factor of 8 and dimension 2 by
    a factor of 4, so this slides the 128x64 window across the image in steps
    of 8 along dimension 1 and 4 along dimension 2.

    The output is of the form `v`, where `v[i, j]` is equivalent to the output
    of the training model for the window at coordinates `(8 * i, 4 * j)`.

    Returns a tuple `(x, v, variables)`. The variables have the same shapes
    and order as those returned by `get_training_model()`; the dense weights
    are returned in their 2-D form, not the reshaped convolution kernels.
    """
    inputs, features, conv_params = convolutional_layers()
    num_outputs = 1 + 7 * len(common.CHARS)

    # Fourth layer: the first dense layer, applied as an 8x32 convolution.
    fc1_weights = weight_variable([8 * 32 * 128, 2048])
    fc1_kernel = tf.reshape(fc1_weights, [8, 32, 128, 2048])
    fc1_bias = bias_variable([2048])
    fc1_conv = conv2d(features, fc1_kernel, stride=(1, 1), padding="VALID")
    hidden = tf.nn.relu(fc1_conv + fc1_bias)

    # Fifth layer: the output dense layer, applied as a 1x1 convolution.
    fc2_weights = weight_variable([2048, num_outputs])
    fc2_kernel = tf.reshape(fc2_weights, [1, 1, 2048, num_outputs])
    fc2_bias = bias_variable([num_outputs])
    outputs = conv2d(hidden, fc2_kernel) + fc2_bias

    dense_params = [fc1_weights, fc1_bias, fc2_weights, fc2_bias]
    return (inputs, outputs, conv_params + dense_params)