4
4
mobilenetv1(width_mult, config;
5
5
activation = relu,
6
6
inchannels = 3,
7
- nclasses = 1000 ,
8
- fcsize = 1024 )
7
+ fcsize = 1024 ,
8
+ nclasses = 1000 )
9
9
10
10
Create a MobileNetv1 model ([reference](https://arxiv.org/abs/1704.04861v1)).
11
11
@@ -21,23 +21,24 @@ Create a MobileNetv1 model ([reference](https://arxiv.org/abs/1704.04861v1)).
21
21
+ `s`: The stride of the convolutional kernel
22
22
+ `r`: The number of times this configuration block is repeated
23
23
- `activation`: The activation function to use throughout the network
24
- - `inchannels`: The number of input feature maps``
24
+ - `inchannels`: The number of input channels. The default value is 3.
25
25
- `fcsize`: The intermediate fully-connected size between the convolution and final layers
26
26
- `nclasses`: The number of output classes
27
27
"""
28
28
function mobilenetv1 (width_mult, config;
29
29
activation = relu,
30
30
inchannels = 3 ,
31
- nclasses = 1000 ,
32
- fcsize = 1024 )
31
+ fcsize = 1024 ,
32
+ nclasses = 1000 )
33
33
layers = []
34
34
for (dw, outch, stride, nrepeats) in config
35
35
outch = Int (outch * width_mult)
36
36
for _ in 1 : nrepeats
37
37
layer = dw ?
38
38
depthwise_sep_conv_bn ((3 , 3 ), inchannels, outch, activation;
39
39
stride = stride, pad = 1 , bias = false ) :
40
- conv_bn ((3 , 3 ), inchannels, outch, activation; stride = stride, pad = 1 )
40
+ conv_bn ((3 , 3 ), inchannels, outch, activation; stride = stride, pad = 1 ,
41
+ bias = false )
41
42
append! (layers, layer)
42
43
inchannels = outch
43
44
end
@@ -51,7 +52,7 @@ function mobilenetv1(width_mult, config;
51
52
end
52
53
53
54
const mobilenetv1_configs = [
54
- # dw, c, s, r
55
+ # dw, c, s, r
55
56
(false , 32 , 2 , 1 ),
56
57
(true , 64 , 1 , 1 ),
57
58
(true , 128 , 2 , 1 ),
@@ -65,7 +66,7 @@ const mobilenetv1_configs = [
65
66
]
66
67
67
68
"""
68
- MobileNetv1(width_mult = 1; pretrain = false, nclasses = 1000)
69
+ MobileNetv1(width_mult = 1; inchannels = 3, pretrain = false, nclasses = 1000)
69
70
70
71
Create a MobileNetv1 model with the baseline configuration
71
72
([reference](https://arxiv.org/abs/1704.04861v1)).
@@ -76,6 +77,7 @@ Set `pretrain` to `true` to load the pretrained weights for ImageNet.
76
77
- `width_mult`: Controls the number of output feature maps in each block
77
78
(with 1.0 being the default in the paper;
78
79
this is usually a value between 0.1 and 1.4)
80
+ - `inchannels`: The number of input channels. The default value is 3.
79
81
- `pretrain`: Whether to load the pre-trained weights for ImageNet
80
82
- `nclasses`: The number of output classes
81
83
@@ -85,10 +87,10 @@ struct MobileNetv1
85
87
layers:: Any
86
88
end
87
89
88
- function MobileNetv1 (width_mult:: Number = 1 ; pretrain = false , nclasses = 1000 )
89
- layers = mobilenetv1 (width_mult, mobilenetv1_configs; nclasses = nclasses)
90
+ function MobileNetv1 (width_mult:: Number = 1 ; inchannels = 3 , pretrain = false ,
91
+ nclasses = 1000 )
92
+ layers = mobilenetv1 (width_mult, mobilenetv1_configs; inchannels, nclasses)
90
93
pretrain && loadpretrain! (layers, string (" MobileNetv1" ))
91
-
92
94
return MobileNetv1 (layers)
93
95
end
94
96
@@ -102,7 +104,7 @@ classifier(m::MobileNetv1) = m.layers[2]
102
104
# MobileNetv2
103
105
104
106
"""
105
- mobilenetv2(width_mult, configs; max_width = 1280, nclasses = 1000)
107
+ mobilenetv2(width_mult, configs; inchannels = 3, max_width = 1280, nclasses = 1000)
106
108
107
109
Create a MobileNetv2 model.
108
110
([reference](https://arxiv.org/abs/1801.04381)).
@@ -119,14 +121,15 @@ Create a MobileNetv2 model.
119
121
+ `n`: The number of times a block is repeated
120
122
+ `s`: The stride of the convolutional kernel
121
123
+ `a`: The activation function used in the bottleneck layer
124
+ - `inchannels`: The number of input channels. The default value is 3.
122
125
- `max_width`: The maximum number of feature maps in any layer of the network
123
126
- `nclasses`: The number of output classes
124
127
"""
125
- function mobilenetv2 (width_mult, configs; max_width = 1280 , nclasses = 1000 )
128
+ function mobilenetv2 (width_mult, configs; inchannels = 3 , max_width = 1280 , nclasses = 1000 )
126
129
# building first layer
127
130
inplanes = _round_channels (32 * width_mult, width_mult == 0.1 ? 4 : 8 )
128
131
layers = []
129
- append! (layers, conv_bn ((3 , 3 ), 3 , inplanes; stride = 2 ))
132
+ append! (layers, conv_bn ((3 , 3 ), inchannels , inplanes; pad = 1 , stride = 2 ))
130
133
# building inverted residual blocks
131
134
for (t, c, n, s, a) in configs
132
135
outplanes = _round_channels (c * width_mult, width_mult == 0.1 ? 4 : 8 )
@@ -165,7 +168,7 @@ struct MobileNetv2
165
168
end
166
169
167
170
"""
168
- MobileNetv2(width_mult = 1.0; pretrain = false, nclasses = 1000)
171
+ MobileNetv2(width_mult = 1.0; inchannels = 3, pretrain = false, nclasses = 1000)
169
172
170
173
Create a MobileNetv2 model with the specified configuration.
171
174
([reference](https://arxiv.org/abs/1801.04381)).
@@ -176,13 +179,15 @@ Set `pretrain` to `true` to load the pretrained weights for ImageNet.
176
179
- `width_mult`: Controls the number of output feature maps in each block
177
180
(with 1.0 being the default in the paper;
178
181
this is usually a value between 0.1 and 1.4)
182
+ - `inchannels`: The number of input channels. The default value is 3.
179
183
- `pretrain`: Whether to load the pre-trained weights for ImageNet
180
184
- `nclasses`: The number of output classes
181
185
182
186
See also [`Metalhead.mobilenetv2`](#).
183
187
"""
184
- function MobileNetv2 (width_mult:: Number = 1 ; pretrain = false , nclasses = 1000 )
185
- layers = mobilenetv2 (width_mult, mobilenetv2_configs; nclasses = nclasses)
188
+ function MobileNetv2 (width_mult:: Number = 1 ; inchannels = 3 , pretrain = false ,
189
+ nclasses = 1000 )
190
+ layers = mobilenetv2 (width_mult, mobilenetv2_configs; inchannels, nclasses)
186
191
pretrain && loadpretrain! (layers, string (" MobileNetv2" ))
187
192
return MobileNetv2 (layers)
188
193
end
@@ -197,7 +202,7 @@ classifier(m::MobileNetv2) = m.layers[2]
197
202
# MobileNetv3
198
203
199
204
"""
200
- mobilenetv3(width_mult, configs; max_width = 1024, nclasses = 1000)
205
+ mobilenetv3(width_mult, configs; inchannels = 3, max_width = 1024, nclasses = 1000)
201
206
202
207
Create a MobileNetv3 model.
203
208
([reference](https://arxiv.org/abs/1905.02244)).
@@ -216,14 +221,17 @@ Create a MobileNetv3 model.
216
221
+ `r::Integer` - The reduction factor (`>= 1` or `nothing` to skip) for squeeze and excite layers
217
222
+ `s::Integer` - The stride of the convolutional kernel
218
223
+ `a` - The activation function used in the bottleneck (typically `hardswish` or `relu`)
224
+ - `inchannels`: The number of input channels. The default value is 3.
219
225
- `max_width`: The maximum number of feature maps in any layer of the network
220
226
- `nclasses`: the number of output classes
221
227
"""
222
- function mobilenetv3 (width_mult, configs; max_width = 1024 , nclasses = 1000 )
228
+ function mobilenetv3 (width_mult, configs; inchannels = 3 , max_width = 1024 , nclasses = 1000 )
223
229
# building first layer
224
230
inplanes = _round_channels (16 * width_mult, 8 )
225
231
layers = []
226
- append! (layers, conv_bn ((3 , 3 ), 3 , inplanes, hardswish; stride = 2 ))
232
+ append! (layers,
233
+ conv_bn ((3 , 3 ), inchannels, inplanes, hardswish; pad = 1 , stride = 2 ,
234
+ bias = false ))
227
235
explanes = 0
228
236
# building inverted residual blocks
229
237
for (k, t, c, r, a, s) in configs
249
257
250
258
# Configurations for small and large mode for MobileNetv3
251
259
mobilenetv3_configs = Dict (:small => [
252
- # k, t, c, SE, a, s
260
+ # k, t, c, SE, a, s
253
261
(3 , 1 , 16 , 4 , relu, 2 ),
254
262
(3 , 4.5 , 24 , nothing , relu, 2 ),
255
263
(3 , 3.67 , 24 , nothing , relu, 1 ),
@@ -263,7 +271,7 @@ mobilenetv3_configs = Dict(:small => [
263
271
(5 , 6 , 96 , 4 , hardswish, 1 ),
264
272
],
265
273
:large => [
266
- # k, t, c, SE, a, s
274
+ # k, t, c, SE, a, s
267
275
(3 , 1 , 16 , nothing , relu, 1 ),
268
276
(3 , 4 , 24 , nothing , relu, 2 ),
269
277
(3 , 3 , 24 , nothing , relu, 1 ),
@@ -287,7 +295,7 @@ struct MobileNetv3
287
295
end
288
296
289
297
"""
290
- MobileNetv3(mode::Symbol = :small, width_mult::Number = 1; pretrain = false, nclasses = 1000)
298
+ MobileNetv3(mode::Symbol = :small, width_mult::Number = 1; inchannels = 3, pretrain = false, nclasses = 1000)
291
299
292
300
Create a MobileNetv3 model with the specified configuration.
293
301
([reference](https://arxiv.org/abs/1905.02244)).
@@ -299,17 +307,18 @@ Set `pretrain = true` to load the model with pre-trained weights for ImageNet.
299
307
- `width_mult`: Controls the number of output feature maps in each block
300
308
(with 1.0 being the default in the paper;
301
309
this is usually a value between 0.1 and 1.4)
310
+ - `inchannels`: The number of channels in the input. The default value is 3.
302
311
- `pretrain`: whether to load the pre-trained weights for ImageNet
303
312
- `nclasses`: the number of output classes
304
313
305
314
See also [`Metalhead.mobilenetv3`](#).
306
315
"""
307
- function MobileNetv3 (mode:: Symbol = :small , width_mult:: Number = 1 ; pretrain = false ,
308
- nclasses = 1000 )
316
+ function MobileNetv3 (mode:: Symbol = :small , width_mult:: Number = 1 ; inchannels = 3 ,
317
+ pretrain = false , nclasses = 1000 )
309
318
@assert mode in [:large , :small ] " `mode` has to be either :large or :small"
310
319
max_width = (mode == :large ) ? 1280 : 1024
311
- layers = mobilenetv3 (width_mult, mobilenetv3_configs[mode]; max_width = max_width,
312
- nclasses = nclasses )
320
+ layers = mobilenetv3 (width_mult, mobilenetv3_configs[mode]; inchannels, max_width,
321
+ nclasses)
313
322
pretrain && loadpretrain! (layers, string (" MobileNetv3" , mode))
314
323
return MobileNetv3 (layers)
315
324
end
0 commit comments