@@ -69,7 +69,7 @@ extraChain(::Tuple{}, x) = ()
69 69
70 70
71 71 """
72-        Dense(in, out, σ = identity; bias = true, init = glorot_uniform)
72+        Dense(in, out, σ=identity; bias=true, init=glorot_uniform)
73 73     Dense(W::AbstractMatrix, [bias, σ])
74 74
75 75 Create a traditional `Dense` layer, whose forward pass is given by:
@@ -81,7 +81,7 @@ as an `in × N` matrix, or any array with `size(x,1) == in`.
81 81 The out `y` will be a vector of length `out`, or a batch with
82 82 `size(y) == (out, size(x)[2:end]...)`
83 83
84-    Keyword `bias = false` will switch off trainable bias for the layer.
84+    Keyword `bias=false` will switch off trainable bias for the layer.
85 85 The initialisation of the weight matrix is `W = init(out, in)`, calling the function
86 86 given to keyword `init`, with default [`glorot_uniform`](@doc Flux.glorot_uniform).
87 87 The weight matrix and/or the bias vector (of length `out`) may also be provided explicitly.
@@ -109,41 +109,45 @@ julia> Flux.params(d1) # no trainable bias
109109Params([[1.0 1.0 … 1.0 1.0; 1.0 1.0 … 1.0 1.0]])
110110```
111111"""
112-    struct Dense{F,S<:AbstractArray,T}
113-      weight::S
114-      bias::T
112+    struct Dense{F, M<:AbstractMatrix, B}
113+      weight::M
114+      bias::B
115 115   σ::F
116+      function Dense(W::M, bias = true, σ::F = identity) where {M<:AbstractMatrix, F}
117+        b = create_bias(W, bias, size(W,1))
118+        new{F,M,typeof(b)}(W, b, σ)
119+      end
116 120 end
117121
118-    Dense(W, b) = Dense(W, b, identity)
122+    function Dense(in::Integer, out::Integer, σ = identity;
123+                   initW = nothing, initb = nothing,
124+                   init = glorot_uniform, bias=true)
119 125
120-    Dense(W::AbstractArray, b::Bool = true, σ = identity) =
121-      Dense(W, create_bias(W, b, size(W,1)), σ)
122-
123-    function Dense(in::Integer, out::Integer, σ = identity; initW = nothing,
124-                   init = glorot_uniform, initb = nothing, bias::Bool = true)
125-      if initW !== nothing
126-        Base.depwarn("initW is deprecated, please use the `init` keyword instead", :Dense)
127-        init = initW
126+      W = if initW !== nothing
127+        Base.depwarn("keyword initW is deprecated, please use init (which similarly accepts a function like randn)", :Dense)
128+        initW(out, in)
129+      else
130+        init(out, in)
128 131   end
129 132
130-      if initb !== nothing
131-        Base.depwarn("initb is deprecated, please use the array based constructors instead", :Dense)
132-        initb = initb
133+      b = if bias === true && initb !== nothing
134+        Base.depwarn("keyword initb is deprecated, please simply supply the bias vector, bias=initb(out)", :Dense)
135+        initb(out)
133 136   else
134-        initb = zeros
137+        bias
135 138   end
136-      Dense(init(out, in), bias ? initb(out) : Zeros(), σ)
139+
140+      return Dense(W, b, σ)
137 141 end
138142
139143@functor Dense
140144
141 145 function (a::Dense)(x::AbstractVecOrMat)
142 146   W, b, σ = a.weight, a.bias, a.σ
143-      σ.(W * x .+ b)
147+      return σ.(W*x .+ b)
144 148 end
145149
146-    (a::Dense)(x) =
150+    (a::Dense)(x::AbstractArray) =
147 151   reshape(a(reshape(x, size(x,1), :)), :, size(x)[2:end]...)
148 152
149 153 function Base.show(io::IO, l::Dense)
@@ -292,6 +296,7 @@ If `x` and `y` are matrices, then each column of the output `z = B(x, y)` is of
292 296 with `B` a Bilinear layer.
293 297
294 298 If `y` is not given, it is taken to be equal to `x`, i.e. `B(x) == B(x, x)`
299+
295 300 The two inputs may also be provided as a tuple, `B((x, y)) == B(x, y)`,
296 301 which is accepted as the input to a `Chain`.
297 302
@@ -300,7 +305,6 @@ By default the bias vector is `zeros(Float32, out)`, option `bias=false` will switch off
300 305 trainable bias. Either of these may be provided explicitly.
301 306
302 307 # Examples
303-
304 308 ```jldoctest
305 309 julia> x, y = randn(Float32, 5, 32), randn(Float32, 5, 32);
306 310
@@ -417,4 +421,4 @@ function Base.show(io::IO, m::Parallel)
417 421   print(io, "Parallel(", m.connection, ", ")
418 422   join(io, m.layers, ", ")
419 423   print(io, ")")
420-    end
424+    end
0 commit comments