
Commit 68ba6e4

Merge #563

563: noise shape for dropout r=MikeInnes a=chengchingwen

I add the noise shape for dropout, similar to the `noise_shape` argument in [`tf.nn.dropout`](https://www.tensorflow.org/api_docs/python/tf/nn/dropout).

Co-authored-by: chengchingwen <adgjl5645@hotmail.com>
Co-authored-by: Peter <adgjl5645@hotmail.com>
2 parents 16fc41c + 9c1bb93 commit 68ba6e4
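As a quick illustration of what this merge enables, here is a usage sketch against this version of Flux (not part of the diff below; the array sizes and variable names are made up):

```julia
using Flux

x = rand(Float32, 100, 50)

# Default behaviour is unchanged: an independent mask entry per element.
m = Dropout(0.5)

# dims = 1 keeps the mask's first dimension and broadcasts it over the rest:
# the mask has size (100, 1), so whole rows are kept or zeroed together --
# the analogue of noise_shape = [100, 1] in tf.nn.dropout.
m_rows = Dropout(0.5, dims = 1)
y = m_rows(x)
```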

File tree

3 files changed: +38 −9

* NEWS.md
* src/layers/normalise.jl
* test/layers/normalisation.jl


NEWS.md

Lines changed: 1 addition & 0 deletions
```diff
@@ -3,6 +3,7 @@
 
 # v0.8.0
 
+* [Dropout now has a `dims` argument for specifying the unbroadcast dimensions.](https://github.com/FluxML/Flux.jl/pull/563)
 * New [ConvTranspose layer](https://github.com/FluxML/Flux.jl/pull/311).
 * New [Maxout layer](https://github.com/FluxML/Flux.jl/pull/647)
 * Datasets are now [hash verified on download](https://github.com/FluxML/Flux.jl/pull/585) to avoid corruption.
```

src/layers/normalise.jl

Lines changed: 27 additions & 9 deletions
```diff
@@ -13,34 +13,52 @@ end
 _testmode!(m, test) = nothing
 
 """
-    Dropout(p)
+    Dropout(p, dims = :)
 
 A Dropout layer. For each input, either sets that input to `0` (with probability
-`p`) or scales it by `1/(1-p)`. This is used as a regularisation, i.e. it
-reduces overfitting during training.
+`p`) or scales it by `1/(1-p)`. The `dims` argument specifies the unbroadcasted
+dimensions, i.e. `dims=1` applies dropout along columns and `dims=2` along rows. This is
+used as a regularisation, i.e. it reduces overfitting during training. See also [`dropout`](@ref).
 
 Does nothing to the input once in [`testmode!`](@ref).
 """
 mutable struct Dropout{F}
   p::F
+  dims::Union{Colon, Int, NTuple{N, Int} where N}
   active::Bool
 end
 
-function Dropout(p)
+function Dropout(p; dims = :)
   @assert 0 ≤ p ≤ 1
-  Dropout{typeof(p)}(p, true)
+  Dropout{typeof(p)}(p, dims, true)
 end
 
+_dropout_shape(s, ::Colon) = size(s)
+_dropout_shape(s, dims) = tuple((i ∉ dims ? 1 : si for (i, si) ∈ enumerate(size(s)))...)
+
 _dropout_kernel(y::T, p, q) where {T} = y > p ? T(1 / q) : T(0)
 
-function (a::Dropout)(x)
-  a.active || return x
-  y = similar(x)
+
+"""
+    dropout(x, p; dims = :)
+
+The dropout function. For each input, either sets that input to `0` (with probability
+`p`) or scales it by `1/(1-p)`. The `dims` argument specifies the unbroadcasted
+dimensions, i.e. `dims=1` applies dropout along columns and `dims=2` along rows. This is
+used as a regularisation, i.e. it reduces overfitting during training.
+"""
+function dropout(x, p; dims = :)
+  y = similar(x, _dropout_shape(x, dims))
   rand!(y)
-  y .= _dropout_kernel.(y, a.p, 1 - a.p)
+  y .= _dropout_kernel.(y, p, 1 - p)
   return x .* y
 end
 
+function (a::Dropout)(x)
+  a.active || return x
+  return dropout(x, a.p; dims = a.dims)
+end
+
 _testmode!(a::Dropout, test) = (a.active = !test)
 
 """
```

test/layers/normalisation.jl

Lines changed: 10 additions & 0 deletions
```diff
@@ -26,6 +26,16 @@ using Flux.Tracker: data
   testmode!(m)
   y = m(x)
   @test count(a->a == 0, y) == 0
+
+  x = rand(100, 50)
+  m = Dropout(0.5, dims = 2)
+  y = m(x)
+  c = map(i->count(a->a==0, @view y[i, :]), 1:100)
+  @test minimum(c) == maximum(c)
+  m = Dropout(0.5, dims = 1)
+  y = m(x)
+  c = map(i->count(a->a==0, @view y[:, i]), 1:50)
+  @test minimum(c) == maximum(c)
 end
 
 @testset "BatchNorm" begin
```
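Paraphrasing what the new test checks: with `dims = 2` the mask has size (1, 50) and is broadcast down each column, so every row should hit exactly the same set of zeroed columns. A minimal standalone version of that check (relying on the layer's in-training default `active = true`):

```julia
using Flux

x = rand(100, 50)
m = Dropout(0.5, dims = 2)   # mask of size (1, 50), shared by all rows
y = m(x)

# Every row sees the same zeroed columns, so per-row zero counts agree.
zeros_per_row = [count(iszero, @view y[i, :]) for i in 1:100]
@assert minimum(zeros_per_row) == maximum(zeros_per_row)
```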
