coagulate gates

Mike J Innes 2018-01-31 16:56:27 +00:00
parent 8ad837bb70
commit af3ccf85ff
2 changed files with 33 additions and 38 deletions


@@ -42,22 +42,12 @@ const RNN_ALGO_PERSIST_DYNAMIC = 2
 # GRU: [weight, bias] × [input, hidden] × [reset, update, newmem]
 # LSTM: [weight, bias] × [input, hidden] × [input, forget, newmem, output]
-weightsizes(input, hidden, n = 1) = [(in,hidden) for in in (input, hidden) for gate in 1:n]
-biassizes(input, hidden, n = 1) = [(hidden,) for gate in 1:n]
-function params(w::CuVector{T}, input, hidden, n = 1) where T
-  weights = CuMatrix{T}[]
-  biases = CuVector{T}[]
-  offset = 0
-  for p in weightsizes(input, hidden, n)
-    push!(weights, reshape(w[offset+(1:prod(p))], p))
-    offset += prod(p)
-  end
-  for p in biassizes(input, hidden, n)
-    push!(biases, w[offset+(1:prod(p))])
-    offset += prod(p)
-  end
-  return weights, biases
+function params(w::CuVector, input, hidden, n = 1)
+  slice(offset, shape) = reshape(w[offset+(1:prod(shape))], shape)
+  wx = slice(0, (input, hidden*n))
+  wh = slice(length(wx), (hidden, hidden*n))
+  bias = w[length(wx)+length(wh) + (1:hidden*n)]
+  (wx, wh), bias
 end

 mutable struct RNNDesc{T}
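
Note: the rewritten params treats the flat CUDNN parameter buffer as one input-weight block, one hidden-weight block, and one bias block, each holding all n gates side by side. A minimal CPU sketch of that slicing, assuming plain Arrays in place of CuArrays and broadcast dots written for current Julia (names and sizes here are illustrative, not part of the commit):

    input, hidden, n = 10, 5, 3                       # n = 3 gates, as for a GRU
    w = randn(Float32, input*hidden*n + hidden*hidden*n + hidden*n)
    slice(offset, shape) = reshape(w[offset .+ (1:prod(shape))], shape)
    wx   = slice(0, (input, hidden*n))                # 10×15, input weights for all gates
    wh   = slice(length(wx), (hidden, hidden*n))      # 5×15, hidden weights for all gates
    bias = w[length(wx)+length(wh) .+ (1:hidden*n)]   # 15, biases for all gates
    @assert size(wx) == (10, 15) && size(wh) == (5, 15) && length(bias) == 15

CUDNN exposes a single contiguous parameter buffer per RNN, so two matrices and one bias vector cover everything, and RNNDesc below no longer needs vectors of per-gate arrays.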
@@ -65,8 +55,8 @@ mutable struct RNNDesc{T}
   input::Int
   hidden::Int
   params::CuVector{T}
-  weights::Vector{CuMatrix{T}}
-  biases::Vector{CuVector{T}}
+  weights::NTuple{2,CuMatrix{T}}
+  bias::CuVector{T}
   ptr::Ptr{Void}
 end
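
The point of coagulating is that horizontally concatenating the per-gate matrices lets a single GEMM compute every gate's pre-activation at once. A small CPU sketch of that identity (Rx, Ux, Cx are the per-gate reset/update/newmem matrices, as named in the old test code below):

    Rx, Ux, Cx = randn(10, 5), randn(10, 5), randn(10, 5)
    x  = randn(10)
    Wx = hcat(Rx, Ux, Cx)   # packed layout: gate columns side by side
    g  = Wx'*x              # one 15-element product instead of three 5-element ones
    @assert g[1:5] ≈ Rx'*x && g[6:10] ≈ Ux'*x && g[11:15] ≈ Cx'*x

The test file below is updated to match: rather than unpacking six GRU matrices, it slices gates out of the packed products.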


@@ -6,37 +6,38 @@ using CUDAnative
 info("Testing Flux/CUDNN")

 function randinit(r::RNNDesc{T}) where T
-  for w in r.weights
-    copy!(w, randn(T, size(w)))
-  end
-  for w in r.biases
+  for w in (r.weights..., r.bias)
     copy!(w, randn(T, size(w)))
   end
 end

 const cutanh = CUDAnative.tanh

+gate(rnn, x, n) = x[(1:rnn.hidden) + rnn.hidden*(n-1)]
+
 function test_forward(rnn::RNNDesc, x, h, c = nothing)
   if rnn.mode == CUDA.RNN_RELU
     Wx, Wh = rnn.weights
-    b, = rnn.biases
+    b = rnn.bias
     h = relu.(Wx'*x .+ Wh'*h .+ b)
     return h, h
   elseif rnn.mode == CUDA.GRU
-    Rx, Ux, Cx, Rh, Uh, Ch = rnn.weights
-    bR, bU, bC = rnn.biases
-    r = σ.(Rx'*x .+ Rh'*h .+ bR)
-    z = σ.(Ux'*x .+ Uh'*h .+ bU)
-    h̃ = cutanh.(Cx'*x .+ r .* Ch'*h .+ bC)
+    Wx, Wh = rnn.weights
+    b = rnn.bias
+    gx, gh = Wx'*x, Wh'*h
+    r = σ.(gate(rnn, gx, 1) .+ gate(rnn, gh, 1) .+ gate(rnn, b, 1))
+    z = σ.(gate(rnn, gx, 2) .+ gate(rnn, gh, 2) .+ gate(rnn, b, 2))
+    h̃ = cutanh.(gate(rnn, gx, 3) .+ r .* gate(rnn, gh, 3) .+ gate(rnn, b, 3))
     h = (1.-z).*h̃ .+ z.*h
     return h, h
   elseif rnn.mode == CUDA.LSTM
-    Ix, Fx, Cx, Ox, Ih, Fh, Ch, Oh = rnn.weights
-    bI, bF, bC, bO = rnn.biases
-    input = σ.(Ix'*x .+ Ih'*h .+ bI)
-    forget = σ.(Fx'*x .+ Fh'*h .+ bF)
-    cell = cutanh.(Cx'*x .+ Ch'*h .+ bC)
-    output = σ.(Ox'*x .+ Oh'*h .+ bO)
+    Wx, Wh = rnn.weights
+    b = rnn.bias
+    g = Wx'*x .+ Wh'*h .+ b
+    input = σ.(gate(rnn, g, 1))
+    forget = σ.(gate(rnn, g, 2))
+    cell = cutanh.(gate(rnn, g, 3))
+    output = σ.(gate(rnn, g, 4))
     c = forget .* c .+ input .* cell
     h = output .* cutanh.(c)
     return (h, h, c)
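
The new gate helper is just that slice: for gate n it takes the n-th hidden-sized block of a packed array. A standalone sketch of the index arithmetic (the real helper reads the width from rnn.hidden; the broadcast dot is added for current Julia):

    hidden = 5
    gate(x, n) = x[(1:hidden) .+ hidden*(n-1)]
    g = collect(1:15)   # stand-in for Wx'*x with three gates
    @assert gate(g, 1) == 1:5 && gate(g, 2) == 6:10 && gate(g, 3) == 11:15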
@@ -48,17 +49,21 @@ end

 rnn = RNNDesc{Float32}(CUDA.RNN_RELU, 10, 5)
 randinit(rnn)
 x, h = cu(rand(10)), cu(rand(5))
-@test collect(test_forward(rnn, x, h)[1]) ≈ collect(CUDA.forwardInference(rnn, x, h)[1])
+@test collect(test_forward(rnn, x, h)[1]) ≈
+  collect(CUDA.forwardInference(rnn, x, h)[1])

 rnn = RNNDesc{Float32}(CUDA.GRU, 10, 5)
 randinit(rnn)
 x, h = cu(rand(10)), cu(rand(5))
-@test collect(test_forward(rnn, x, h)[1]) ≈ collect(CUDA.forwardInference(rnn, x, h)[1])
+@test collect(test_forward(rnn, x, h)[1]) ≈
+  collect(CUDA.forwardInference(rnn, x, h)[1])

 rnn = RNNDesc{Float32}(CUDA.LSTM, 10, 5)
 randinit(rnn)
 x, h, c = cu(rand(10)), cu(rand(5)), cu(rand(5))
-@test collect(test_forward(rnn, x, h, c)[1]) ≈ collect(CUDA.forwardInference(rnn, x, h, c)[1])
-@test collect(test_forward(rnn, x, h, c)[2]) ≈ collect(CUDA.forwardInference(rnn, x, h, c)[2])
+@test collect(test_forward(rnn, x, h, c)[1]) ≈
+  collect(CUDA.forwardInference(rnn, x, h, c)[1])
+@test collect(test_forward(rnn, x, h, c)[2]) ≈
+  collect(CUDA.forwardInference(rnn, x, h, c)[2])

 end
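
Closing sketch: a CPU-only check that the coagulated GRU step agrees with the old per-gate formulation, which is the invariant the rewritten tests then compare against CUDNN. Everything here (the σ definition, hc standing in for the candidate state h̃, the sizes) is illustrative:

    σ(v) = 1 / (1 + exp(-v))
    hidden = 5
    gate(x, n) = x[(1:hidden) .+ hidden*(n-1)]
    Rx, Ux, Cx = randn(10, 5), randn(10, 5), randn(10, 5)
    Rh, Uh, Ch = randn(5, 5), randn(5, 5), randn(5, 5)
    bR, bU, bC = randn(5), randn(5), randn(5)
    x, h = randn(10), randn(5)

    # old style: one small product per gate
    r  = σ.(Rx'*x .+ Rh'*h .+ bR)
    z  = σ.(Ux'*x .+ Uh'*h .+ bU)
    hc = tanh.(Cx'*x .+ r .* Ch'*h .+ bC)
    h_old = (1 .- z) .* hc .+ z .* h

    # new style: two packed products, then gate slicing
    Wx, Wh, b = hcat(Rx, Ux, Cx), hcat(Rh, Uh, Ch), vcat(bR, bU, bC)
    gx, gh = Wx'*x, Wh'*h
    r2  = σ.(gate(gx, 1) .+ gate(gh, 1) .+ gate(b, 1))
    z2  = σ.(gate(gx, 2) .+ gate(gh, 2) .+ gate(b, 2))
    hc2 = tanh.(gate(gx, 3) .+ r2 .* gate(gh, 3) .+ gate(b, 3))
    h_new = (1 .- z2) .* hc2 .+ z2 .* h

    @assert h_old ≈ h_new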