Merge #898

898: Fix problem in crossentropy breaking GPU compilation r=MikeInnes a=kshyatt

Trying to run this simple example
```
using Flux, CuArrays
using Flux: crossentropy
model = Chain(
        Dense(728, 128, σ),
        LSTM(128, 256),
        LSTM(256, 128),
        Dense(128, 10),
        softmax) |> gpu
data = [rand(728) for i in 1:100];
out  = [rand(10) for i in 1:100];
loss(x, y) = crossentropy(model(x), y);
Flux.train!(loss, params(model), zip(gpu.(data), gpu.(out)), ADAM())
```
Old version of `crossentropy`:
```
ERROR: GPU compilation of #23(CuArrays.CuKernelState, CUDAnative.CuDeviceArray{Float32,1,CUDAnative.AS.Global}, Base.Broadcast.Broadcasted{Nothing,Tuple{Base.OneTo{Int64}},typeof(*),Tuple{Base.Broadcast.Extruded{Array{Float32,1},Tuple{Bool},Tuple{Int64}},Base.Broadcast.Broadcasted{Base.Broadcast.ArrayStyle{CuArray},Nothing,typeof(conj),Tuple{Base.Broadcast.Extruded{CUDAnative.CuDeviceArray{Float32,1,CUDAnative.AS.Global},Tuple{Bool},Tuple{Int64}}}}}}) failed
KernelError: passing and using non-bitstype argument

Argument 4 to your kernel function is of type Base.Broadcast.Broadcasted{Nothing,Tuple{Base.OneTo{Int64}},typeof(*),Tuple{Base.Broadcast.Extruded{Array{Float32,1},Tuple{Bool},Tuple{Int64}},Base.Broadcast.Broadcasted{Base.Broadcast.ArrayStyle{CuArray},Nothing,typeof(conj),Tuple{Base.Broadcast.Extruded{CUDAnative.CuDeviceArray{Float32,1,CUDAnative.AS.Global},Tuple{Bool},Tuple{Int64}}}}}}.
That type is not isbits, and such arguments are only allowed when they are unused by the kernel.  .args is of type Tuple{Base.Broadcast.Extruded{Array{Float32,1},Tuple{Bool},Tuple{Int64}},Base.Broadcast.Broadcasted{Base.Broadcast.ArrayStyle{CuArray},Nothing,typeof(conj),Tuple{Base.Broadcast.Extruded{CUDAnative.CuDeviceArray{Float32,1,CUDAnative.AS.Global},Tuple{Bool},Tuple{Int64}}}}} which is not isbits.
    .1 is of type Base.Broadcast.Extruded{Array{Float32,1},Tuple{Bool},Tuple{Int64}} which is not isbits.
      .x is of type Array{Float32,1} which is not isbits.


Stacktrace:
 [1] check_invocation(::CUDAnative.CompilerJob, ::LLVM.Function) at /mnt/home/khyatt/.julia/dev/CUDAnative/src/compiler/validation.jl:70
 [2] macro expansion at /mnt/home/khyatt/.julia/dev/CUDAnative/src/compiler/driver.jl:187 [inlined]
 [3] macro expansion at /mnt/home/khyatt/.julia/packages/TimerOutputs/7zSea/src/TimerOutput.jl:216 [inlined]
 [4] #codegen#136(::Bool, ::Bool, ::Bool, ::Bool, ::Bool, ::typeof(CUDAnative.codegen), ::Symbol, ::CUDAnative.CompilerJob) at /mnt/home/khyatt/.julia/dev/CUDAnative/src/compiler/driver.jl:186
 [5] #codegen at ./none:0 [inlined]
 [6] #compile#135(::Bool, ::Bool, ::Bool, ::Bool, ::Bool, ::typeof(CUDAnative.compile), ::Symbol, ::CUDAnative.CompilerJob) at /mnt/home/khyatt/.julia/dev/CUDAnative/src/compiler/driver.jl:47
 [7] #compile#134 at ./none:0 [inlined]
 [8] #compile at ./none:0 [inlined] (repeats 2 times)
 [9] macro expansion at /mnt/home/khyatt/.julia/dev/CUDAnative/src/execution.jl:389 [inlined]
 [10] #cufunction#176(::Nothing, ::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::typeof(CUDAnative.cufunction), ::GPUArrays.var"#23#24", ::Type{Tuple{CuArrays.CuKernelState,CUDAnative.CuDeviceArray{Float32,1,CUDAnative.AS.Global},Base.Broadcast.Broadcasted{Nothing,Tuple{Base.OneTo{Int64}},typeof(*),Tuple{Base.Broadcast.Extruded{Array{Float32,1},Tuple{Bool},Tuple{Int64}},Base.Broadcast.Broadcasted{Base.Broadcast.ArrayStyle{CuArray},Nothing,typeof(conj),Tuple{Base.Broadcast.Extruded{CUDAnative.CuDeviceArray{Float32,1,CUDAnative.AS.Global},Tuple{Bool},Tuple{Int64}}}}}}}}) at /mnt/home/khyatt/.julia/dev/CUDAnative/src/execution.jl:357
 [11] cufunction(::Function, ::Type) at /mnt/home/khyatt/.julia/dev/CUDAnative/src/execution.jl:357
 [12] macro expansion at /mnt/home/khyatt/.julia/dev/CUDAnative/src/execution.jl:174 [inlined]
 [13] macro expansion at ./gcutils.jl:91 [inlined]
 [14] macro expansion at /mnt/home/khyatt/.julia/dev/CUDAnative/src/execution.jl:171 [inlined]
 [15] _gpu_call(::CuArrays.CuArrayBackend, ::Function, ::CuArray{Float32,1}, ::Tuple{CuArray{Float32,1},Base.Broadcast.Broadcasted{Nothing,Tuple{Base.OneTo{Int64}},typeof(*),Tuple{Base.Broadcast.Extruded{Array{Float32,1},Tuple{Bool},Tuple{Int64}},Base.Broadcast.Broadcasted{Base.Broadcast.ArrayStyle{CuArray},Nothing,typeof(conj),Tuple{Base.Broadcast.Extruded{CuArray{Float32,1},Tuple{Bool},Tuple{Int64}}}}}}}, ::Tuple{Tuple{Int64},Tuple{Int64}}) at /mnt/home/khyatt/.julia/dev/CuArrays/src/gpuarray_interface.jl:60
 [16] gpu_call at /mnt/home/khyatt/.julia/dev/GPUArrays/src/abstract_gpu_interface.jl:151 [inlined]
 [17] gpu_call at /mnt/home/khyatt/.julia/dev/GPUArrays/src/abstract_gpu_interface.jl:128 [inlined]
 [18] copyto! at /mnt/home/khyatt/.julia/dev/GPUArrays/src/broadcast.jl:48 [inlined]
 [19] copyto! at ./broadcast.jl:863 [inlined]
 [20] copy at ./broadcast.jl:839 [inlined]
 [21] materialize at ./broadcast.jl:819 [inlined]
 [22] (::Zygote.var"#1310#1311"{CuArray{Float32,1},CuArray{Float32,1}})(::Array{Float32,1}) at /mnt/home/khyatt/.julia/dev/Zygote/src/lib/broadcast.jl:68
```
New version:
```
julia> Flux.train!(loss, params(model), zip(gpu.(data), gpu.(out)), ADAM())

julia> # everyone finished happily and went on with their lives
```

Co-authored-by: Katharine Hyatt <khyatt@flatironinstitute.org>

This commit is contained in:

bors[bot]

2019-10-23 14:31:53 +00:00

committed by

GitHub

parent fa5737fb5c 8913c9c741

commit 645aa04464

No known key found for this signature in database

GPG Key ID: 4AEE18F83AFDEB23

2 changed files with 14 additions and 2 deletions

									
										14

src/layers/stateless.jl
									
										View File
										
				@ -4,10 +4,20 @@ using NNlib: logsoftmax, logσ

				mse(ŷ, y) = sum((ŷ .- y).^2) * 1 // length(y)

				function crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat; weight = 1)

				  -sum(y .* log.(ŷ) .* weight) * 1 // size(y, 2)

				function _crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat, weight::Nothing)

				  return -sum(y .* log.(ŷ)) * 1 // size(y, 2)

				end

				function _crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat, weight::Number)

				  return -sum(y .* log.(ŷ)) .* weight * 1 // size(y, 2)

				end

				function _crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat, weight::AbstractVector)

				  return -sum(y .* log.(ŷ) .* weight) * 1 // size(y, 2)

				end

				crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat; weight=nothing) = _crossentropy(ŷ, y, weight)

				function logitcrossentropy(logŷ::AbstractVecOrMat, y::AbstractVecOrMat; weight = 1)

				  return -sum(y .* logsoftmax(logŷ) .* weight) * 1 // size(y, 2)

				end

									
										2

test/cuda/cuda.jl
									
										View File
										
				@ -28,6 +28,8 @@ cm = gpu(m)

				x = [1,2,3]

				cx = gpu(x)

				@test Flux.crossentropy(x,x) ≈ Flux.crossentropy(cx,cx)

				@test Flux.crossentropy(x,x, weight=1.0) ≈ Flux.crossentropy(cx,cx, weight=1.0)

				@test Flux.crossentropy(x,x, weight=[1.0;2.0;3.0]) ≈ Flux.crossentropy(cx,cx, weight=cu([1.0;2.0;3.0]))

				xs = rand(5, 5)

				ys = Flux.onehotbatch(1:5,1:5)

Merge #898

14 src/layers/stateless.jl Unescape Escape View File

2 test/cuda/cuda.jl Unescape Escape View File

14

src/layers/stateless.jl

View File

2

test/cuda/cuda.jl

View File