2018-06-20 13:11:31 +00:00
|
|
|
import Base: *, ==
|
|
|
|
|
2018-07-12 20:08:53 +00:00
|
|
|
import LinearAlgebra
|
2018-07-17 14:57:39 +00:00
|
|
|
using Statistics
|
2018-07-18 13:39:20 +00:00
|
|
|
using LinearAlgebra: Transpose, Adjoint, diagm, diag
|
2018-06-20 14:18:07 +00:00
|
|
|
|
2018-02-07 17:43:25 +00:00
|
|
|
# An `AbstractArray` wrapper that carries a `Tracked{A}` record next to the
# wrapped array.
#
# Fields:
#   tracker - the `Tracked{A}` record attached to this array
#   data    - the wrapped array
#   grad    - an array of the same type as `data`; it is only assigned by the
#             three-argument constructor
struct TrackedArray{T,N,A<:AbstractArray{T,N}} <: AbstractArray{T,N}
    tracker::Tracked{A}
    data::A
    grad::A

    # Two-argument form: `grad` is left unset (incomplete initialisation).
    function TrackedArray{T,N,A}(t::Tracked{A}, data::A) where {T,N,A}
        return new(t, data)
    end

    # Three-argument form: every field is provided.
    function TrackedArray{T,N,A}(t::Tracked{A}, data::A, grad::A) where {T,N,A}
        return new(t, data, grad)
    end
end
|
|
|
|
|
2018-07-09 18:44:14 +00:00
|
|
|
# Accessor for the array wrapped by a `TrackedArray`.
function data(x::TrackedArray)
    return x.data
end

# Accessor for the `Tracked` record attached to a `TrackedArray`.
function tracker(x::TrackedArray)
    return x.tracker
end
|
|
|
|
|
|
|
|
# Rank-specific aliases of `TrackedArray` (rank 1, rank 2, and their union).
const TrackedVector{T,A} = TrackedArray{T,1,A}
const TrackedMatrix{T,A} = TrackedArray{T,2,A}
const TrackedVecOrMat{T,A} = Union{TrackedVector{T,A},TrackedMatrix{T,A}}
|
|
|
|
|
2018-02-07 20:39:36 +00:00
|
|
|
# `track` on an array result wraps it in a `TrackedArray` built from call `c`.
function track(c::Call, x::AbstractArray)
    return TrackedArray(c, x)
end

# Wrap `x`; the `Tracked` record is created from `c` alone and the `grad`
# field of the result is left unset.
function TrackedArray(c::Call, x::A) where {A<:AbstractArray}
    return TrackedArray{eltype(A),ndims(A),A}(Tracked{A}(c), x)
end

# Wrap `x` together with an explicit gradient array `Δ`, which is handed both
# to the `Tracked` record and to the `grad` field.
function TrackedArray(c::Call, x::A, Δ::A) where {A<:AbstractArray}
    return TrackedArray{eltype(A),ndims(A),A}(Tracked{A}(c, Δ), x, Δ)
end

# Wrap a plain array using an empty `Call()` and `zero(x)` as the gradient.
function TrackedArray(x::AbstractArray)
    return TrackedArray(Call(), x, zero(x))
end
|
2018-02-07 17:43:25 +00:00
|
|
|
|
2018-02-13 10:20:38 +00:00
|
|
|
# A tracked array of reals reports `TrackedReal{T}` as its element type.
function Base.eltype(x::Type{<:TrackedArray{T}}) where {T<:Real}
    return TrackedReal{T}
end
|
|
|
|
|
2018-02-07 17:43:25 +00:00
|
|
|
# Abbreviated printing of the concrete type: only the wrapped array type `A`
# is shown, the `T` and `N` parameters are elided.
function Base.show(io::IO, ::Type{TrackedArray{T,N,A}}) where {T,N,A<:AbstractArray{T,N}}
    return print(io, "TrackedArray{…,$A}")
end
|
|
|
|
|
2018-06-20 14:16:45 +00:00
|
|
|
# Summary line: the summary of the wrapped array prefixed with "Tracked ".
function Base.summary(io::IO, x::TrackedArray)
    wrapped = data(x)
    print(io, "Tracked ")
    return summary(io, wrapped)
end
|
|
|
|
|
2018-06-20 14:16:45 +00:00
|
|
|
# The array body is printed exactly as the wrapped array would be.
function Base.print_array(io::IO, x::TrackedArray)
    return Base.print_array(io, data(x))
end
|
|
|
|
|
2018-02-07 17:43:25 +00:00
|
|
|
# In-place element assignment on a tracked array always raises an error.
function Base.setindex!(xs::TrackedArray, v, i...)
    return error("Can't differentiate `setindex!`")
end
|
|
|
|
|
2018-04-30 11:09:15 +00:00
|
|
|
# `back!` without an explicit sensitivity is rejected for arrays; the message
# points at the two supported alternatives.
function back!(::TrackedArray)
    return error("Value is not scalar; use `back!(sum(x))` or `back!(x, Δ)`")
end
|
2018-02-07 20:39:36 +00:00
|
|
|
|
2018-02-07 17:43:25 +00:00
|
|
|
# Fallthrough methods
|
|
|
|
|
|
|
|
# Generate forwarding methods: `size` and `ndims` of a tracked array are
# answered by the wrapped array, with any extra arguments passed through.
for f in (:(Base.size), :(Base.ndims))
    @eval @inline function $f(x::TrackedArray, a...)
        return $f(data(x), a...)
    end
end
|
|
|
|
|
2018-07-10 08:03:09 +00:00
|
|
|
# `size` queried for two or more dimensions at once forwards to the wrapped
# array.
function Base.size(x::TrackedArray, i::Integer, j::Integer, is::Integer...)
    return size(data(x), i, j, is...)
end
|
|
|
|
|
2018-02-07 17:43:25 +00:00
|
|
|
# `similar` with explicit dimensions is computed on the wrapped array, so the
# result is whatever `similar` returns for that array.
function Base.similar(x::TrackedArray, dims::Union{AbstractUnitRange,Integer}...)
    return similar(data(x), dims...)
end

# `similar` with a new element type, likewise forwarded to the wrapped array.
function Base.similar(x::TrackedArray, T::Type)
    return similar(data(x), T)
end
|
|
|
|
|
2018-06-20 13:11:31 +00:00
|
|
|
# Equality looks through the tracking wrapper on whichever side(s) carry one.
Base.:(==)(x::TrackedArray, y) = (data(x) == y)
Base.:(==)(y, x::TrackedArray) = (y == data(x))
# Both sides tracked; this method also disambiguates the two above.
Base.:(==)(x::TrackedArray, y::TrackedArray) = (data(x) == data(y))
|
2018-02-07 17:43:25 +00:00
|
|
|
|
|
|
|
# Array Stdlib
|
|
|
|
|
2018-02-07 20:39:36 +00:00
|
|
|
# Indexing a tracked array is recorded through `track`.
function Base.getindex(xs::TrackedArray, i...)
    return track(getindex, xs, i...)
end

# Forward: index the underlying data.
# Backward: start from `zero(xs)`, write the incoming sensitivity at the
# indexed positions, and return `nothing` for every index argument.
@grad function getindex(xs::AbstractArray, i...)
    function pullback(Δ)
        scattered = zero(xs)
        scattered[i...] = data(Δ)
        index_grads = map(_ -> nothing, i)
        return (nobacksies(:getindex, scattered), index_grads...)
    end
    return data(xs)[i...], pullback
end
|
|
|
|
|
2018-02-07 20:39:36 +00:00
|
|
|
# Unary negation of a tracked array is recorded through `track`.
function Base.:-(xs::TrackedArray)
    return track(-, xs)
end

# Forward: negate the data. Backward: negate the incoming sensitivity.
@grad -(xs) = (-data(xs), Δ -> (-Δ,))
|
2017-08-23 16:50:43 +00:00
|
|
|
|
2018-02-07 20:39:36 +00:00
|
|
|
# `transpose` and `adjoint` of tracked arrays are recorded through `track`.
function Base.transpose(xs::TrackedArray)
    return track(transpose, xs)
end

function Base.adjoint(xs::TrackedArray)
    return track(adjoint, xs)
end

# Backward passes apply the same operation to the sensitivity and reshape the
# result to the input's size.
@grad transpose(xs) = (transpose(data(xs)), Δ -> (reshape(transpose(Δ), size(xs)),))

@grad adjoint(xs) = (adjoint(data(xs)), Δ -> (reshape(adjoint(Δ), size(xs)),))
|
2017-09-03 21:10:23 +00:00
|
|
|
|
2018-08-03 14:14:10 +00:00
|
|
|
# `repeat` on a tracked array is recorded through `track`; keyword arguments
# are forwarded unchanged.
Base.repeat(A::TrackedArray; kw...) = track(repeat, A; kw...)

# Gradient of `repeat`.
#
# Keywords `inner` / `outer` default to all-ones tuples of length `ndims(xs)`.
# (The defaults previously referred to an undefined name `A`, so any call that
# omitted one of the keywords raised `UndefVarError`.)
#
# Backward: every element of the sensitivity is accumulated into the source
# element it was copied from.
@grad function repeat(xs; inner=ntuple(x->1, ndims(xs)), outer=ntuple(x->1, ndims(xs)))
    repeat(data(xs), inner = inner, outer = outer), function (Δ)
        Δ′ = zero(xs)
        S = size(xs)

        # Loop through each element of Δ, calculate source dimensions, accumulate into Δ′
        for (dest_idx, val) in pairs(IndexCartesian(), data(Δ))
            # First, round dest_idx[dim] to nearest gridpoint defined by inner[dim], then
            # wrap around based on original size S.
            src_idx = [mod1(div(dest_idx[dim] - 1, inner[dim]) + 1, S[dim]) for dim in 1:length(S)]
            Δ′[src_idx...] += val
        end
        (nobacksies(:repeat, Δ′),)
    end
end
|
|
|
|
|
2018-05-02 06:37:30 +00:00
|
|
|
# Generate tracked `vcat` / `hcat` methods.
#
# Julia has no standardised promotion mechanism for concatenation yet
# (https://github.com/JuliaLang/julia/pull/20815), so the methods below are
# enumerated by hand.
for f in (:vcat, :hcat)
    Plain = :(Union{Vector,Matrix,Adjoint,Transpose})
    UArray = :(Union{TrackedArray,Vector,Matrix,Adjoint,Transpose})
    @eval begin
        # Rank ∈ (1,2) concatenation with a TrackedArray anywhere among the
        # arguments. This relies on Base having other, more specific methods
        # for the all-untracked case.
        function Base.$f(a::$UArray...)
            return track($f, a...)
        end

        # Rank > 2 concatenation when the TrackedArray comes first.
        function Base.$f(a::TrackedArray, b::AbstractArray...)
            return track($f, a, b...)
        end
        # Resolves the ambiguity introduced by the previous method.
        function Base.$f(a::TrackedArray, b::$UArray...)
            return track($f, a, b...)
        end

        # Rank > 2 concatenation when the TrackedArray comes second.
        function Base.$f(a::Array, b::TrackedArray, c::AbstractArray...)
            return track($f, a, b, c...)
        end
        # Resolves the ambiguity introduced by the previous method.
        function Base.$f(a::$Plain, b::TrackedArray, c::$UArray...)
            return track($f, a, b, c...)
        end
    end
end
|
|
|
|
|
2018-07-09 12:39:10 +00:00
|
|
|
# Gradient of `vcat`.
# Forward: concatenate the underlying data.
# Backward: hand each argument the block of rows of Δ it contributed, taking
# every trailing dimension in full.
@grad function vcat(xs...)
    function pullback(Δ)
        offset = 0
        slices = []
        for xsi in xs
            # One `:` per dimension of `xsi` after the first.
            trailing = Base.tail(map(_ -> :, size(xsi)))
            rows = offset+1:offset+size(xsi, 1)
            push!(slices, Δ[rows, trailing...])
            offset += size(xsi, 1)
        end
        return (slices...,)
    end
    return vcat(data.(xs)...), pullback
end
|
|
|
|
|
2018-07-09 12:39:10 +00:00
|
|
|
# Gradient of `hcat`.
# Forward: concatenate the underlying data.
# Backward: a vector argument receives a single column of Δ; any other
# argument receives its block of columns with all further dimensions in full.
@grad function hcat(xs...)
    function pullback(Δ)
        offset = 0
        slices = []
        for xsi in xs
            piece = if ndims(xsi) == 1
                Δ[:, offset+1]
            else
                # One `:` per dimension of `xsi` after the second.
                trailing = Base.tail(Base.tail(map(_ -> :, size(xsi))))
                Δ[:, offset+1:offset+size(xsi, 2), trailing...]
            end
            offset += size(xsi, 2)
            push!(slices, piece)
        end
        return (slices...,)
    end
    return hcat(data.(xs)...), pullback
end
|
|
|
|
|
2018-08-03 14:14:10 +00:00
|
|
|
# Tracked `cat`: recorded whenever a TrackedArray is the first or second
# argument. `dims` is a required keyword and is forwarded as given.
function Base.cat(a::TrackedArray; dims)
    return track(cat, a, dims = dims)
end

function Base.cat(a::TrackedArray, b::TrackedArray, c::AbstractArray...; dims)
    return track(cat, a, b, c..., dims = dims)
end

function Base.cat(a::TrackedArray, b::AbstractArray, c::AbstractArray...; dims)
    return track(cat, a, b, c..., dims = dims)
end

function Base.cat(a::AbstractArray, b::TrackedArray, c::AbstractArray...; dims)
    return track(cat, a, b, c..., dims = dims)
end
|
2018-05-02 06:37:30 +00:00
|
|
|
|
2018-07-12 19:42:32 +00:00
|
|
|
# Gradient of `cat`.
# Forward: concatenate the underlying data along `dims`.
# Backward: walk the arguments in order, cutting out of Δ the window each one
# occupies and reshaping it to that argument's size.
@grad function cat(Xs...; dims)
    function pullback(Δ)
        rank = Val(ndims(Δ))
        offset = ntuple(i -> 0, rank)
        grads = []
        for xs in Xs
            own_dims = 1:ndims(xs)
            # Extent of `xs` along each concatenated dimension (1 where `xs`
            # has no such dimension); 0 marks a dimension that is not in `dims`.
            extent = ntuple(i -> i in dims ? (i in own_dims ? size(xs, i) : 1) : 0, rank)
            # Index window of `xs` inside Δ; non-concatenated dimensions are
            # taken in full.
            window = ntuple(i -> extent[i] > 0 ? (offset[i]+1:offset[i]+extent[i]) : Colon(), rank)
            push!(grads, reshape(Δ[window...], size(xs)))
            offset = offset .+ extent
        end
        return (grads...,)
    end
    return cat(data.(Xs)..., dims = dims), pullback
end
|
|
|
|
|
2018-04-02 20:09:57 +00:00
|
|
|
# Tracked `reshape`.
#
# Dimensions are typed with the platform-native `Int` (which is `Int64` on
# 64-bit systems), so that these methods are also selected on 32-bit
# platforms, where dimension tuples are `Int32`-based.

# Varargs form: collect the dimensions into a tuple.
Base.reshape(xs::TrackedArray, dims::Union{Colon,Int}...) = reshape(xs, dims)

# Tuple containing `:` entries: resolve them to concrete sizes first.
Base.reshape(xs::TrackedArray, dims::Tuple{Vararg{Union{Int,Colon}}}) =
    reshape(xs, Base._reshape_uncolon(xs, dims))

# Fully specified integer dimensions: record the call.
Base.reshape(xs::TrackedArray, dims::Tuple{Vararg{Int}}) = track(reshape, xs, dims)

# Forward: reshape the data. Backward: reshape the sensitivity back to the
# input's size; `dims` receives `nothing`.
@grad reshape(xs, dims) = reshape(data(xs), dims), Δ -> (reshape(Δ, size(xs)), nothing)
|
2017-12-15 16:18:16 +00:00
|
|
|
|
2018-02-28 02:19:58 +00:00
|
|
|
# `permutedims` on a tracked array is recorded through `track`.
function Base.permutedims(xs::TrackedArray, dims)
    return track(permutedims, xs, dims)
end

# Backward: apply the inverse permutation to the sensitivity; `dims` receives
# `nothing`.
@grad permutedims(xs, dims) =
    (permutedims(data(xs), dims), Δ -> (permutedims(Δ, invperm(dims)), nothing))
|
2018-02-08 19:27:57 +00:00
|
|
|
|
2018-02-16 14:15:40 +00:00
|
|
|
# Kronecker product written with `reshape` and broadcasting only.
#
# `mat1` is viewed as a (1, m1, 1, n1) array and `mat2` as (m2, 1, n2, 1);
# their broadcast product has shape (m2, m1, n2, n1) and is flattened to
# (m1*m2, n1*n2).
function _kron(mat1::AbstractMatrix, mat2::AbstractMatrix)
    (m1, n1) = size(mat1)
    (m2, n2) = size(mat2)
    outer = reshape(mat1, (1, m1, 1, n1)) .* reshape(mat2, (m2, 1, n2, 1))
    return reshape(outer, (m1 * m2, n1 * n2))
end
|
|
|
|
|
2018-02-16 14:15:40 +00:00
|
|
|
# `kron` with at least one tracked matrix is computed by `_kron`.
function Base.kron(a::TrackedMatrix, b::TrackedMatrix)
    return _kron(a, b)
end

function Base.kron(a::TrackedMatrix, b::AbstractMatrix)
    return _kron(a, b)
end

function Base.kron(a::AbstractMatrix, b::TrackedMatrix)
    return _kron(a, b)
end
|
|
|
|
|
2017-08-22 11:24:08 +00:00
|
|
|
# Reductions
|
|
|
|
|
2018-07-17 14:57:39 +00:00
|
|
|
# Tracked `sum`.
#
# Keyword arguments do not take part in dispatch, so a separate
# `sum(xs::TrackedArray)` method would overwrite the `dims` variant. Both
# forms therefore share one method: without `dims` (the default `:`) the full
# reduction is recorded, otherwise `dims` is recorded as a second positional
# argument of the tracked call.
function Base.sum(xs::TrackedArray; dims = :)
    return dims === (:) ? track(sum, xs) : track(sum, xs, dims)
end

# `sum(f, xs)`: broadcast `f` over `xs` first, then sum the result.
Base.sum(f::Union{Function,Type}, xs::TrackedArray) = sum(f.(xs))
|
2017-08-22 11:24:08 +00:00
|
|
|
|
2018-07-17 14:57:39 +00:00
|
|
|
# Gradients of `sum`. In every case the sensitivity is broadcast back over an
# array shaped like the input.

# Single integer dimension: one `nothing` for `dims`.
@grad function sum(xs, dims::Int)
    return sum(data(xs), dims = dims), Δ -> (zero(xs) .+ Δ, nothing)
end

# Any other `dims`: one `nothing` per element of `dims`.
@grad function sum(xs, dims)
    return sum(data(xs), dims = dims), Δ -> (zero(xs) .+ Δ, map(_ -> nothing, dims)...)
end

# Full reduction.
@grad function sum(xs)
    return sum(data(xs)), Δ -> (zero(xs) .+ Δ,)
end
|
2017-08-22 11:24:08 +00:00
|
|
|
|
2018-03-06 10:01:19 +00:00
|
|
|
# Tracked `prod`, with an optional positional dimension argument.
function Base.prod(xs::TrackedArray, dim)
    return track(prod, xs, dim)
end

function Base.prod(xs::TrackedArray)
    return track(prod, xs)
end

# `prod(f, xs)`: broadcast `f` over `xs` first, then take the product.
function Base.prod(f::Union{Function, Type}, xs::TrackedArray)
    return prod(f.(xs))
end
|
|
|
|
|
2018-07-10 08:03:09 +00:00
|
|
|
# Gradient of the full product: d prod / d xs[i] = prod(xs) / xs[i].
# NOTE(review): this division form yields NaN/Inf where `xs` contains zeros.
@grad prod(xs) = prod(data(xs)), Δ -> (prod(xs) ./ xs .* Δ,)

# For every element of `x`, the product of all *other* elements that lie in
# the same slice along `dims`. Computed without division by multiplying the
# circular shifts of `x` along the reduced dimensions, so zeros in `x` are
# handled. `dims` may be anything supporting `in` (an integer, tuple, range…).
function _prod_excluding_self(x::AbstractArray, dims)
    spans = ntuple(d -> d in dims ? (0:size(x, d)-1) : (0:0), ndims(x))
    acc = fill!(similar(x), one(eltype(x)))
    for shift in Iterators.product(spans...)
        all(iszero, shift) && continue  # the zero shift is the element itself
        acc .*= circshift(x, shift)
    end
    return acc
end

# Gradient of `prod` along `dim`.
#
# The previous implementation multiplied every other element of the *whole*
# array, which is only correct when `xs` is a vector reduced along its single
# dimension; it also failed for single-element arrays and labelled the result
# `:sum`. The partial products are now restricted to the reduced slice.
@grad prod(xs, dim) = prod(data(xs), dims = dim),
    Δ -> (nobacksies(:prod, _prod_excluding_self(data(xs), dim) .* Δ),
          nothing)
|
2018-03-06 10:01:19 +00:00
|
|
|
|
2017-09-07 01:21:35 +00:00
|
|
|
# `findfirst` is answered directly on the wrapped array (no tracking).
function Base.findfirst(xs::TrackedArray, args...)
    return findfirst(data(xs), args...)
end
|
2017-09-02 03:33:05 +00:00
|
|
|
|
2018-07-17 14:57:39 +00:00
|
|
|
# Tracked `mean`, over the whole array or over a positional `region`.
function Statistics.mean(xs::TrackedArray)
    return track(mean, xs)
end

function Statistics.mean(xs::TrackedArray, region)
    return track(mean, xs, region)
end
|
2017-10-30 08:21:02 +00:00
|
|
|
|
2018-04-27 21:14:01 +00:00
|
|
|
# Tracked `maximum` / `minimum`, over the whole array or over a positional
# `region`.
function Base.maximum(xs::TrackedArray)
    return track(maximum, xs)
end

function Base.maximum(xs::TrackedArray, region)
    return track(maximum, xs, region)
end

function Base.minimum(xs::TrackedArray)
    return track(minimum, xs)
end

function Base.minimum(xs::TrackedArray, region)
    return track(minimum, xs, region)
end
|
|
|
|
|
2018-06-20 14:18:07 +00:00
|
|
|
import LinearAlgebra: dot
|
|
|
|
|
|
|
|
# Tracked `dot`: recorded when either vector is tracked.
function dot(xs::TrackedVector, ys::TrackedVector)
    return track(dot, xs, ys)
end

function dot(xs::AbstractVector, ys::TrackedVector)
    return track(dot, xs, ys)
end

function dot(xs::TrackedVector, ys::AbstractVector)
    return track(dot, xs, ys)
end

# Backward: each argument receives the other one scaled by the sensitivity.
@grad dot(xs, ys) = (dot(data(xs), data(ys)), Δ -> (Δ .* ys, Δ .* xs))
|
2017-12-12 17:23:15 +00:00
|
|
|
|
2018-06-20 14:18:07 +00:00
|
|
|
using StatsBase
|
|
|
|
|
2017-11-21 16:04:04 +00:00
|
|
|
# Hacks to get std working
|
2018-07-17 14:57:39 +00:00
|
|
|
# `std` written in terms of broadcasting and `sum`, so it can be
# differentiated for tracked arrays. Both forms divide by `n - 1`.
#
# Keyword `mean` defaults to the mean of `x` (over `dim` in the second form).

# Standard deviation over the whole array.
StatsBase.std(x::TrackedArray; mean = Statistics.mean(x)) =
    sqrt.(sum((x .- mean).^2) ./ (length(x)-1))

# Standard deviation along `dim`. The reduction uses the `dims` keyword of
# `sum` (the positional `sum(x, dim)` form no longer exists in Julia 1.0),
# matching the keyword form used by the tracked `sum` in this file.
StatsBase.std(x::TrackedArray, dim; mean = Statistics.mean(x, dim)) =
    sqrt.(sum((x .- mean).^2, dims = dim) ./ (size(x, dim)-1))
|
|
|
|
|
2018-06-20 14:18:07 +00:00
|
|
|
# p-norm of all elements (default p = 2), built from tracked primitives.
function LinearAlgebra.vecnorm(x::TrackedArray, p::Real = 2)
    # `eps(0f0)` is added to every term to avoid d(sqrt(x))/dx == Inf at 0
    powered = abs.(x).^p .+ eps(0f0)
    return sum(powered)^(1/p)
end
|
2018-02-09 19:00:26 +00:00
|
|
|
|
2018-07-10 08:03:09 +00:00
|
|
|
# Gradient of the full mean: the sensitivity divided by the element count.
@grad function mean(xs)
    return mean(data(xs)), Δ -> (Δ / length(xs),)
end

# Gradient of the mean over `region`: the sensitivity divided by the number of
# averaged elements, broadcast to the input's shape; `region` gets `nothing`.
@grad function mean(xs, region)
    return mean(data(xs), dims = region),
        Δ -> (zero(xs) .+ Δ ./ prod(size(xs, region...)), nothing)
end
|
2017-10-30 08:21:02 +00:00
|
|
|
|
2018-07-09 12:39:10 +00:00
|
|
|
# Gradient of `maximum` (optionally over a region `r`).
# Backward: start from `zero(xs)` and write the sensitivity at the position(s)
# reported by `findmax`; each extra argument receives `nothing`.
@grad function maximum(xs, r...)
    function pullback(Δ)
        grad = zero(xs)
        _, winner = findmax(data(xs), r...)
        grad[winner] = data(Δ)
        return (nobacksies(:maximum, grad), map(_ -> nothing, r)...)
    end
    return maximum(data(xs), r...), pullback
end
|
2018-07-09 12:39:10 +00:00
|
|
|
# Gradient of `minimum` (optionally over a region `r`).
# Backward: start from `zero(xs)` and write the sensitivity at the position(s)
# reported by `findmin`; each extra argument receives `nothing`.
@grad function minimum(xs, r...)
    function pullback(Δ)
        grad = zero(xs)
        _, winner = findmin(data(xs), r...)
        grad[winner] = data(Δ)
        return (nobacksies(:minimum, grad), map(_ -> nothing, r)...)
    end
    return minimum(data(xs), r...), pullback
end
|
|
|
|
|
2017-08-22 11:24:08 +00:00
|
|
|
# BLAS
|
|
|
|
|
2018-06-20 14:18:07 +00:00
|
|
|
# Tracked `diagm` of a vector.
function LinearAlgebra.diagm(x::TrackedVector)
    return track(diagm, x)
end

# Backward: the diagonal of the sensitivity.
@grad diagm(x) = (diagm(data(x)), Δ -> (diag(Δ),))
|
2018-02-05 18:29:35 +00:00
|
|
|
|
2018-06-20 13:11:31 +00:00
|
|
|
# Tracked `*`. For every operand pairing there are three methods: left
# operand tracked, right operand tracked, and both tracked (the last one also
# disambiguates the first two).

# matrix * matrix
Base.:*(x::TrackedMatrix, y::AbstractMatrix) = track(*, x, y)
Base.:*(x::AbstractMatrix, y::TrackedMatrix) = track(*, x, y)
Base.:*(x::TrackedMatrix, y::TrackedMatrix) = track(*, x, y)

# matrix * vector
Base.:*(x::TrackedMatrix, y::AbstractVector) = track(*, x, y)
Base.:*(x::AbstractMatrix, y::TrackedVector) = track(*, x, y)
Base.:*(x::TrackedMatrix, y::TrackedVector) = track(*, x, y)

# vector * vector
Base.:*(x::TrackedVector, y::AbstractVector) = track(*, x, y)
Base.:*(x::AbstractVector, y::TrackedVector) = track(*, x, y)
Base.:*(x::TrackedVector, y::TrackedVector) = track(*, x, y)
|
2017-08-19 15:02:19 +00:00
|
|
|
|
2018-06-20 13:11:31 +00:00
|
|
|
# Gradient of matrix * (vector or matrix).
# Backward: the left operand receives Δ * transpose(b), the right operand
# receives transpose(a) * Δ.
@grad a::AbstractMatrix * b::AbstractVecOrMat =
    (data(a) * data(b), Δ -> (Δ * transpose(b), transpose(a) * Δ))
|
2017-11-07 19:34:27 +00:00
|
|
|
|
2018-07-30 16:04:18 +00:00
|
|
|
# @grad function (a::AbstractMatrix * b::AbstractVecOrMat)
|
|
|
|
# # @show size(a) size(b)
|
|
|
|
# data(a)*data(b), function (Δ)
|
|
|
|
# @show size(Δ) size(b) size(Δ*transpose(b)) size(Δ*transpose(data(b)))
|
|
|
|
# @show typeof(Δ) typeof(b)
|
|
|
|
# (Δ * transpose(b), transpose(a) * Δ)
|
|
|
|
# end
|
|
|
|
# end
|
|
|
|
|
2017-08-23 01:03:17 +00:00
|
|
|
# NNlib
|
|
|
|
|
2017-12-14 18:48:38 +00:00
|
|
|
using NNlib
|
2018-02-26 22:43:07 +00:00
|
|
|
import NNlib: softmax, ∇softmax, logsoftmax, ∇logsoftmax, conv, maxpool, meanpool
|
2017-08-23 01:03:17 +00:00
|
|
|
|
2018-02-07 20:39:36 +00:00
|
|
|
# Tracked `softmax`; the backward pass calls NNlib's `∇softmax` on the
# underlying data.
function softmax(xs::TrackedArray)
    return track(softmax, xs)
end

@grad function softmax(xs)
    return softmax(data(xs)),
        Δ -> (nobacksies(:softmax, ∇softmax(data(Δ), data(xs))),)
end

# Tracked `logsoftmax`; the backward pass calls NNlib's `∇logsoftmax` on the
# underlying data.
function logsoftmax(xs::TrackedArray)
    return track(logsoftmax, xs)
end

@grad function logsoftmax(xs)
    return logsoftmax(data(xs)),
        Δ -> (nobacksies(:logsoftmax, ∇logsoftmax(data(Δ), data(xs))),)
end
|
2018-05-21 19:20:43 +00:00
|
|
|
|
2018-08-03 14:14:10 +00:00
|
|
|
# Tracked `conv`: recorded when the input, the filter, or both are tracked.
# Keyword arguments are forwarded unchanged.
function conv(x::TrackedArray, w::TrackedArray; kw...)
    return track(conv, x, w; kw...)
end

function conv(x::AbstractArray, w::TrackedArray; kw...)
    return track(conv, x, w; kw...)
end

function conv(x::TrackedArray, w::AbstractArray; kw...)
    return track(conv, x, w; kw...)
end

# Backward: NNlib's `∇conv_data` gives the input's gradient and
# `∇conv_filter` the filter's, both evaluated on the underlying data.
@grad function conv(x, w; kw...)
    function pullback(Δ)
        dx = NNlib.∇conv_data(data(Δ), data(x), data(w); kw...)
        dw = NNlib.∇conv_filter(data(Δ), data(x), data(w); kw...)
        return nobacksies(:conv, (dx, dw))
    end
    return conv(data(x), data(w); kw...), pullback
end
|
2017-12-15 02:29:14 +00:00
|
|
|
|
2018-08-03 14:14:10 +00:00
|
|
|
# Tracked `maxpool` with window argument `k`; keywords are forwarded.
function maxpool(x::TrackedArray, k; kw...)
    return track(maxpool, x, k; kw...)
end

# Backward: NNlib's `∇maxpool` on the underlying data; `k` gets `nothing`.
@grad function maxpool(x, k; kw...)
    y = maxpool(data(x), k; kw...)
    function pullback(Δ)
        dx = NNlib.∇maxpool(data(Δ), data(y), data(x), k; kw...)
        return (nobacksies(:maxpool, dx), nothing)
    end
    return y, pullback
end
|
2018-02-26 22:43:07 +00:00
|
|
|
|
2018-08-03 14:14:10 +00:00
|
|
|
# Tracked `meanpool` with window argument `k`; keywords are forwarded.
meanpool(x::TrackedArray, k; kw...) = track(meanpool, x, k; kw...)

# Backward: NNlib's `∇meanpool` on the underlying data; `k` gets `nothing`.
# The `nobacksies` label names this operation (it previously said `:maxpool`,
# copied from the `maxpool` gradient).
@grad function meanpool(x, k; kw...)
    y = meanpool(data(x), k; kw...)
    y, Δ -> (nobacksies(:meanpool, NNlib.∇meanpool(data.((Δ, y, x))..., k; kw...)), nothing)
end
|
2017-12-15 02:29:14 +00:00
|
|
|
|
2017-08-19 15:02:19 +00:00
|
|
|
# Broadcasting
|
|
|
|
|
2018-07-09 12:39:10 +00:00
|
|
|
using ForwardDiff: Dual, partials, value
|
2017-08-19 15:02:19 +00:00
|
|
|
|
2018-07-12 18:28:30 +00:00
|
|
|
# Size used for un-broadcasting: arrays report `size(x)`, everything else is
# treated as size-less and reports the empty tuple.
function _size(x::AbstractArray)
    return size(x)
end

function _size(x)
    return ()
end
|
|
|
|
|
2017-08-19 15:02:19 +00:00
|
|
|
# Attach the partials `ps` to a broadcast argument.
#   - arrays: every element becomes `Dual(x, ps)`
#   - reals:  the value becomes `Dual(xs, ps)`
#   - anything else is passed through untouched
function dualify(xs, n)
    return xs
end

function dualify(xs::AbstractArray, ps)
    return map(xs) do x
        Dual(x, ps)
    end
end

function dualify(xs::Real, ps)
    return Dual(xs, ps)
end
|
2017-08-19 15:02:19 +00:00
|
|
|
|
2018-07-09 22:40:07 +00:00
|
|
|
# Reduce a broadcast sensitivity `Δ` back to the size `x` of the argument it
# belongs to.
#
# If the sizes already agree `Δ` is returned as is. Otherwise `Δ` is summed
# over every dimension that the argument lacks or in which it has extent 1,
# and the result is reshaped to `x`.
function unbroadcast(x::Tuple, Δ)
    x == size(Δ) && return Δ
    collapsed = filter(n -> n > length(x) || x[n] == 1, 1:ndims(Δ))
    return reshape(sum(Δ, dims = collapsed), x)
end

# Size-less argument (empty size tuple): the gradient is the total sum.
function unbroadcast(x::Tuple{}, Δ)
    return sum(Δ)
end
|
2018-02-07 20:39:36 +00:00
|
|
|
|
2017-08-28 00:40:59 +00:00
|
|
|
# Sensitivity `Δ` scaled by the `i`-th partial of `x` (read via `partials`).
# The partial is fetched under `@inbounds`, i.e. without a bounds check.
function getpartial(Δ, x, i)
    @inbounds ith = getindex(partials(x), i)
    return Δ * ith
end
|
|
|
|
|
2018-07-09 12:39:10 +00:00
|
|
|
# Broadcast `f` over `args` using `Dual` numbers and record the result.
#
# Argument `i` is dualified with a one-hot tuple of partials (`true` in slot
# `i`). If the broadcast result does not have a `Dual` element type it is
# returned directly; otherwise its values are tracked together with a
# backward closure that scales the stored partials by the sensitivity and
# reduces each one back to its argument's size.
function ∇broadcast(f, args::Vararg{Any,N}) where {N}
    sizes = _size.(args)
    slots = ntuple(identity, Val(N))
    dargs = map(args, slots) do x, i
        dualify(data(x), ntuple(j -> i == j, Val(N)))
    end
    out = broadcast(f, dargs...)
    # So we can return non-tracked arrays
    if !(eltype(out) <: Dual)
        return out
    end
    y = value.(out)
    back = function (Δ_)
        Δ = data(Δ_)
        Δargs = ntuple(i -> getpartial.(Δ, out, i), Val(N))
        dxs = map(unbroadcast, sizes, Δargs)
        return nobacksies(:broadcast, dxs)
    end
    return track(Call(back, tracker.(args)), y)
end
|
2017-08-19 15:02:19 +00:00
|
|
|
|
2018-07-12 18:28:30 +00:00
|
|
|
using Base.Broadcast: BroadcastStyle
|
|
|
|
|
|
|
|
# Broadcast style selected whenever a tracked array or tracked real takes
# part in a broadcast.
struct TrackedStyle <: BroadcastStyle end

# Tracked arrays and tracked reals advertise `TrackedStyle`.
function Broadcast.BroadcastStyle(::Type{<:Union{TrackedArray,TrackedReal}})
    return TrackedStyle()
end

# Combined with any other style, `TrackedStyle` wins.
function Broadcast.BroadcastStyle(::TrackedStyle, ::BroadcastStyle)
    return TrackedStyle()
end
|
|
|
|
|
|
|
|
# Materialise a `TrackedStyle` broadcast: flatten the broadcast expression
# into a single function plus its arguments and pass both to `∇broadcast`.
function Base.copy(bc::Broadcast.Broadcasted{TrackedStyle})
    flat = Broadcast.flatten(bc)
    return ∇broadcast(flat.f, flat.args...)
end
|