diff --git a/src/utils.jl b/src/utils.jl
index 581f9e01..1be5ded5 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -11,8 +11,23 @@ unstack(xs, dim) = [slicedim(xs, dim, i) for i = 1:size(xs, dim)]
 
 batchindex(xs, i) = (reverse(Base.tail(reverse(indices(xs))))..., i)
 
+"""
+    batch(xs)
+
+Batch the arrays in `xs` into a single array.
+
+```julia
+julia> batch([[1,2,3],[4,5,6]])
+3×2 Array{Int64,2}:
+ 1  4
+ 2  5
+ 3  6
+```
+"""
 function batch(xs)
-  data = similar(first(xs), size(first(xs))..., length(xs))
+  data = first(xs) isa AbstractArray ?
+    similar(first(xs), size(first(xs))..., length(xs)) :
+    Vector{eltype(xs)}(length(xs))
   for (i, x) in enumerate(xs)
     data[batchindex(data, i)...] = x
   end
@@ -21,7 +36,21 @@ end
 
 Base.rpad(v::AbstractVector, n::Integer, p) = [v; fill(p, max(n - length(v), 0))]
 
-function batchseq(xs, pad, n = maximum(length(x) for x in xs))
+"""
+    batchseq(seqs, pad)
+
+Take a list of `N` sequences, and turn them into a single sequence where each
+item is a batch of `N`. Short sequences will be padded by `pad`.
+
+```julia
+julia> batchseq([[1, 2, 3], [4, 5]], 0)
+3-element Array{Array{Int64,1},1}:
+ [1, 4]
+ [2, 5]
+ [3, 0]
+```
+"""
+function batchseq(xs, pad = nothing, n = maximum(length(x) for x in xs))
   xs_ = [rpad(x, n, pad) for x in xs]
   [batch([xs_[j][i] for j = 1:length(xs_)]) for i = 1:n]
 end