diff --git a/.gitignore b/.gitignore
index 785b9c4e..9d6de240 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,4 +4,4 @@
 docs/build/
 docs/site/
 docs/flux.css
-demos
+deps
diff --git a/src/Flux.jl b/src/Flux.jl
index e4b6c832..242c8b1f 100644
--- a/src/Flux.jl
+++ b/src/Flux.jl
@@ -29,4 +29,6 @@ include("layers/basic.jl")
 include("layers/recurrent.jl")
 include("layers/normalisation.jl")
 
+include("data/Data.jl")
+
 end # module
diff --git a/src/data/Data.jl b/src/data/Data.jl
new file mode 100644
--- /dev/null
+++ b/src/data/Data.jl
@@ -0,0 +1,18 @@
+# Dataset utilities. Files fetched at runtime are kept under the package-level
+# `deps` directory, which is created when this module is initialised.
+module Data
+
+export CMUDict, cmudict
+
+# Path inside the `deps` directory located two levels above this file.
+deps(path...) = joinpath(@__DIR__, "..", "..", "deps", path...)
+
+# Create the `deps` directory (if needed) when the module is loaded.
+function __init__()
+  mkpath(deps())
+end
+
+include("cmudict.jl")
+using .CMUDict
+
+end
diff --git a/src/data/cmudict.jl b/src/data/cmudict.jl
new file mode 100644
--- /dev/null
+++ b/src/data/cmudict.jl
@@ -0,0 +1,56 @@
+# Loader for the CMU Pronouncing Dictionary. The data files are downloaded into
+# `deps/cmudict` the first time they are needed.
+module CMUDict
+
+export cmudict
+
+using ..Data: deps
+
+# Dictionary release to fetch; interpolated into the download URL.
+const version = "0.7b"
+
+# Download any missing data file (dictionary, phone list, symbol list) into
+# `deps/cmudict`. Files are checked one by one so that a partially populated
+# directory is completed rather than silently treated as ready.
+function load()
+  mkpath(deps("cmudict"))
+  for x in ["", ".phones", ".symbols"]
+    path = deps("cmudict", "cmudict$x")
+    isfile(path) && continue
+    download("http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-$version$x",
+             path)
+  end
+end
+
+# First tab-separated field of every non-empty line of the phones file, as Symbols.
+function phones()
+  load()
+  Symbol.(first.(split.(split(readstring(deps("cmudict", "cmudict.phones")),
+                "\n", keep = false), "\t")))
+end
+
+# Every non-empty line of the symbols file, as Symbols.
+function symbols()
+  load()
+  Symbol.(split(readstring(deps("cmudict", "cmudict.symbols")),
+                "\n", keep = false))
+end
+
+# Map the first whitespace-separated token of each non-blank line of the
+# dictionary file to the remaining tokens as Symbols. Entries are not filtered here.
+function rawdict()
+  load()
+  Dict(String(xs[1]) => Symbol.(xs[2:end]) for xs in
+       filter(!isempty, split.(split(readstring(deps("cmudict", "cmudict")), "\n"))))
+end
+
+# A word is valid if it consists only of word characters (`\w`), '-' and '.'.
+validword(s) = ismatch(r"^[\w-\.]+$", s)
+
+# The pronunciation dictionary restricted to entries whose key is a valid word.
+cmudict() = filter((s, ps) -> validword(s), rawdict())
+
+# Upper-case letters, digits, '_', '-' and '.'.
+alphabet() = ['A':'Z'..., '0':'9'..., '_', '-', '.']
+
+end
diff --git a/test/data.jl b/test/data.jl
new file mode 100644
index 00000000..1b93ab3c
--- /dev/null
+++ b/test/data.jl
@@ -0,0 +1,3 @@
+using Flux.Data
+
+@test cmudict()["CATASTROPHE"] == :[K,AH0,T,AE1,S,T,R,AH0,F,IY0].args