add cmudict dataset
This commit is contained in:
parent
0f8ba87dc6
commit
e7a510da9a
|
@ -4,4 +4,4 @@
|
||||||
docs/build/
|
docs/build/
|
||||||
docs/site/
|
docs/site/
|
||||||
docs/flux.css
|
docs/flux.css
|
||||||
demos
|
deps
|
||||||
|
|
|
@ -29,4 +29,6 @@ include("layers/basic.jl")
|
||||||
include("layers/recurrent.jl")
|
include("layers/recurrent.jl")
|
||||||
include("layers/normalisation.jl")
|
include("layers/normalisation.jl")
|
||||||
|
|
||||||
|
include("data/Data.jl")
|
||||||
|
|
||||||
end # module
|
end # module
|
||||||
|
|
|
@ -0,0 +1,14 @@
|
||||||
|
module Data
|
||||||
|
|
||||||
|
export CMUDict, cmudict
|
||||||
|
|
||||||
|
# Build a path inside the `deps` directory that sits two levels above this
# file's directory. With no arguments it yields the `deps` directory itself;
# any `path` components are appended to it.
function deps(path...)
    root = joinpath(@__DIR__, "..", "..", "deps")
    return joinpath(root, path...)
end
|
||||||
|
|
||||||
|
# Module initialiser: create the directory returned by `deps()` (and any
# missing parents) so that dataset loaders have somewhere to write.
function __init__()
    depsdir = deps()
    mkpath(depsdir)
end
|
||||||
|
|
||||||
|
include("cmudict.jl")
|
||||||
|
using .CMUDict
|
||||||
|
|
||||||
|
end
|
|
@ -0,0 +1,42 @@
|
||||||
|
module CMUDict
|
||||||
|
|
||||||
|
export cmudict
|
||||||
|
|
||||||
|
using ..Data: deps
|
||||||
|
|
||||||
|
# Dictionary release tag; interpolated into the download URLs built in `load`.
const version = "0.7b"
|
||||||
|
|
||||||
|
"""
    load()

Ensure the three CMUdict files (`cmudict`, `cmudict.phones` and
`cmudict.symbols`) are present under `deps("cmudict")`, downloading any that
are missing. Returns `nothing`.

Each file is checked individually, so a previous run that created the
directory but failed before fetching every file is repaired on the next call
instead of being silently treated as complete.
"""
function load()
    dir = deps("cmudict")
    mkpath(dir)
    for x in ["", ".phones", ".symbols"]
        target = joinpath(dir, "cmudict$x")
        isfile(target) && continue
        # Download to a temporary name first and rename afterwards, so an
        # interrupted transfer never leaves a truncated file under the final name.
        tmp = target * ".tmp"
        download("http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-$version$x",
                 tmp)
        mv(tmp, target)
    end
    return
end
|
||||||
|
|
||||||
|
"""
    phones()

Return the phone names listed in the downloaded `cmudict.phones` file as
`Symbol`s. Each non-empty line is split on tab characters and only the first
field is kept.
"""
function phones()
    load()
    text = readstring(deps("cmudict", "cmudict.phones"))
    rows = split(text, "\n", keep = false)
    return map(row -> Symbol(first(split(row, "\t"))), rows)
end
|
||||||
|
|
||||||
|
"""
    symbols()

Return every non-empty line of the downloaded `cmudict.symbols` file as a
`Symbol`.
"""
function symbols()
    load()
    # `load` writes into the lowercase "cmudict" directory; the path here must
    # use the same spelling or the file is not found on case-sensitive
    # filesystems.
    return Symbol.(split(readstring(deps("cmudict", "cmudict.symbols")),
                         "\n", keep = false))
end
|
||||||
|
|
||||||
|
"""
    rawdict()

Parse the downloaded `cmudict` file into a `Dict`. Every line is split on
whitespace; lines that yield no tokens are dropped. For the remaining lines the
first token becomes the `String` key and the rest become a vector of `Symbol`s.
No filtering of the keys is performed here.
"""
function rawdict()
    load()
    # `load` writes into the lowercase "cmudict" directory; the path here must
    # use the same spelling or the file is not found on case-sensitive
    # filesystems.
    return Dict(String(xs[1]) => Symbol.(xs[2:end]) for xs in
                filter(!isempty, split.(split(readstring(deps("cmudict", "cmudict")), "\n"))))
end
|
||||||
|
|
||||||
|
# True when `s` is non-empty and made up solely of word characters, hyphens
# and dots.
function validword(s)
    pattern = r"^[\w-\.]+$"
    return ismatch(pattern, s)
end
|
||||||
|
|
||||||
|
"""
    cmudict()

Return the entries of `rawdict()` whose key passes `validword`; the values are
carried over untouched.
"""
function cmudict()
    entries = rawdict()
    return filter(entries) do word, phonemes
        validword(word)
    end
end
|
||||||
|
|
||||||
|
# Characters in order: 'A' to 'Z', then '0' to '9', then '_', '-' and '.'.
function alphabet()
    letters = collect('A':'Z')
    digits = collect('0':'9')
    return vcat(letters, digits, ['_', '-', '.'])
end
|
||||||
|
|
||||||
|
end
|
|
@ -0,0 +1,3 @@
|
||||||
|
using Flux.Data
|
||||||
|
|
||||||
|
# `:[K,AH0,...]` quotes a vector literal; the `.args` of that expression is the
# list of Symbols it contains, which is compared against the stored entry.
@test cmudict()["CATASTROPHE"] == :[K,AH0,T,AE1,S,T,R,AH0,F,IY0].args
|
Loading…
Reference in New Issue