Flux.jl/src/data/cmudict.jl

44 lines
1004 B
Julia
Raw Normal View History

2017-11-01 16:01:34 +00:00
module CMUDict
export cmudict
using ..Data: deps
# Release of the CMUdict files to fetch; interpolated into the download URL in `load`.
const version = "0.7b"
"""
    load()

Make sure the three CMUdict files (`cmudict`, `cmudict.phones`, `cmudict.symbols`)
exist under `deps("cmudict")`, downloading whichever ones are missing.

Every file is checked on its own, so a fetch that failed part-way on an earlier call
is completed on the next call instead of being mistaken for a finished install.
Returns `nothing`.
"""
function load()
    dir = deps("cmudict")
    for x in ["", ".phones", ".symbols"]
        path = deps("cmudict", "cmudict$x")
        isfile(path) && continue
        mkpath(dir)
        url = "http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-$version$x"
        # Fetch into a temporary file and move it into place afterwards, so a failed
        # transfer does not leave a partial file sitting at `path`.
        tmp = download(url)
        mv(tmp, path)
    end
    return nothing
end
"""
    phones()

Return, as a vector of `Symbol`s, the first tab-separated field of every non-empty
line of the `cmudict.phones` file. Calls `load()` first so the file is present.
"""
function phones()
    load()
    text = String(read(deps("cmudict", "cmudict.phones")))
    # Drop empty pieces (e.g. the one after a trailing newline) before parsing.
    lines = filter(!isempty, split(text, "\n"))
    return [Symbol(first(split(line, "\t"))) for line in lines]
end
"""
    symbols()

Return every non-empty line of the `cmudict.symbols` file as a vector of `Symbol`s.
Calls `load()` first so the file is present.
"""
function symbols()
    load()
    # Read from the lowercase "cmudict" directory, the one `load()` writes into;
    # a differently-cased name is a different directory on case-sensitive filesystems.
    text = String(read(deps("cmudict", "cmudict.symbols")))
    return Symbol.(filter(!isempty, split(text, "\n")))
end
"""
    rawdict()

Parse the `cmudict` file into a `Dict`: each non-blank line is split on whitespace,
the first token becomes the `String` key and the remaining tokens become a vector of
`Symbol`s. No filtering of keys is done here. Calls `load()` first so the file is
present.
"""
function rawdict()
    load()
    # Read from the lowercase "cmudict" directory, the one `load()` writes into;
    # a differently-cased name is a different directory on case-sensitive filesystems.
    text = String(read(deps("cmudict", "cmudict")))
    # Whitespace-splitting a blank line yields an empty token list; skip those.
    entries = filter(!isempty, split.(split(text, "\n")))
    return Dict(String(xs[1]) => Symbol.(xs[2:end]) for xs in entries)
end
2017-10-12 08:31:38 +00:00
# The square brackets are left unescaped so they form a character class; escaping
# them would make the pattern look for literal bracket characters and reject ordinary
# words. The hyphen is escaped so it is a literal member of the class, not a range.
"""
    validword(s)

Return `true` when `s` is non-empty and consists only of word characters (letters,
digits, underscore), hyphens and periods; return `false` otherwise.
"""
validword(s) = match(r"^[\w\-\.]+$", s) !== nothing
2017-11-01 16:01:34 +00:00
"""
    cmudict()

Return the entries of `rawdict()` whose key passes `validword`, in a new dictionary
of the same type.
"""
function cmudict()
    entries = rawdict()
    kept = typeof(entries)()
    for (word, pron) in entries
        if validword(word)
            kept[word] = pron
        end
    end
    return kept
end
"""
    alphabet()

Return a `Vector{Char}` holding 'A' through 'Z', then '0' through '9', then the
characters '_', '-' and '.', in that order.
"""
function alphabet()
    chars = collect('A':'Z')
    append!(chars, '0':'9')
    append!(chars, ['_', '-', '.'])
    return chars
end
end