2017-11-01 16:01:34 +00:00
|
|
|
module CMUDict
|
|
|
|
|
|
|
|
export cmudict
|
|
|
|
|
|
|
|
using ..Data: deps
|
|
|
|
|
|
|
|
# CMUdict release identifier; `load` interpolates it into the download URL
# as `cmudict-$version`.
const version = "0.7b"
|
2018-03-06 16:29:44 +00:00
|
|
|
# URL prefix that `load` prepends to the upstream CMUdict URL, so requests go
# to this host — NOTE(review): presumably a caching proxy; confirm.
const cache_prefix = "https://cache.julialang.org"
|
2017-11-01 16:01:34 +00:00
|
|
|
|
|
|
|
"""
    load()

Ensure the CMUDict data files are present locally, downloading them if needed.

Three files are expected under `deps("cmudict")`: `cmudict`, `cmudict.phones`
and `cmudict.symbols`. If all of them already exist the function returns
immediately; otherwise every file is fetched again via `cache_prefix`.

Each file is first downloaded to a sibling `.part` path and only moved to its
final name once the download has succeeded, so an interrupted transfer cannot
leave behind a truncated file that a later call would mistake for a complete
one.
"""
function load()
  suffixes = ["", ".phones", ".symbols"]
  # `isfile` is already false for any path inside a missing directory, so no
  # separate `isdir` check is required.
  all(isfile(deps("cmudict", "cmudict$x")) for x in suffixes) && return
  @info "Downloading CMUDict dataset"
  mkpath(deps("cmudict"))
  for x in suffixes
    dest = deps("cmudict", "cmudict$x")
    partial = string(dest, ".part")
    try
      download("$cache_prefix/http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-$version$x",
               partial)
      # Publish the file under its real name only after a complete download.
      mv(partial, dest, force = true)
    finally
      # Removes leftovers from a failed download; a no-op after a successful move.
      rm(partial, force = true)
    end
  end
end
|
|
|
|
|
|
|
|
"""
    phones()

Return the phone names from the `cmudict.phones` file as a vector of `Symbol`s.

The file is read as a whole, split on `"\\n"` with empty lines discarded, and
the first tab-separated field of every remaining line is converted to a
`Symbol`. Calls `load()` first so the file is available.
"""
function phones()
  load()
  text = read(deps("cmudict", "cmudict.phones"), String)
  rows = split(text, "\n", keepempty = false)
  return map(row -> Symbol(first(split(row, "\t"))), rows)
end
|
|
|
|
|
|
|
|
"""
    symbols()

Return every non-empty line of the `cmudict.symbols` file as a `Symbol`.

The file is read as a whole and split on `"\\n"` with empty lines discarded.
Calls `load()` first so the file is available.
"""
function symbols()
  load()
  text = read(deps("cmudict", "cmudict.symbols"), String)
  rows = split(text, "\n", keepempty = false)
  return map(Symbol, rows)
end
|
|
|
|
|
|
|
|
"""
    rawdict()

Parse the main `cmudict` file into a `Dict`.

Every line is split on whitespace; lines that yield no tokens are skipped. The
first token of a line becomes the key (as a `String`) and the remaining tokens
become the value (a vector of `Symbol`s). No filtering of keys happens here.
Calls `load()` first so the file is available.
"""
function rawdict()
  load()
  text = read(deps("cmudict", "cmudict"), String)
  tokenised = [split(line) for line in split(text, "\n")]
  entries = filter(!isempty, tokenised)
  return Dict(String(toks[1]) => map(Symbol, toks[2:end]) for toks in entries)
end
|
|
|
|
|
2018-07-18 07:01:06 +00:00
|
|
|
"""
    validword(s)

Return `true` when `s` is ASCII-only and matches the pattern
`^[\\w\\-\\.]+\$` (word characters, `-` and `.`), `false` otherwise.
"""
function validword(s)
  isascii(s) || return false
  return occursin(r"^[\w\-\.]+$", s)
end
|
2017-11-01 16:01:34 +00:00
|
|
|
|
2018-07-18 13:39:20 +00:00
|
|
|
"""
    cmudict()

Return the entries of `rawdict()` whose key passes `validword`.
"""
function cmudict()
  entries = rawdict()
  return filter(entry -> validword(first(entry)), entries)
end
|
2017-11-01 16:01:34 +00:00
|
|
|
|
|
|
|
"""
    alphabet()

Return a `Vector{Char}` containing `'A'` to `'Z'`, then `'0'` to `'9'`, then
`'_'`, `'-'` and `'.'`, in that order.
"""
function alphabet()
  letters = collect('A':'Z')
  digits = collect('0':'9')
  punctuation = ['_', '-', '.']
  return vcat(letters, digits, punctuation)
end
|
|
|
|
|
|
|
|
end
|