diff --git a/src/data/cmudict.jl b/src/data/cmudict.jl
index 2a26b691..b3257734 100644
--- a/src/data/cmudict.jl
+++ b/src/data/cmudict.jl
@@ -40,7 +40,7 @@ function rawdict()
 filter(!isempty, split.(split(readstring(deps("cmudict", "cmudict")), "\n"))))
 end
 
-validword(s) = ismatch(r"^[\w\-\.]+$", s)
+validword(s) = isascii(s) && ismatch(r"^[\w\-\.]+$", s)
 
 cmudict() = filter((s, ps) -> validword(s), rawdict())
 