Flux.jl/src/data/sentiment.jl

46 lines
991 B
Julia
Raw Normal View History

2017-11-02 11:41:28 +00:00
module Sentiment
2017-11-06 12:01:47 +00:00
using ZipFile
2017-11-02 11:41:28 +00:00
using ..Data: deps
"""
    load()

Ensure the archive `deps("sentiment.zip")` exists locally, downloading
`trainDevTestTrees_PTB.zip` from nlp.stanford.edu when it does not.
Returns `nothing`.

The download goes to a temporary `.part` file that is moved into place only
after `download` returns, so an interrupted transfer cannot leave a truncated
archive that later calls would mistake for a complete one.
"""
function load()
    archive = deps("sentiment.zip")
    isfile(archive) && return
    partial = string(archive, ".part")
    try
        download("https://nlp.stanford.edu/sentiment/trainDevTestTrees_PTB.zip",
                 partial)
        mv(partial, archive)
    finally
        # Only present here if the download or the move failed.
        isfile(partial) && rm(partial)
    end
    return
end
"""
    getfile(r, name)

Return the first element of `r.files` whose `name` field equals `name`.

Throws an `ArgumentError` naming the missing entry when no element matches,
instead of failing with an opaque indexing error.
"""
function getfile(r, name)
    idx = findfirst(f -> f.name == name, r.files)
    # `findfirst` reports "not found" as `nothing` on current Julia and as `0`
    # on the 0.6 series; handle both.
    if idx === nothing || idx == 0
        throw(ArgumentError("no entry named \"$name\" in archive"))
    end
    return r.files[idx]
end
2017-11-06 12:01:47 +00:00
"""
    getfile(name)

Open the archive `deps("sentiment.zip")` with `ZipFile.Reader`, read the entry
`trees/<name>` in full and return its contents as a string.

The reader is closed before returning, including when the entry lookup or the
read throws.
"""
function getfile(name)
    r = ZipFile.Reader(deps("sentiment.zip"))
    try
        return readstring(getfile(r, "trees/$name"))
    finally
        # Runs on both the normal and the error path, so the reader never leaks.
        close(r)
    end
end
2017-11-06 12:01:47 +00:00
using ..Flux.Batches
# NOTE(review): `Tree` is not defined in this file; presumably it is provided
# by `..Flux.Batches` -- confirm.

# Two-element node: `n` is the label as a string (parsed to `Int`), `w` is
# stored next to it unchanged. The node gets no children.
function totree_(n, w)
    label = parse(Int, n)
    return Tree{Any}((label, w))
end

# Three-element node: `n` is the label as a string (parsed to `Int`); `a` and
# `b` are converted recursively with `totree` and become the two children. The
# second slot of the node value is `nothing`.
function totree_(n, a, b)
    label = parse(Int, n)
    return Tree{Any}((label, nothing), totree(a), totree(b))
end

"""
    totree(t::Expr)

Convert the expression `t` into a `Tree{Any}` by splatting `t.args` into
`totree_`, which dispatches on the number of arguments (two or three).
"""
function totree(t::Expr)
    return totree_(t.args...)
end
"""
    parsetree(s)

Turn one line of parenthesised tree text into a tree via `totree`.

The text is rewritten in three passes and then handed to `parse`:

1. every dollar sign is prefixed with a backslash;
2. every run of characters other than whitespace and parentheses is wrapped
   in double quotes;
3. every single space is replaced by a comma followed by a space.

The order matters: dollar signs are escaped before the tokens are quoted.
"""
function parsetree(s)
    # NOTE(review): single-argument `parse` on a string and three-positional-
    # argument `replace` are the pre-0.7 spellings -- confirm the targeted
    # Julia version before modernising.
    escaped = replace(s, r"\$", m -> "\\\$")
    quoted = replace(escaped, r"[^\s\(\)]+", tok -> "\"$tok\"")
    separated = replace(quoted, " ", ", ")
    return totree(parse(separated))
end
"""
    gettrees(name)

Call `load()`, read `<name>.txt` through `getfile`, split the text on newlines
(dropping empty pieces) and apply `parsetree` to every resulting line.
Returns the collection of parsed trees.
"""
function gettrees(name)
    load()
    contents = getfile("$name.txt")
    # `keep = false` discards empty strings, e.g. the one after a trailing newline.
    rows = split(contents, '\n', keep = false)
    return parsetree.(rows)
end
"""
    train()

Return the trees parsed from `train.txt` (see `gettrees`).
"""
function train()
    return gettrees("train")
end

"""
    test()

Return the trees parsed from `test.txt` (see `gettrees`).
"""
function test()
    return gettrees("test")
end

"""
    dev()

Return the trees parsed from `dev.txt` (see `gettrees`).
"""
function dev()
    return gettrees("dev")
end
2017-11-02 11:41:28 +00:00
end