2017-11-02 11:41:28 +00:00
|
|
|
module Sentiment
|
|
|
|
|
2017-11-06 12:01:47 +00:00
|
|
|
using ZipFile
|
2017-11-02 11:41:28 +00:00
|
|
|
using ..Data: deps
|
|
|
|
|
|
|
|
"""
    load()

Make sure the sentiment tree archive exists at `deps("sentiment.zip")`, downloading it
from nlp.stanford.edu when it is not already on disk. Returns `nothing`.
"""
function load()
    archive = deps("sentiment.zip")
    isfile(archive) && return
    # Download to a side file and move it into place only once the transfer has
    # finished; otherwise an interrupted download would leave a truncated archive
    # that the `isfile` check above accepts on every later call.
    partial = archive * ".part"
    try
        download("https://nlp.stanford.edu/sentiment/trainDevTestTrees_PTB.zip",
                 partial)
        mv(partial, archive)
    finally
        # No-op after a successful move; removes the leftover after a failure.
        rm(partial, force = true)
    end
    return
end
|
|
|
|
|
|
|
|
"""
    getfile(r, name)

Return the first element of `r.files` whose `name` field equals `name`.

Throws an `ArgumentError` naming the requested entry when nothing matches.
"""
function getfile(r, name)
    # An explicit scan avoids indexing with `findfirst`'s "not found" result, which
    # would otherwise surface as an indexing error that hides the missing name.
    for entry in r.files
        entry.name == name && return entry
    end
    throw(ArgumentError("no entry named \"$name\" in archive"))
end
|
|
|
|
|
2017-11-06 12:01:47 +00:00
|
|
|
"""
    getfile(name)

Read the entry `trees/<name>` from the archive at `deps("sentiment.zip")` and return
its contents as a string.
"""
function getfile(name)
    reader = ZipFile.Reader(deps("sentiment.zip"))
    try
        return readstring(getfile(reader, "trees/$name"))
    finally
        # Close the reader even when the lookup or the read throws.
        close(reader)
    end
end
|
|
|
|
|
2017-11-06 12:01:47 +00:00
|
|
|
using ..Flux.Batches
|
|
|
|
|
|
|
|
# Two-element node: `n` is the label as text and is parsed to an `Int`; `w` is stored
# unchanged next to it as the node's value. The node gets no children.
function totree_(n, w)
    label = parse(Int, n)
    return Tree{Any}((label, w))
end
|
|
|
|
# Three-element node: `n` is the label as text and is parsed to an `Int`; the node's
# value pairs that label with `nothing`, and `a` and `b` are converted recursively
# into its two children.
function totree_(n, a, b)
    label = parse(Int, n)
    left = totree(a)
    right = totree(b)
    return Tree{Any}((label, nothing), left, right)
end
|
|
|
|
# Convert a parsed expression into a tree by spreading its arguments over `totree_`,
# whose methods are selected by the number of arguments.
function totree(t::Expr)
    return totree_(t.args...)
end
|
|
|
|
|
|
|
|
"""
    parsetree(s)

Convert one bracketed tree, e.g. `(3 (2 good) (2 film))`, into a tree via `totree`.

The text is rewritten into Julia tuple syntax: every token becomes a double-quoted
string literal and every space becomes a comma separator. The rewritten text is then
parsed and the resulting expression is passed to `totree`.
"""
function parsetree(s)
    # Tokens end up inside double-quoted literals, so every character that is special
    # there must be escaped first. Backslashes are handled before anything else so the
    # escapes added by the following steps are not doubled. Escaped tokens keep their
    # original text verbatim (e.g. a backslash in the input stays a backslash).
    s = replace(s, "\\", "\\\\")
    s = replace(s, "\"", "\\\"")
    s = replace(s, r"\$", m -> "\\\$")
    # Quote every run of characters that is neither whitespace nor a parenthesis.
    s = replace(s, r"[^\s\(\)]+", tok -> "\"$tok\"")
    s = replace(s, " ", ", ")
    return totree(parse(s))
end
|
|
|
|
|
|
|
|
"""
    gettrees(name)

Return the trees stored in `<name>.txt`: `load()` is called first, the file is read
with `getfile`, split on newlines with empty lines dropped, and each remaining line is
passed to `parsetree`.
"""
function gettrees(name)
    load()
    contents = getfile("$name.txt")
    lines = split(contents, '\n', keep = false)
    return parsetree.(lines)
end
|
|
|
|
|
|
|
|
# Trees parsed from `train.txt`.
function train()
    return gettrees("train")
end
|
|
|
|
# Trees parsed from `test.txt`.
function test()
    return gettrees("test")
end
|
|
|
|
# Trees parsed from `dev.txt`.
function dev()
    return gettrees("dev")
end
|
|
|
|
|
2017-11-02 11:41:28 +00:00
|
|
|
end
|