Add module to make iris dataset available.

This commit is contained in:
Josh Whittemore 2019-02-06 16:17:59 -08:00 committed by Joshua Whittemore
parent bc12a4d55a
commit 930ebaf217
3 changed files with 92 additions and 0 deletions

View File

@ -6,6 +6,7 @@ AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193" CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" Colors = "5ae59095-9a9b-59fe-a467-6f913c188581"
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
DiffRules = "b552c78f-8df3-52c6-915a-8e097449b14b" DiffRules = "b552c78f-8df3-52c6-915a-8e097449b14b"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
Juno = "e5e0dc1b-0480-54bc-9374-aad01c23163d" Juno = "e5e0dc1b-0480-54bc-9374-aad01c23163d"

View File

@ -39,4 +39,7 @@ include("tree.jl")
include("sentiment.jl") include("sentiment.jl")
using .Sentiment using .Sentiment
include("iris.jl")
export Iris
end end

88
src/data/iris.jl Normal file
View File

@ -0,0 +1,88 @@
"""
Iris
Fisher's classic iris dataset.
Measurements from 3 different species of iris: setosa, versicolor and
virginica. There are 50 examples of each species.
There are 4 measurements for each example: sepal length, sepal width, petal
length and petal width. The measurements are in centimeters.
The module retrieves the data from the [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/iris).
"""
module Iris
using DelimitedFiles
using ..Data: deps, download_and_verify
const cache_prefix = ""
# Uncomment if the iris.data file is cached to cache.julialang.org.
# const cache_prefix = "https://cache.julialang.org/"
function load()
isfile(deps("iris.data")) && return
@info "Downloading iris dataset."
download_and_verify("$(cache_prefix)https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data",
deps("iris.data"),
"6f608b71a7317216319b4d27b4d9bc84e6abd734eda7872b71a458569e2656c0")
end
"""
labels()
Get the labels of the iris dataset, a 150 element array of strings listing the
species of each example.
```jldoctest
julia> labels = Flux.Data.Iris.labels();
julia> summary(labels)
"150-element Array{String,1}"
julia> labels[1]
"Iris-setosa"
```
"""
function labels()
load()
iris = readdlm(deps("iris.data"), ',')
Vector{String}(iris[1:end, end])
end
"""
features()
Get the features of the iris dataset. This is a 4x150 matrix of Float64
elements. It has a row for each feature (sepal length, sepal width,
petal length, petal width) and a column for each example.
```jldoctest
julia> features = Flux.Data.Iris.features();
julia> summary(features)
"4×150 Array{Float64,2}"
julia> features[:, 1]
4-element Array{Float64,1}:
5.1
3.5
1.4
0.2
```
"""
function features()
load()
iris = readdlm(deps("iris.data"), ',')
Matrix{Float64}(iris[1:end, 1:4]')
end
end