[ADD] honors material
This commit is contained in:
parent
f1f66b5d2e
commit
9c0c146183
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,244 @@
|
||||||
|
# COLLECTIONS
|
||||||
|
# -----------
|
||||||
|
|
||||||
|
# * Collections are groups of elements
|
||||||
|
# * Elements are values of different Julia data types
|
||||||
|
# * Storing elements in collections is one of the most useful
|
||||||
|
# operations in computing
|
||||||
|
|
||||||
|
# I ARRAYS
|
||||||
|
|
||||||
|
# * Arrays are collections of values separated with commas and
|
||||||
|
# placed inside of square brackets
|
||||||
|
# * They are represented in column or in row form
|
||||||
|
|
||||||
|
# 1 Like a column vector (click on the downward arrow)
|
||||||
|
array1 = [1, 2, 3]
|
||||||
|
typeof(array1)
|
||||||
|
|
||||||
|
# 2 Like row vector (click on the downward arrow)
|
||||||
|
array2 = [1 2 3]
|
||||||
|
typeof(array2)
|
||||||
|
|
||||||
|
# 3 The transpose converts between the two
|
||||||
|
transpose(array1)
|
||||||
|
#The apostrophe (adjoint) is an alternative notation; it equals the transpose for real-valued arrays
|
||||||
|
array1'
|
||||||
|
|
||||||
|
# 4 Boolean logic (==)
|
||||||
|
transpose(array1) == array1'
|
||||||
|
|
||||||
|
# 5 Data type inheritance
|
||||||
|
#With a mix of types, all the elements inherit (are promoted to) the "highest" type
|
||||||
|
array2 = [1, 2, 3.0]
|
||||||
|
#Index for one of the original integers will be Float64
|
||||||
|
array2[1]
|
||||||
|
|
||||||
|
# 6 Column-wise entry of multidimensional array
|
||||||
|
array3 = [[1, 2, 3] [4, 5, 6] [7, 8, 9]]
|
||||||
|
|
||||||
|
# 7 Row-wise entry of multidimensional array
|
||||||
|
array4 = [[1 2 3]; [4 5 6]; [7 8 9]]
|
||||||
|
|
||||||
|
# 8 Length of array
|
||||||
|
length(array3)
|
||||||
|
length(array4)
|
||||||
|
|
||||||
|
# 9 Index order of column-wise array
|
||||||
|
for i in 1:length(array3)
|
||||||
|
println("Element $(i) is ", array3[i])
|
||||||
|
end
|
||||||
|
|
||||||
|
# 10 Index order of row-wise array
|
||||||
|
for i in 1:length(array4)
|
||||||
|
println("Element $(i) is ", array4[i])
|
||||||
|
end
|
||||||
|
|
||||||
|
# 11 Using repeat() to repeat elements
|
||||||
|
repeat([1, 2], 3)
|
||||||
|
|
||||||
|
# 12 Using range(start, step, number of elements)
|
||||||
|
range(1, step = 1, length = 10)
|
||||||
|
typeof(range(1, step = 1, length = 10))
|
||||||
|
|
||||||
|
# 13 Create collections using the collect() function
|
||||||
|
collect(range(1, step = 1, length = 10))
|
||||||
|
#Short-hand
|
||||||
|
collect(1:10)
|
||||||
|
typeof(1:10)
|
||||||
|
#Add step size
|
||||||
|
collect(2:2:10)
|
||||||
|
typeof(collect(2:2:10))
|
||||||
|
|
||||||
|
# 14 Creating empty array with two rows and three columns
|
||||||
|
array5 = Array{Union{Missing, Int}}(missing, 2, 3)
|
||||||
|
|
||||||
|
# 15 Reshaping
|
||||||
|
reshape(array5, 3, 2)
|
||||||
|
|
||||||
|
# 16 Indexing (slicing)
|
||||||
|
#Random uniform distribution of values in closed domain [10,20]
|
||||||
|
#Shape 10 x 5
|
||||||
|
array6 = rand(10:20, 10, 5)
|
||||||
|
#All rows in first column
|
||||||
|
array6[:, 1]
|
||||||
|
#Rows two through five of second column
|
||||||
|
array6[2:5, 2]
|
||||||
|
#Values in rows 2, 4, 6, and in columns 1 and 5
|
||||||
|
array6[[2, 4, 6], [1, 5]]
|
||||||
|
#Values in row 1 from column 3 to the last column
|
||||||
|
array6[1, 3:end]
|
||||||
|
# Boolean logic (returning only true and false)
|
||||||
|
array6[:, 1] .> 12
|
||||||
|
|
||||||
|
# 17 Changing element values
|
||||||
|
array7 = [1, 2, 3, 4, 5]
|
||||||
|
#Permanently append 10 to end of array
|
||||||
|
push!(array7, 10)
|
||||||
|
#Remove last element
|
||||||
|
#Only the removed value will be displayed
|
||||||
|
pop!(array7)
|
||||||
|
array7
|
||||||
|
#Change second element value to 1000
|
||||||
|
array7[2] = 1000
|
||||||
|
array7
|
||||||
|
|
||||||
|
# 18 List comprehension
|
||||||
|
array8 = [3 * i for i in 1:5]
|
||||||
|
#Column-wise collection: the first variable (a) varies fastest, filling one column per value of b
|
||||||
|
array9 = [a * b for a in 1:3, b in 1:3]
|
||||||
|
|
||||||
|
|
||||||
|
# 19 Arithmetic on arrays
|
||||||
|
#Elementwise addition of a scalar using dot notation
|
||||||
|
array8 .+ 1
|
||||||
|
#Elementwise addition of similar sized arrays
|
||||||
|
array7 + array8
|
||||||
|
|
||||||
|
# 20 Missing
|
||||||
|
# * Missing is a Julia data type
|
||||||
|
# * Provides a placeholder for missing data in a statistical sense
|
||||||
|
# * Propagates automatically
|
||||||
|
# * Equality as a type can be tested
|
||||||
|
# * Sorting is possible since missing is seen as greater than other values
|
||||||
|
|
||||||
|
#Propagation
|
||||||
|
missing + 1
|
||||||
|
missing > 1
|
||||||
|
[1, 2, 3, missing, 5] + [10, 20, 30, 40 ,50]
|
||||||
|
#Cannot return true or false since value is not known
|
||||||
|
missing == missing
|
||||||
|
#Equality
|
||||||
|
missing === missing
|
||||||
|
isequal(missing, missing)
|
||||||
|
#Sorting with isless()
|
||||||
|
isless(1, missing)
|
||||||
|
isless(Inf, missing)
|
||||||
|
|
||||||
|
# 21 Array of integer zeros
|
||||||
|
array11 = zeros(Int8, 3, 3)
|
||||||
|
|
||||||
|
# 22 Array of floating point ones
|
||||||
|
array12 = ones(Float16, 3, 3)
|
||||||
|
|
||||||
|
# 23 Array of true (bit array) values
|
||||||
|
array13 = trues(3, 3)
|
||||||
|
|
||||||
|
# 24 Fill an array with n elements of value x
|
||||||
|
array14 = fill(10, 3, 3)
|
||||||
|
|
||||||
|
# 25 Convert elements to a different data type
|
||||||
|
convert.(Float16, array14)
|
||||||
|
|
||||||
|
# 26 Concatenation
|
||||||
|
#Concatenate arrays along rows, i.e. dimension 1 (adds rows)
|
||||||
|
array15 = [1, 2, 3]
|
||||||
|
array16 = [10, 20, 30]
|
||||||
|
cat(array15, array16, dims = 1)
|
||||||
|
#Same as above
|
||||||
|
vcat(array15, array16)
|
||||||
|
#Concatenate arrays along columns (makes columns)
|
||||||
|
cat(array15, array16, dims = 2)
|
||||||
|
#Same as above
|
||||||
|
hcat(array15, array16)
|
||||||
|
|
||||||
|
# II TUPLES
|
||||||
|
|
||||||
|
# * Tuples are immutable collections
|
||||||
|
|
||||||
|
# 1 Tuples with mixed types
|
||||||
|
tuple1 = (1, 2, 3, 4, "Julia")
|
||||||
|
#For loop to look at value and type of each element
|
||||||
|
for i in 1:length(tuple1)
|
||||||
|
println(" The value of the tuple at index number $(i) is $(tuple1[i]) and the type is $(typeof(tuple1[i])).")
|
||||||
|
end
|
||||||
|
|
||||||
|
# 2 Each element can be named
|
||||||
|
a, b, c, seven = (1, 3, 5, 7)
|
||||||
|
a
|
||||||
|
seven
|
||||||
|
|
||||||
|
# 2 Reverse order index (can be done with arrays too)
|
||||||
|
tuple1[end:-1:1]
|
||||||
|
|
||||||
|
# 3 Mixed length tuples
|
||||||
|
tuple2 = ((1, 2, 3), 1, 2, (3, 100, 1))
|
||||||
|
#Element 4
|
||||||
|
tuple2[4]
|
||||||
|
#Element 2 in element 4
|
||||||
|
tuple2[4][2]
|
||||||
|
|
||||||
|
# III DICTIONARIES
|
||||||
|
|
||||||
|
# * Dictionaries are collections of key-value pairs
|
||||||
|
|
||||||
|
# 1 Example of a dictionary
|
||||||
|
dictionary1 = Dict(1 => 77, 2 => 66, 3 => 1)
|
||||||
|
#The => is shorthand for the Pair() function
|
||||||
|
dictionary2 = Dict(Pair(1,100), Pair(2,200), Pair(3,300))
|
||||||
|
|
||||||
|
# 2 Specifying types
|
||||||
|
dictionary3 = Dict{Any, Any}(1 => 77, 2 => 66, 3 => "three")
|
||||||
|
#We can get a bit crazy
|
||||||
|
dictionary4 = Dict{Any, Any}("a" => 1, (2, 3) => "hello")
|
||||||
|
|
||||||
|
# 3 Using symbols as keys
|
||||||
|
dictionary5 = Dict(:A => 300, :B => 305, :C => 309)
|
||||||
|
dictionary5[:A]
|
||||||
|
|
||||||
|
# 4 Using in() to check on key-value pairs
|
||||||
|
in((:A => 300), dictionary5)
|
||||||
|
|
||||||
|
# 5 Changing an existing value
|
||||||
|
dictionary5[:C] = 1000
|
||||||
|
dictionary5
|
||||||
|
|
||||||
|
# 6 Using the delete!() function
|
||||||
|
delete!(dictionary5, :A)
|
||||||
|
|
||||||
|
# 7 The keys of a dictionary
|
||||||
|
keys(dictionary5)
|
||||||
|
|
||||||
|
# 8 The values of a dictionary
|
||||||
|
values(dictionary5)
|
||||||
|
|
||||||
|
# 8 Creating a dictionary with automatic keys
|
||||||
|
procedure_vals = ["Appendectomy", "Colectomy", "Cholecystectomy"]
|
||||||
|
procedure_dict = Dict{AbstractString,AbstractString}()
|
||||||
|
for (s, n) in enumerate(procedure_vals)
|
||||||
|
procedure_dict["x_$(s)"] = n
|
||||||
|
end
|
||||||
|
#Procedure_dict is now a dictionary
|
||||||
|
procedure_dict
|
||||||
|
|
||||||
|
# 9 Iterating through a dictionary by key and value
|
||||||
|
for (k, v) in procedure_dict
|
||||||
|
println(k, " is ",v)
|
||||||
|
end
|
||||||
|
|
||||||
|
# 10 Sorting
|
||||||
|
dictionary6 = Dict("a"=> 1,"b"=>2 ,"c"=>3 ,"d"=>4 ,"e"=>5 ,"f"=>6)
|
||||||
|
# Sorting using a for loop
|
||||||
|
for k in sort(collect(keys(dictionary6)))
|
||||||
|
println("$(k) is $(dictionary6[k])")
|
||||||
|
end
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,104 @@
|
||||||
|
+(2, 2)
|
||||||
|
versioninfo()
|
||||||
|
# FUNCTIONS IN JULIA 1.0 (0.7+)
|
||||||
|
# ----------------------
|
||||||
|
|
||||||
|
# * A function maps a tuple of arguments to a return value
|
||||||
|
|
||||||
|
# I Creating basic Functions
|
||||||
|
|
||||||
|
# 1 using the function keyword
|
||||||
|
#Create a function named my_addition
|
||||||
|
#Takes two arguments
|
||||||
|
#Return the addition of the two values
|
||||||
|
function my_addition(x, y)
|
||||||
|
return x + y
|
||||||
|
end
|
||||||
|
|
||||||
|
# 2 Calling a function
|
||||||
|
#Call the function with two argument values
|
||||||
|
my_addition(3, 4)
|
||||||
|
|
||||||
|
# 3 Built-in functions
|
||||||
|
#The plus symbol, +, (like the other arithmetical symbols) is a built-in function
|
||||||
|
+(3, 4)
|
||||||
|
|
||||||
|
# 4 Using a Unicode symbol as a function name
|
||||||
|
function Σ(x, y)
|
||||||
|
return x + y
|
||||||
|
end
|
||||||
|
|
||||||
|
Σ(3, 4)
|
||||||
|
|
||||||
|
# II Anonymous functions
|
||||||
|
|
||||||
|
# * Functions can be assigned to variables
|
||||||
|
# * Functions can be used as arguments
|
||||||
|
# * Functions can be returned as values
|
||||||
|
|
||||||
|
# 1 An anonymous function
|
||||||
|
x -> x^2 + 3
|
||||||
|
|
||||||
|
# 2 A function as an argument
|
||||||
|
#Passing the round() function as argument to the map() function
|
||||||
|
map(round, [2.1, 3.4, 7.9])
|
||||||
|
#Passing an anonymous function as an argument to the map() function
|
||||||
|
map(x -> x^2, [2, 3, 4])
|
||||||
|
|
||||||
|
# III Tuples and functions
|
||||||
|
|
||||||
|
# * Tuples are immutable collections
|
||||||
|
|
||||||
|
# 1 Examples of tuples
|
||||||
|
my_tuple = (1, "Julia", 7)
|
||||||
|
typeof(my_tuple)
|
||||||
|
|
||||||
|
# * Single value tuple must have a comma
|
||||||
|
my_second_tuple = (4,)
|
||||||
|
typeof(my_second_tuple)
|
||||||
|
|
||||||
|
# 2 Indexing a tuple
|
||||||
|
length(my_tuple)
|
||||||
|
my_tuple[2]
|
||||||
|
|
||||||
|
# 3 Tuple indexing
|
||||||
|
# * Named tuple creates a name for each element
|
||||||
|
my_other_tuple = (a = 4, b = "Julia", c = 3)
|
||||||
|
#Indexing tuple by name
|
||||||
|
my_other_tuple.b
|
||||||
|
|
||||||
|
# 4 Function returns
|
||||||
|
# * Multiple return values of a function are tuples
|
||||||
|
function my_function(a, b)
|
||||||
|
return a + b, a - b
|
||||||
|
end
|
||||||
|
#Calling the function
|
||||||
|
my_function(10, 5)
|
||||||
|
#Looking up the type of the function return
|
||||||
|
typeof(my_function(10, 5))
|
||||||
|
#This allows for each element to be given a variable
|
||||||
|
r, s = my_function(10, 5)
|
||||||
|
r
|
||||||
|
s
|
||||||
|
|
||||||
|
# IV Functions with keyword arguments
|
||||||
|
|
||||||
|
# * Keyword arguments are added after semi-colon
|
||||||
|
# * Their order does not matter, since they are passed by name
|
||||||
|
# * Default values are added
|
||||||
|
|
||||||
|
# 1 Creating a function with a keyword argument
|
||||||
|
function my_keyword_function(x, y; z = 3)
|
||||||
|
return x + y + z
|
||||||
|
end
|
||||||
|
|
||||||
|
# * Omission of keyword argument uses default
|
||||||
|
my_keyword_function(1, 2)
|
||||||
|
# * Keyword argument names must be used
|
||||||
|
my_keyword_function(1, 2, z = 10)
|
||||||
|
|
||||||
|
# 4 Use of dot notation for functions
|
||||||
|
|
||||||
|
# * Passes a collection elementwise to a function
|
||||||
|
# * Use instead of map()
|
||||||
|
# Broadcast sin over one full period: 0, π/2, π, 3π/2 and 2π
sin.([0., π/2., π, 3/2. * π, 2π])
|
|
@ -0,0 +1,569 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Week 4 Peer Review"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"2. In a code cell below, import the required packages: Distributions, DataFrames, and Random (install these packages via the REPL if required)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Import the required packages\n",
|
||||||
|
"using Distributions, DataFrames, Random"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Seed the random number generator\n",
|
||||||
|
"Random.seed!(1234);"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"3. In a code cell below, create a dataframe named df1, with 30 rows and 4 columns (variables). Call the first column ID. It should hold the values 1 through 30 (to make up 30 rows). Use three rand() function calls to generate three more columns named var1, var2, and var3. The second column (var1) should consist of 30 values from a standard normal distribution (mean of 0 and standard deviation of 1). The third column (var2) should consist of 30 random values from a normal distribution with a mean of 10 and a standard deviation of 2. The last column (var3) should contain 30 random values chosen from a range of integers between (and including) 5 and 15."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<table class=\"data-frame\"><thead><tr><th></th><th>ID</th><th>var1</th><th>var2</th><th>var3</th></tr><tr><th></th><th>Int64</th><th>Float64</th><th>Float64</th><th>Int64</th></tr></thead><tbody><p>30 rows × 4 columns</p><tr><th>1</th><td>1</td><td>0.867347</td><td>7.44066</td><td>14</td></tr><tr><th>2</th><td>2</td><td>-0.901744</td><td>11.9946</td><td>13</td></tr><tr><th>3</th><td>3</td><td>-0.494479</td><td>10.6048</td><td>12</td></tr><tr><th>4</th><td>4</td><td>-0.902914</td><td>9.92711</td><td>9</td></tr><tr><th>5</th><td>5</td><td>0.864401</td><td>10.2839</td><td>15</td></tr><tr><th>6</th><td>6</td><td>2.21188</td><td>11.0425</td><td>14</td></tr><tr><th>7</th><td>7</td><td>0.532813</td><td>11.7935</td><td>15</td></tr><tr><th>8</th><td>8</td><td>-0.271735</td><td>8.97294</td><td>9</td></tr><tr><th>9</th><td>9</td><td>0.502334</td><td>8.4704</td><td>9</td></tr><tr><th>10</th><td>10</td><td>-0.516984</td><td>6.91715</td><td>8</td></tr><tr><th>11</th><td>11</td><td>-0.560501</td><td>9.83968</td><td>15</td></tr><tr><th>12</th><td>12</td><td>-0.0192918</td><td>7.81756</td><td>14</td></tr><tr><th>13</th><td>13</td><td>0.128064</td><td>8.83897</td><td>11</td></tr><tr><th>14</th><td>14</td><td>1.85278</td><td>9.36913</td><td>10</td></tr><tr><th>15</th><td>15</td><td>-0.827763</td><td>7.2771</td><td>15</td></tr><tr><th>16</th><td>16</td><td>0.110096</td><td>9.77109</td><td>15</td></tr><tr><th>17</th><td>17</td><td>-0.251176</td><td>10.3317</td><td>6</td></tr><tr><th>18</th><td>18</td><td>0.369714</td><td>9.18312</td><td>5</td></tr><tr><th>19</th><td>19</td><td>0.0721164</td><td>7.98043</td><td>12</td></tr><tr><th>20</th><td>20</td><td>-1.50343</td><td>8.91239</td><td>13</td></tr><tr><th>21</th><td>21</td><td>1.56417</td><td>7.54655</td><td>14</td></tr><tr><th>22</th><td>22</td><td>-1.39674</td><td>8.91657</td><td>5</td></tr><tr><th>23</th><td>23</td><td>1.1055</td><td>8.62701</td><td>8</td></tr><tr><th>24</th><td>24</td><td>-1.10673</td><td>8.57414</td><td>9</td></tr>
<tr><th>25</th><td>25</td><td>-3.21136</td><td>9.34588</td><td>5</td></tr><tr><th>26</th><td>26</td><td>-0.0740145</td><td>11.0297</td><td>9</td></tr><tr><th>27</th><td>27</td><td>0.150976</td><td>14.8349</td><td>10</td></tr><tr><th>28</th><td>28</td><td>0.769278</td><td>9.38405</td><td>14</td></tr><tr><th>29</th><td>29</td><td>-0.310153</td><td>12.4906</td><td>15</td></tr><tr><th>30</th><td>30</td><td>-0.602707</td><td>9.9001</td><td>7</td></tr></tbody></table>"
|
||||||
|
],
|
||||||
|
"text/latex": [
|
||||||
|
"\\begin{tabular}{r|cccc}\n",
|
||||||
|
"\t& ID & var1 & var2 & var3\\\\\n",
|
||||||
|
"\t\\hline\n",
|
||||||
|
"\t& Int64 & Float64 & Float64 & Int64\\\\\n",
|
||||||
|
"\t\\hline\n",
|
||||||
|
"\t1 & 1 & 0.867347 & 7.44066 & 14 \\\\\n",
|
||||||
|
"\t2 & 2 & -0.901744 & 11.9946 & 13 \\\\\n",
|
||||||
|
"\t3 & 3 & -0.494479 & 10.6048 & 12 \\\\\n",
|
||||||
|
"\t4 & 4 & -0.902914 & 9.92711 & 9 \\\\\n",
|
||||||
|
"\t5 & 5 & 0.864401 & 10.2839 & 15 \\\\\n",
|
||||||
|
"\t6 & 6 & 2.21188 & 11.0425 & 14 \\\\\n",
|
||||||
|
"\t7 & 7 & 0.532813 & 11.7935 & 15 \\\\\n",
|
||||||
|
"\t8 & 8 & -0.271735 & 8.97294 & 9 \\\\\n",
|
||||||
|
"\t9 & 9 & 0.502334 & 8.4704 & 9 \\\\\n",
|
||||||
|
"\t10 & 10 & -0.516984 & 6.91715 & 8 \\\\\n",
|
||||||
|
"\t11 & 11 & -0.560501 & 9.83968 & 15 \\\\\n",
|
||||||
|
"\t12 & 12 & -0.0192918 & 7.81756 & 14 \\\\\n",
|
||||||
|
"\t13 & 13 & 0.128064 & 8.83897 & 11 \\\\\n",
|
||||||
|
"\t14 & 14 & 1.85278 & 9.36913 & 10 \\\\\n",
|
||||||
|
"\t15 & 15 & -0.827763 & 7.2771 & 15 \\\\\n",
|
||||||
|
"\t16 & 16 & 0.110096 & 9.77109 & 15 \\\\\n",
|
||||||
|
"\t17 & 17 & -0.251176 & 10.3317 & 6 \\\\\n",
|
||||||
|
"\t18 & 18 & 0.369714 & 9.18312 & 5 \\\\\n",
|
||||||
|
"\t19 & 19 & 0.0721164 & 7.98043 & 12 \\\\\n",
|
||||||
|
"\t20 & 20 & -1.50343 & 8.91239 & 13 \\\\\n",
|
||||||
|
"\t21 & 21 & 1.56417 & 7.54655 & 14 \\\\\n",
|
||||||
|
"\t22 & 22 & -1.39674 & 8.91657 & 5 \\\\\n",
|
||||||
|
"\t23 & 23 & 1.1055 & 8.62701 & 8 \\\\\n",
|
||||||
|
"\t24 & 24 & -1.10673 & 8.57414 & 9 \\\\\n",
|
||||||
|
"\t25 & 25 & -3.21136 & 9.34588 & 5 \\\\\n",
|
||||||
|
"\t26 & 26 & -0.0740145 & 11.0297 & 9 \\\\\n",
|
||||||
|
"\t27 & 27 & 0.150976 & 14.8349 & 10 \\\\\n",
|
||||||
|
"\t28 & 28 & 0.769278 & 9.38405 & 14 \\\\\n",
|
||||||
|
"\t29 & 29 & -0.310153 & 12.4906 & 15 \\\\\n",
|
||||||
|
"\t30 & 30 & -0.602707 & 9.9001 & 7 \\\\\n",
|
||||||
|
"\\end{tabular}\n"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
"30×4 DataFrame\n",
|
||||||
|
"│ Row │ ID │ var1 │ var2 │ var3 │\n",
|
||||||
|
"│ │ \u001b[90mInt64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mInt64\u001b[39m │\n",
|
||||||
|
"├─────┼───────┼────────────┼─────────┼───────┤\n",
|
||||||
|
"│ 1 │ 1 │ 0.867347 │ 7.44066 │ 14 │\n",
|
||||||
|
"│ 2 │ 2 │ -0.901744 │ 11.9946 │ 13 │\n",
|
||||||
|
"│ 3 │ 3 │ -0.494479 │ 10.6048 │ 12 │\n",
|
||||||
|
"│ 4 │ 4 │ -0.902914 │ 9.92711 │ 9 │\n",
|
||||||
|
"│ 5 │ 5 │ 0.864401 │ 10.2839 │ 15 │\n",
|
||||||
|
"│ 6 │ 6 │ 2.21188 │ 11.0425 │ 14 │\n",
|
||||||
|
"│ 7 │ 7 │ 0.532813 │ 11.7935 │ 15 │\n",
|
||||||
|
"│ 8 │ 8 │ -0.271735 │ 8.97294 │ 9 │\n",
|
||||||
|
"│ 9 │ 9 │ 0.502334 │ 8.4704 │ 9 │\n",
|
||||||
|
"│ 10 │ 10 │ -0.516984 │ 6.91715 │ 8 │\n",
|
||||||
|
"⋮\n",
|
||||||
|
"│ 20 │ 20 │ -1.50343 │ 8.91239 │ 13 │\n",
|
||||||
|
"│ 21 │ 21 │ 1.56417 │ 7.54655 │ 14 │\n",
|
||||||
|
"│ 22 │ 22 │ -1.39674 │ 8.91657 │ 5 │\n",
|
||||||
|
"│ 23 │ 23 │ 1.1055 │ 8.62701 │ 8 │\n",
|
||||||
|
"│ 24 │ 24 │ -1.10673 │ 8.57414 │ 9 │\n",
|
||||||
|
"│ 25 │ 25 │ -3.21136 │ 9.34588 │ 5 │\n",
|
||||||
|
"│ 26 │ 26 │ -0.0740145 │ 11.0297 │ 9 │\n",
|
||||||
|
"│ 27 │ 27 │ 0.150976 │ 14.8349 │ 10 │\n",
|
||||||
|
"│ 28 │ 28 │ 0.769278 │ 9.38405 │ 14 │\n",
|
||||||
|
"│ 29 │ 29 │ -0.310153 │ 12.4906 │ 15 │\n",
|
||||||
|
"│ 30 │ 30 │ -0.602707 │ 9.9001 │ 7 │"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df = DataFrame(ID = 1:30, var1 = rand(Normal(0,1),30), var2 = rand(Normal(10,2),30), var3 = rand(5:15,30))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"4. In code cells below, write the code to calculate the mean and variance of each column in the dataframe. For example, for the first variable this could be done using the println function and referring to each column (variable) by its symbol notation. Try to shorten the code with a for-loop, iterating over the variable names (in symbol format)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"┌ Warning: `getindex(df::DataFrame, col_ind::ColumnIndex)` is deprecated, use `df[!, col_ind]` instead.\n",
|
||||||
|
"│ caller = top-level scope at In[4]:3\n",
|
||||||
|
"└ @ Core ./In[4]:3\n",
|
||||||
|
"┌ Warning: `getindex(df::DataFrame, col_ind::ColumnIndex)` is deprecated, use `df[!, col_ind]` instead.\n",
|
||||||
|
"│ caller = top-level scope at In[4]:4\n",
|
||||||
|
"└ @ Core ./In[4]:4\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"The mean of var1 is: -0.061674963752526096, the variance is: 1.1790054448274625\n",
|
||||||
|
"The mean of var2 is: 9.580613055613338, the variance is: 2.948790077536739\n",
|
||||||
|
"The mean of var3 is: 11.0, the variance is: 11.724137931034482\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"for s in [:var1,:var2,:var3] #names(df)\n",
|
||||||
|
" colname = String(s)\n",
|
||||||
|
" meancol = mean(df[!, s])\n",
|
||||||
|
" variancecol = var(df[!, s])\n",
|
||||||
|
" println(\"The mean of $colname is: $meancol, the variance is: $variancecol\")\n",
|
||||||
|
"end"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"5. In a code cell below, create a new DataFrame named df2 from the last 20 rows of the original DataFrame, df1."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<table class=\"data-frame\"><thead><tr><th></th><th>ID</th><th>var1</th><th>var2</th><th>var3</th></tr><tr><th></th><th>Int64</th><th>Float64</th><th>Float64</th><th>Int64</th></tr></thead><tbody><p>20 rows × 4 columns</p><tr><th>1</th><td>11</td><td>-0.560501</td><td>9.83968</td><td>15</td></tr><tr><th>2</th><td>12</td><td>-0.0192918</td><td>7.81756</td><td>14</td></tr><tr><th>3</th><td>13</td><td>0.128064</td><td>8.83897</td><td>11</td></tr><tr><th>4</th><td>14</td><td>1.85278</td><td>9.36913</td><td>10</td></tr><tr><th>5</th><td>15</td><td>-0.827763</td><td>7.2771</td><td>15</td></tr><tr><th>6</th><td>16</td><td>0.110096</td><td>9.77109</td><td>15</td></tr><tr><th>7</th><td>17</td><td>-0.251176</td><td>10.3317</td><td>6</td></tr><tr><th>8</th><td>18</td><td>0.369714</td><td>9.18312</td><td>5</td></tr><tr><th>9</th><td>19</td><td>0.0721164</td><td>7.98043</td><td>12</td></tr><tr><th>10</th><td>20</td><td>-1.50343</td><td>8.91239</td><td>13</td></tr><tr><th>11</th><td>21</td><td>1.56417</td><td>7.54655</td><td>14</td></tr><tr><th>12</th><td>22</td><td>-1.39674</td><td>8.91657</td><td>5</td></tr><tr><th>13</th><td>23</td><td>1.1055</td><td>8.62701</td><td>8</td></tr><tr><th>14</th><td>24</td><td>-1.10673</td><td>8.57414</td><td>9</td></tr><tr><th>15</th><td>25</td><td>-3.21136</td><td>9.34588</td><td>5</td></tr><tr><th>16</th><td>26</td><td>-0.0740145</td><td>11.0297</td><td>9</td></tr><tr><th>17</th><td>27</td><td>0.150976</td><td>14.8349</td><td>10</td></tr><tr><th>18</th><td>28</td><td>0.769278</td><td>9.38405</td><td>14</td></tr><tr><th>19</th><td>29</td><td>-0.310153</td><td>12.4906</td><td>15</td></tr><tr><th>20</th><td>30</td><td>-0.602707</td><td>9.9001</td><td>7</td></tr></tbody></table>"
|
||||||
|
],
|
||||||
|
"text/latex": [
|
||||||
|
"\\begin{tabular}{r|cccc}\n",
|
||||||
|
"\t& ID & var1 & var2 & var3\\\\\n",
|
||||||
|
"\t\\hline\n",
|
||||||
|
"\t& Int64 & Float64 & Float64 & Int64\\\\\n",
|
||||||
|
"\t\\hline\n",
|
||||||
|
"\t1 & 11 & -0.560501 & 9.83968 & 15 \\\\\n",
|
||||||
|
"\t2 & 12 & -0.0192918 & 7.81756 & 14 \\\\\n",
|
||||||
|
"\t3 & 13 & 0.128064 & 8.83897 & 11 \\\\\n",
|
||||||
|
"\t4 & 14 & 1.85278 & 9.36913 & 10 \\\\\n",
|
||||||
|
"\t5 & 15 & -0.827763 & 7.2771 & 15 \\\\\n",
|
||||||
|
"\t6 & 16 & 0.110096 & 9.77109 & 15 \\\\\n",
|
||||||
|
"\t7 & 17 & -0.251176 & 10.3317 & 6 \\\\\n",
|
||||||
|
"\t8 & 18 & 0.369714 & 9.18312 & 5 \\\\\n",
|
||||||
|
"\t9 & 19 & 0.0721164 & 7.98043 & 12 \\\\\n",
|
||||||
|
"\t10 & 20 & -1.50343 & 8.91239 & 13 \\\\\n",
|
||||||
|
"\t11 & 21 & 1.56417 & 7.54655 & 14 \\\\\n",
|
||||||
|
"\t12 & 22 & -1.39674 & 8.91657 & 5 \\\\\n",
|
||||||
|
"\t13 & 23 & 1.1055 & 8.62701 & 8 \\\\\n",
|
||||||
|
"\t14 & 24 & -1.10673 & 8.57414 & 9 \\\\\n",
|
||||||
|
"\t15 & 25 & -3.21136 & 9.34588 & 5 \\\\\n",
|
||||||
|
"\t16 & 26 & -0.0740145 & 11.0297 & 9 \\\\\n",
|
||||||
|
"\t17 & 27 & 0.150976 & 14.8349 & 10 \\\\\n",
|
||||||
|
"\t18 & 28 & 0.769278 & 9.38405 & 14 \\\\\n",
|
||||||
|
"\t19 & 29 & -0.310153 & 12.4906 & 15 \\\\\n",
|
||||||
|
"\t20 & 30 & -0.602707 & 9.9001 & 7 \\\\\n",
|
||||||
|
"\\end{tabular}\n"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
"20×4 DataFrame\n",
|
||||||
|
"│ Row │ ID │ var1 │ var2 │ var3 │\n",
|
||||||
|
"│ │ \u001b[90mInt64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mInt64\u001b[39m │\n",
|
||||||
|
"├─────┼───────┼────────────┼─────────┼───────┤\n",
|
||||||
|
"│ 1 │ 11 │ -0.560501 │ 9.83968 │ 15 │\n",
|
||||||
|
"│ 2 │ 12 │ -0.0192918 │ 7.81756 │ 14 │\n",
|
||||||
|
"│ 3 │ 13 │ 0.128064 │ 8.83897 │ 11 │\n",
|
||||||
|
"│ 4 │ 14 │ 1.85278 │ 9.36913 │ 10 │\n",
|
||||||
|
"│ 5 │ 15 │ -0.827763 │ 7.2771 │ 15 │\n",
|
||||||
|
"│ 6 │ 16 │ 0.110096 │ 9.77109 │ 15 │\n",
|
||||||
|
"│ 7 │ 17 │ -0.251176 │ 10.3317 │ 6 │\n",
|
||||||
|
"│ 8 │ 18 │ 0.369714 │ 9.18312 │ 5 │\n",
|
||||||
|
"│ 9 │ 19 │ 0.0721164 │ 7.98043 │ 12 │\n",
|
||||||
|
"│ 10 │ 20 │ -1.50343 │ 8.91239 │ 13 │\n",
|
||||||
|
"│ 11 │ 21 │ 1.56417 │ 7.54655 │ 14 │\n",
|
||||||
|
"│ 12 │ 22 │ -1.39674 │ 8.91657 │ 5 │\n",
|
||||||
|
"│ 13 │ 23 │ 1.1055 │ 8.62701 │ 8 │\n",
|
||||||
|
"│ 14 │ 24 │ -1.10673 │ 8.57414 │ 9 │\n",
|
||||||
|
"│ 15 │ 25 │ -3.21136 │ 9.34588 │ 5 │\n",
|
||||||
|
"│ 16 │ 26 │ -0.0740145 │ 11.0297 │ 9 │\n",
|
||||||
|
"│ 17 │ 27 │ 0.150976 │ 14.8349 │ 10 │\n",
|
||||||
|
"│ 18 │ 28 │ 0.769278 │ 9.38405 │ 14 │\n",
|
||||||
|
"│ 19 │ 29 │ -0.310153 │ 12.4906 │ 15 │\n",
|
||||||
|
"│ 20 │ 30 │ -0.602707 │ 9.9001 │ 7 │"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 5,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df2 = df[11:end,:]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"6. In a code cell below, show the results of computing simple descriptive statistics on this new DataFrame using the describe() function."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<table class=\"data-frame\"><thead><tr><th></th><th>variable</th><th>mean</th><th>min</th><th>median</th><th>max</th><th>nunique</th><th>nmissing</th><th>eltype</th></tr><tr><th></th><th>Symbol</th><th>Float64</th><th>Real</th><th>Float64</th><th>Real</th><th>Nothing</th><th>Nothing</th><th>DataType</th></tr></thead><tbody><p>4 rows × 8 columns</p><tr><th>1</th><td>ID</td><td>20.5</td><td>11</td><td>20.5</td><td>30</td><td></td><td></td><td>Int64</td></tr><tr><th>2</th><td>var1</td><td>-0.187058</td><td>-3.21136</td><td>-0.0466532</td><td>1.85278</td><td></td><td></td><td>Float64</td></tr><tr><th>3</th><td>var2</td><td>9.49853</td><td>7.2771</td><td>9.2645</td><td>14.8349</td><td></td><td></td><td>Float64</td></tr><tr><th>4</th><td>var3</td><td>10.6</td><td>5</td><td>10.5</td><td>15</td><td></td><td></td><td>Int64</td></tr></tbody></table>"
|
||||||
|
],
|
||||||
|
"text/latex": [
|
||||||
|
"\\begin{tabular}{r|cccccccc}\n",
|
||||||
|
"\t& variable & mean & min & median & max & nunique & nmissing & eltype\\\\\n",
|
||||||
|
"\t\\hline\n",
|
||||||
|
"\t& Symbol & Float64 & Real & Float64 & Real & Nothing & Nothing & DataType\\\\\n",
|
||||||
|
"\t\\hline\n",
|
||||||
|
"\t1 & ID & 20.5 & 11 & 20.5 & 30 & & & Int64 \\\\\n",
|
||||||
|
"\t2 & var1 & -0.187058 & -3.21136 & -0.0466532 & 1.85278 & & & Float64 \\\\\n",
|
||||||
|
"\t3 & var2 & 9.49853 & 7.2771 & 9.2645 & 14.8349 & & & Float64 \\\\\n",
|
||||||
|
"\t4 & var3 & 10.6 & 5 & 10.5 & 15 & & & Int64 \\\\\n",
|
||||||
|
"\\end{tabular}\n"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
"4×8 DataFrame. Omitted printing of 2 columns\n",
|
||||||
|
"│ Row │ variable │ mean │ min │ median │ max │ nunique │\n",
|
||||||
|
"│ │ \u001b[90mSymbol\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mReal\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mReal\u001b[39m │ \u001b[90mNothing\u001b[39m │\n",
|
||||||
|
"├─────┼──────────┼───────────┼──────────┼────────────┼─────────┼─────────┤\n",
|
||||||
|
"│ 1 │ ID │ 20.5 │ 11 │ 20.5 │ 30 │ │\n",
|
||||||
|
"│ 2 │ var1 │ -0.187058 │ -3.21136 │ -0.0466532 │ 1.85278 │ │\n",
|
||||||
|
"│ 3 │ var2 │ 9.49853 │ 7.2771 │ 9.2645 │ 14.8349 │ │\n",
|
||||||
|
"│ 4 │ var3 │ 10.6 │ 5 │ 10.5 │ 15 │ │"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"describe(df2)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"7. In a code cell below, add a column named cat1 to the df2 DataFrame consisting of a random selection of 20 values from the sample space GroupA and GroupB."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<table class=\"data-frame\"><thead><tr><th></th><th>ID</th><th>var1</th><th>var2</th><th>var3</th><th>Col1</th></tr><tr><th></th><th>Int64</th><th>Float64</th><th>Float64</th><th>Int64</th><th>String</th></tr></thead><tbody><p>20 rows × 5 columns</p><tr><th>1</th><td>11</td><td>-0.560501</td><td>9.83968</td><td>15</td><td>GroupA</td></tr><tr><th>2</th><td>12</td><td>-0.0192918</td><td>7.81756</td><td>14</td><td>GroupB</td></tr><tr><th>3</th><td>13</td><td>0.128064</td><td>8.83897</td><td>11</td><td>GroupB</td></tr><tr><th>4</th><td>14</td><td>1.85278</td><td>9.36913</td><td>10</td><td>GroupB</td></tr><tr><th>5</th><td>15</td><td>-0.827763</td><td>7.2771</td><td>15</td><td>GroupB</td></tr><tr><th>6</th><td>16</td><td>0.110096</td><td>9.77109</td><td>15</td><td>GroupA</td></tr><tr><th>7</th><td>17</td><td>-0.251176</td><td>10.3317</td><td>6</td><td>GroupB</td></tr><tr><th>8</th><td>18</td><td>0.369714</td><td>9.18312</td><td>5</td><td>GroupA</td></tr><tr><th>9</th><td>19</td><td>0.0721164</td><td>7.98043</td><td>12</td><td>GroupB</td></tr><tr><th>10</th><td>20</td><td>-1.50343</td><td>8.91239</td><td>13</td><td>GroupA</td></tr><tr><th>11</th><td>21</td><td>1.56417</td><td>7.54655</td><td>14</td><td>GroupB</td></tr><tr><th>12</th><td>22</td><td>-1.39674</td><td>8.91657</td><td>5</td><td>GroupB</td></tr><tr><th>13</th><td>23</td><td>1.1055</td><td>8.62701</td><td>8</td><td>GroupA</td></tr><tr><th>14</th><td>24</td><td>-1.10673</td><td>8.57414</td><td>9</td><td>GroupA</td></tr><tr><th>15</th><td>25</td><td>-3.21136</td><td>9.34588</td><td>5</td><td>GroupA</td></tr><tr><th>16</th><td>26</td><td>-0.0740145</td><td>11.0297</td><td>9</td><td>GroupA</td></tr><tr><th>17</th><td>27</td><td>0.150976</td><td>14.8349</td><td>10</td><td>GroupA</td></tr><tr><th>18</th><td>28</td><td>0.769278</td><td>9.38405</td><td>14</td><td>GroupA</td></tr><tr><th>19</th><td>29</td><td>-0.310153</td><td>12.4906</td><td>15</td><td>GroupA</td></tr><tr><th>20</th><td>30</td><td>-0.602707</td><td>9.9
001</td><td>7</td><td>GroupA</td></tr></tbody></table>"
|
||||||
|
],
|
||||||
|
"text/latex": [
|
||||||
|
"\\begin{tabular}{r|ccccc}\n",
|
||||||
|
"\t& ID & var1 & var2 & var3 & Col1\\\\\n",
|
||||||
|
"\t\\hline\n",
|
||||||
|
"\t& Int64 & Float64 & Float64 & Int64 & String\\\\\n",
|
||||||
|
"\t\\hline\n",
|
||||||
|
"\t1 & 11 & -0.560501 & 9.83968 & 15 & GroupA \\\\\n",
|
||||||
|
"\t2 & 12 & -0.0192918 & 7.81756 & 14 & GroupB \\\\\n",
|
||||||
|
"\t3 & 13 & 0.128064 & 8.83897 & 11 & GroupB \\\\\n",
|
||||||
|
"\t4 & 14 & 1.85278 & 9.36913 & 10 & GroupB \\\\\n",
|
||||||
|
"\t5 & 15 & -0.827763 & 7.2771 & 15 & GroupB \\\\\n",
|
||||||
|
"\t6 & 16 & 0.110096 & 9.77109 & 15 & GroupA \\\\\n",
|
||||||
|
"\t7 & 17 & -0.251176 & 10.3317 & 6 & GroupB \\\\\n",
|
||||||
|
"\t8 & 18 & 0.369714 & 9.18312 & 5 & GroupA \\\\\n",
|
||||||
|
"\t9 & 19 & 0.0721164 & 7.98043 & 12 & GroupB \\\\\n",
|
||||||
|
"\t10 & 20 & -1.50343 & 8.91239 & 13 & GroupA \\\\\n",
|
||||||
|
"\t11 & 21 & 1.56417 & 7.54655 & 14 & GroupB \\\\\n",
|
||||||
|
"\t12 & 22 & -1.39674 & 8.91657 & 5 & GroupB \\\\\n",
|
||||||
|
"\t13 & 23 & 1.1055 & 8.62701 & 8 & GroupA \\\\\n",
|
||||||
|
"\t14 & 24 & -1.10673 & 8.57414 & 9 & GroupA \\\\\n",
|
||||||
|
"\t15 & 25 & -3.21136 & 9.34588 & 5 & GroupA \\\\\n",
|
||||||
|
"\t16 & 26 & -0.0740145 & 11.0297 & 9 & GroupA \\\\\n",
|
||||||
|
"\t17 & 27 & 0.150976 & 14.8349 & 10 & GroupA \\\\\n",
|
||||||
|
"\t18 & 28 & 0.769278 & 9.38405 & 14 & GroupA \\\\\n",
|
||||||
|
"\t19 & 29 & -0.310153 & 12.4906 & 15 & GroupA \\\\\n",
|
||||||
|
"\t20 & 30 & -0.602707 & 9.9001 & 7 & GroupA \\\\\n",
|
||||||
|
"\\end{tabular}\n"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
"20×5 DataFrame\n",
|
||||||
|
"│ Row │ ID │ var1 │ var2 │ var3 │ Col1 │\n",
|
||||||
|
"│ │ \u001b[90mInt64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mString\u001b[39m │\n",
|
||||||
|
"├─────┼───────┼────────────┼─────────┼───────┼────────┤\n",
|
||||||
|
"│ 1 │ 11 │ -0.560501 │ 9.83968 │ 15 │ GroupA │\n",
|
||||||
|
"│ 2 │ 12 │ -0.0192918 │ 7.81756 │ 14 │ GroupB │\n",
|
||||||
|
"│ 3 │ 13 │ 0.128064 │ 8.83897 │ 11 │ GroupB │\n",
|
||||||
|
"│ 4 │ 14 │ 1.85278 │ 9.36913 │ 10 │ GroupB │\n",
|
||||||
|
"│ 5 │ 15 │ -0.827763 │ 7.2771 │ 15 │ GroupB │\n",
|
||||||
|
"│ 6 │ 16 │ 0.110096 │ 9.77109 │ 15 │ GroupA │\n",
|
||||||
|
"│ 7 │ 17 │ -0.251176 │ 10.3317 │ 6 │ GroupB │\n",
|
||||||
|
"│ 8 │ 18 │ 0.369714 │ 9.18312 │ 5 │ GroupA │\n",
|
||||||
|
"│ 9 │ 19 │ 0.0721164 │ 7.98043 │ 12 │ GroupB │\n",
|
||||||
|
"│ 10 │ 20 │ -1.50343 │ 8.91239 │ 13 │ GroupA │\n",
|
||||||
|
"│ 11 │ 21 │ 1.56417 │ 7.54655 │ 14 │ GroupB │\n",
|
||||||
|
"│ 12 │ 22 │ -1.39674 │ 8.91657 │ 5 │ GroupB │\n",
|
||||||
|
"│ 13 │ 23 │ 1.1055 │ 8.62701 │ 8 │ GroupA │\n",
|
||||||
|
"│ 14 │ 24 │ -1.10673 │ 8.57414 │ 9 │ GroupA │\n",
|
||||||
|
"│ 15 │ 25 │ -3.21136 │ 9.34588 │ 5 │ GroupA │\n",
|
||||||
|
"│ 16 │ 26 │ -0.0740145 │ 11.0297 │ 9 │ GroupA │\n",
|
||||||
|
"│ 17 │ 27 │ 0.150976 │ 14.8349 │ 10 │ GroupA │\n",
|
||||||
|
"│ 18 │ 28 │ 0.769278 │ 9.38405 │ 14 │ GroupA │\n",
|
||||||
|
"│ 19 │ 29 │ -0.310153 │ 12.4906 │ 15 │ GroupA │\n",
|
||||||
|
"│ 20 │ 30 │ -0.602707 │ 9.9001 │ 7 │ GroupA │"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 7,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"insertcols!(df2,:Col1 => rand([\"GroupA\",\"GroupB\"],20))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"8. In a code cells below, create a DataFrame named df3 with columns named *id*, var4 and var5 such that id contains the values 11 through 30, var4 contains the values 21 through 40 and var5 contains the values 41 through 60."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 8,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<table class=\"data-frame\"><thead><tr><th></th><th>ID</th><th>var4</th><th>var5</th></tr><tr><th></th><th>Int64</th><th>Int64</th><th>Int64</th></tr></thead><tbody><p>20 rows × 3 columns</p><tr><th>1</th><td>11</td><td>21</td><td>41</td></tr><tr><th>2</th><td>12</td><td>22</td><td>42</td></tr><tr><th>3</th><td>13</td><td>23</td><td>43</td></tr><tr><th>4</th><td>14</td><td>24</td><td>44</td></tr><tr><th>5</th><td>15</td><td>25</td><td>45</td></tr><tr><th>6</th><td>16</td><td>26</td><td>46</td></tr><tr><th>7</th><td>17</td><td>27</td><td>47</td></tr><tr><th>8</th><td>18</td><td>28</td><td>48</td></tr><tr><th>9</th><td>19</td><td>29</td><td>49</td></tr><tr><th>10</th><td>20</td><td>30</td><td>50</td></tr><tr><th>11</th><td>21</td><td>31</td><td>51</td></tr><tr><th>12</th><td>22</td><td>32</td><td>52</td></tr><tr><th>13</th><td>23</td><td>33</td><td>53</td></tr><tr><th>14</th><td>24</td><td>34</td><td>54</td></tr><tr><th>15</th><td>25</td><td>35</td><td>55</td></tr><tr><th>16</th><td>26</td><td>36</td><td>56</td></tr><tr><th>17</th><td>27</td><td>37</td><td>57</td></tr><tr><th>18</th><td>28</td><td>38</td><td>58</td></tr><tr><th>19</th><td>29</td><td>39</td><td>59</td></tr><tr><th>20</th><td>30</td><td>40</td><td>60</td></tr></tbody></table>"
|
||||||
|
],
|
||||||
|
"text/latex": [
|
||||||
|
"\\begin{tabular}{r|ccc}\n",
|
||||||
|
"\t& ID & var4 & var5\\\\\n",
|
||||||
|
"\t\\hline\n",
|
||||||
|
"\t& Int64 & Int64 & Int64\\\\\n",
|
||||||
|
"\t\\hline\n",
|
||||||
|
"\t1 & 11 & 21 & 41 \\\\\n",
|
||||||
|
"\t2 & 12 & 22 & 42 \\\\\n",
|
||||||
|
"\t3 & 13 & 23 & 43 \\\\\n",
|
||||||
|
"\t4 & 14 & 24 & 44 \\\\\n",
|
||||||
|
"\t5 & 15 & 25 & 45 \\\\\n",
|
||||||
|
"\t6 & 16 & 26 & 46 \\\\\n",
|
||||||
|
"\t7 & 17 & 27 & 47 \\\\\n",
|
||||||
|
"\t8 & 18 & 28 & 48 \\\\\n",
|
||||||
|
"\t9 & 19 & 29 & 49 \\\\\n",
|
||||||
|
"\t10 & 20 & 30 & 50 \\\\\n",
|
||||||
|
"\t11 & 21 & 31 & 51 \\\\\n",
|
||||||
|
"\t12 & 22 & 32 & 52 \\\\\n",
|
||||||
|
"\t13 & 23 & 33 & 53 \\\\\n",
|
||||||
|
"\t14 & 24 & 34 & 54 \\\\\n",
|
||||||
|
"\t15 & 25 & 35 & 55 \\\\\n",
|
||||||
|
"\t16 & 26 & 36 & 56 \\\\\n",
|
||||||
|
"\t17 & 27 & 37 & 57 \\\\\n",
|
||||||
|
"\t18 & 28 & 38 & 58 \\\\\n",
|
||||||
|
"\t19 & 29 & 39 & 59 \\\\\n",
|
||||||
|
"\t20 & 30 & 40 & 60 \\\\\n",
|
||||||
|
"\\end{tabular}\n"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
"20×3 DataFrame\n",
|
||||||
|
"│ Row │ ID │ var4 │ var5 │\n",
|
||||||
|
"│ │ \u001b[90mInt64\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mInt64\u001b[39m │\n",
|
||||||
|
"├─────┼───────┼───────┼───────┤\n",
|
||||||
|
"│ 1 │ 11 │ 21 │ 41 │\n",
|
||||||
|
"│ 2 │ 12 │ 22 │ 42 │\n",
|
||||||
|
"│ 3 │ 13 │ 23 │ 43 │\n",
|
||||||
|
"│ 4 │ 14 │ 24 │ 44 │\n",
|
||||||
|
"│ 5 │ 15 │ 25 │ 45 │\n",
|
||||||
|
"│ 6 │ 16 │ 26 │ 46 │\n",
|
||||||
|
"│ 7 │ 17 │ 27 │ 47 │\n",
|
||||||
|
"│ 8 │ 18 │ 28 │ 48 │\n",
|
||||||
|
"│ 9 │ 19 │ 29 │ 49 │\n",
|
||||||
|
"│ 10 │ 20 │ 30 │ 50 │\n",
|
||||||
|
"│ 11 │ 21 │ 31 │ 51 │\n",
|
||||||
|
"│ 12 │ 22 │ 32 │ 52 │\n",
|
||||||
|
"│ 13 │ 23 │ 33 │ 53 │\n",
|
||||||
|
"│ 14 │ 24 │ 34 │ 54 │\n",
|
||||||
|
"│ 15 │ 25 │ 35 │ 55 │\n",
|
||||||
|
"│ 16 │ 26 │ 36 │ 56 │\n",
|
||||||
|
"│ 17 │ 27 │ 37 │ 57 │\n",
|
||||||
|
"│ 18 │ 28 │ 38 │ 58 │\n",
|
||||||
|
"│ 19 │ 29 │ 39 │ 59 │\n",
|
||||||
|
"│ 20 │ 30 │ 40 │ 60 │"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 8,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df3 = DataFrame(ID = collect(11:30), var4 = collect(21:40), var5 = collect(41:60))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"9. In a code cells below, do a join of DataFrames df2 and df3 on the id column and save the result as a new dataframe called df4"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<table class=\"data-frame\"><thead><tr><th></th><th>ID</th><th>var1</th><th>var2</th><th>var3</th><th>Col1</th><th>var4</th><th>var5</th></tr><tr><th></th><th>Int64</th><th>Float64</th><th>Float64</th><th>Int64</th><th>String</th><th>Int64</th><th>Int64</th></tr></thead><tbody><p>20 rows × 7 columns</p><tr><th>1</th><td>11</td><td>-0.560501</td><td>9.83968</td><td>15</td><td>GroupA</td><td>21</td><td>41</td></tr><tr><th>2</th><td>12</td><td>-0.0192918</td><td>7.81756</td><td>14</td><td>GroupB</td><td>22</td><td>42</td></tr><tr><th>3</th><td>13</td><td>0.128064</td><td>8.83897</td><td>11</td><td>GroupB</td><td>23</td><td>43</td></tr><tr><th>4</th><td>14</td><td>1.85278</td><td>9.36913</td><td>10</td><td>GroupB</td><td>24</td><td>44</td></tr><tr><th>5</th><td>15</td><td>-0.827763</td><td>7.2771</td><td>15</td><td>GroupB</td><td>25</td><td>45</td></tr><tr><th>6</th><td>16</td><td>0.110096</td><td>9.77109</td><td>15</td><td>GroupA</td><td>26</td><td>46</td></tr><tr><th>7</th><td>17</td><td>-0.251176</td><td>10.3317</td><td>6</td><td>GroupB</td><td>27</td><td>47</td></tr><tr><th>8</th><td>18</td><td>0.369714</td><td>9.18312</td><td>5</td><td>GroupA</td><td>28</td><td>48</td></tr><tr><th>9</th><td>19</td><td>0.0721164</td><td>7.98043</td><td>12</td><td>GroupB</td><td>29</td><td>49</td></tr><tr><th>10</th><td>20</td><td>-1.50343</td><td>8.91239</td><td>13</td><td>GroupA</td><td>30</td><td>50</td></tr><tr><th>11</th><td>21</td><td>1.56417</td><td>7.54655</td><td>14</td><td>GroupB</td><td>31</td><td>51</td></tr><tr><th>12</th><td>22</td><td>-1.39674</td><td>8.91657</td><td>5</td><td>GroupB</td><td>32</td><td>52</td></tr><tr><th>13</th><td>23</td><td>1.1055</td><td>8.62701</td><td>8</td><td>GroupA</td><td>33</td><td>53</td></tr><tr><th>14</th><td>24</td><td>-1.10673</td><td>8.57414</td><td>9</td><td>GroupA</td><td>34</td><td>54</td></tr><tr><th>15</th><td>25</td><td>-3.21136</td><td>9.34588</td><td>5</td><td>GroupA</td><td>35</td><td>55</td></tr><tr><th>16</th><td>26</td><td
>-0.0740145</td><td>11.0297</td><td>9</td><td>GroupA</td><td>36</td><td>56</td></tr><tr><th>17</th><td>27</td><td>0.150976</td><td>14.8349</td><td>10</td><td>GroupA</td><td>37</td><td>57</td></tr><tr><th>18</th><td>28</td><td>0.769278</td><td>9.38405</td><td>14</td><td>GroupA</td><td>38</td><td>58</td></tr><tr><th>19</th><td>29</td><td>-0.310153</td><td>12.4906</td><td>15</td><td>GroupA</td><td>39</td><td>59</td></tr><tr><th>20</th><td>30</td><td>-0.602707</td><td>9.9001</td><td>7</td><td>GroupA</td><td>40</td><td>60</td></tr></tbody></table>"
|
||||||
|
],
|
||||||
|
"text/latex": [
|
||||||
|
"\\begin{tabular}{r|ccccccc}\n",
|
||||||
|
"\t& ID & var1 & var2 & var3 & Col1 & var4 & var5\\\\\n",
|
||||||
|
"\t\\hline\n",
|
||||||
|
"\t& Int64 & Float64 & Float64 & Int64 & String & Int64 & Int64\\\\\n",
|
||||||
|
"\t\\hline\n",
|
||||||
|
"\t1 & 11 & -0.560501 & 9.83968 & 15 & GroupA & 21 & 41 \\\\\n",
|
||||||
|
"\t2 & 12 & -0.0192918 & 7.81756 & 14 & GroupB & 22 & 42 \\\\\n",
|
||||||
|
"\t3 & 13 & 0.128064 & 8.83897 & 11 & GroupB & 23 & 43 \\\\\n",
|
||||||
|
"\t4 & 14 & 1.85278 & 9.36913 & 10 & GroupB & 24 & 44 \\\\\n",
|
||||||
|
"\t5 & 15 & -0.827763 & 7.2771 & 15 & GroupB & 25 & 45 \\\\\n",
|
||||||
|
"\t6 & 16 & 0.110096 & 9.77109 & 15 & GroupA & 26 & 46 \\\\\n",
|
||||||
|
"\t7 & 17 & -0.251176 & 10.3317 & 6 & GroupB & 27 & 47 \\\\\n",
|
||||||
|
"\t8 & 18 & 0.369714 & 9.18312 & 5 & GroupA & 28 & 48 \\\\\n",
|
||||||
|
"\t9 & 19 & 0.0721164 & 7.98043 & 12 & GroupB & 29 & 49 \\\\\n",
|
||||||
|
"\t10 & 20 & -1.50343 & 8.91239 & 13 & GroupA & 30 & 50 \\\\\n",
|
||||||
|
"\t11 & 21 & 1.56417 & 7.54655 & 14 & GroupB & 31 & 51 \\\\\n",
|
||||||
|
"\t12 & 22 & -1.39674 & 8.91657 & 5 & GroupB & 32 & 52 \\\\\n",
|
||||||
|
"\t13 & 23 & 1.1055 & 8.62701 & 8 & GroupA & 33 & 53 \\\\\n",
|
||||||
|
"\t14 & 24 & -1.10673 & 8.57414 & 9 & GroupA & 34 & 54 \\\\\n",
|
||||||
|
"\t15 & 25 & -3.21136 & 9.34588 & 5 & GroupA & 35 & 55 \\\\\n",
|
||||||
|
"\t16 & 26 & -0.0740145 & 11.0297 & 9 & GroupA & 36 & 56 \\\\\n",
|
||||||
|
"\t17 & 27 & 0.150976 & 14.8349 & 10 & GroupA & 37 & 57 \\\\\n",
|
||||||
|
"\t18 & 28 & 0.769278 & 9.38405 & 14 & GroupA & 38 & 58 \\\\\n",
|
||||||
|
"\t19 & 29 & -0.310153 & 12.4906 & 15 & GroupA & 39 & 59 \\\\\n",
|
||||||
|
"\t20 & 30 & -0.602707 & 9.9001 & 7 & GroupA & 40 & 60 \\\\\n",
|
||||||
|
"\\end{tabular}\n"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
"20×7 DataFrame\n",
|
||||||
|
"│ Row │ ID │ var1 │ var2 │ var3 │ Col1 │ var4 │ var5 │\n",
|
||||||
|
"│ │ \u001b[90mInt64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mInt64\u001b[39m │\n",
|
||||||
|
"├─────┼───────┼────────────┼─────────┼───────┼────────┼───────┼───────┤\n",
|
||||||
|
"│ 1 │ 11 │ -0.560501 │ 9.83968 │ 15 │ GroupA │ 21 │ 41 │\n",
|
||||||
|
"│ 2 │ 12 │ -0.0192918 │ 7.81756 │ 14 │ GroupB │ 22 │ 42 │\n",
|
||||||
|
"│ 3 │ 13 │ 0.128064 │ 8.83897 │ 11 │ GroupB │ 23 │ 43 │\n",
|
||||||
|
"│ 4 │ 14 │ 1.85278 │ 9.36913 │ 10 │ GroupB │ 24 │ 44 │\n",
|
||||||
|
"│ 5 │ 15 │ -0.827763 │ 7.2771 │ 15 │ GroupB │ 25 │ 45 │\n",
|
||||||
|
"│ 6 │ 16 │ 0.110096 │ 9.77109 │ 15 │ GroupA │ 26 │ 46 │\n",
|
||||||
|
"│ 7 │ 17 │ -0.251176 │ 10.3317 │ 6 │ GroupB │ 27 │ 47 │\n",
|
||||||
|
"│ 8 │ 18 │ 0.369714 │ 9.18312 │ 5 │ GroupA │ 28 │ 48 │\n",
|
||||||
|
"│ 9 │ 19 │ 0.0721164 │ 7.98043 │ 12 │ GroupB │ 29 │ 49 │\n",
|
||||||
|
"│ 10 │ 20 │ -1.50343 │ 8.91239 │ 13 │ GroupA │ 30 │ 50 │\n",
|
||||||
|
"│ 11 │ 21 │ 1.56417 │ 7.54655 │ 14 │ GroupB │ 31 │ 51 │\n",
|
||||||
|
"│ 12 │ 22 │ -1.39674 │ 8.91657 │ 5 │ GroupB │ 32 │ 52 │\n",
|
||||||
|
"│ 13 │ 23 │ 1.1055 │ 8.62701 │ 8 │ GroupA │ 33 │ 53 │\n",
|
||||||
|
"│ 14 │ 24 │ -1.10673 │ 8.57414 │ 9 │ GroupA │ 34 │ 54 │\n",
|
||||||
|
"│ 15 │ 25 │ -3.21136 │ 9.34588 │ 5 │ GroupA │ 35 │ 55 │\n",
|
||||||
|
"│ 16 │ 26 │ -0.0740145 │ 11.0297 │ 9 │ GroupA │ 36 │ 56 │\n",
|
||||||
|
"│ 17 │ 27 │ 0.150976 │ 14.8349 │ 10 │ GroupA │ 37 │ 57 │\n",
|
||||||
|
"│ 18 │ 28 │ 0.769278 │ 9.38405 │ 14 │ GroupA │ 38 │ 58 │\n",
|
||||||
|
"│ 19 │ 29 │ -0.310153 │ 12.4906 │ 15 │ GroupA │ 39 │ 59 │\n",
|
||||||
|
"│ 20 │ 30 │ -0.602707 │ 9.9001 │ 7 │ GroupA │ 40 │ 60 │"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df4 = innerjoin(df2,df3,on = :ID)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Julia 1.2.0",
|
||||||
|
"language": "julia",
|
||||||
|
"name": "julia-1.2"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"file_extension": ".jl",
|
||||||
|
"mimetype": "application/julia",
|
||||||
|
"name": "julia",
|
||||||
|
"version": "1.2.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,201 @@
|
||||||
|
# WORKING WITH DATA
|
||||||
|
# -----------------
|
||||||
|
|
||||||
|
# I Distributions
|
||||||
|
|
||||||
|
# * Data point values for a variable usually follow a pattern
|
||||||
|
# * Such patterns are called distributions
|
||||||
|
# * Distributions are either discrete or continuous
|
||||||
|
# * The Distributions.jl package contains most of the common
|
||||||
|
# data distributions
|
||||||
|
|
||||||
|
# 1 Importing Distributions.jl
|
||||||
|
using Distributions
|
||||||
|
using Random
|
||||||
|
|
||||||
|
|
||||||
|
# 2 The standard normal distribution
|
||||||
|
#Seed the pseudo-random number generator
|
||||||
|
Random.seed!(1234)
|
||||||
|
#Saving the standard normal distribution as an object
|
||||||
|
n = Normal()
|
||||||
|
#Parameter values of the standard normal distribution
|
||||||
|
params(n)
|
||||||
|
#Select 10 elements at random from n
|
||||||
|
var1 = rand(n, 10)
|
||||||
|
#Calculating the mean and standard deviation of var1
|
||||||
|
mean(var1)
|
||||||
|
std(var1)
|
||||||
|
#Probability density function value at x = 0.3
|
||||||
|
pdf(Normal(), 0.3)
|
||||||
|
#Cumulative distribution function at x = 0.25
|
||||||
|
cdf(Normal(), 0.25)
|
||||||
|
#Quantiles
|
||||||
|
quantile(Normal(), 0.025)
|
||||||
|
quantile(Normal(), 0.975)
|
||||||
|
|
||||||
|
# 3 The normal distribution
|
||||||
|
#Returning the parameters of the normal distribution
|
||||||
|
fieldnames(Normal)
|
||||||
|
#Creating 100 data point values from a normal distribution
|
||||||
|
# with a mean of 100 and a standard deviation of 10
|
||||||
|
var2 = rand(Normal(100, 10), 100)
|
||||||
|
#Calculating the mean and standard deviation of var2
|
||||||
|
mean(var2)
|
||||||
|
std(var2)
|
||||||
|
#Using fit() to calculate the parameters of a distribution
|
||||||
|
fit(Normal, var2)
|
||||||
|
|
||||||
|
# 3 Skewness and kurtosis
|
||||||
|
skewness(var2)
|
||||||
|
kurtosis(var2)
|
||||||
|
|
||||||
|
# 4 Beta distribution
|
||||||
|
b = Beta(1, 1)
|
||||||
|
params(b)
|
||||||
|
var3 = rand(b, 100)
|
||||||
|
fit(Beta, var3)
|
||||||
|
|
||||||
|
# 5 χ2 distribution
|
||||||
|
c = Chisq(1)
|
||||||
|
var4 = rand(c, 100)
|
||||||
|
fieldnames(Chisq) # Degrees of freedom
|
||||||
|
|
||||||
|
# 6 Distribution types are hierarchical
|
||||||
|
supertype(Normal)
|
||||||
|
subtypes(Distribution{Univariate,Continuous})
|
||||||
|
subtypes(Distribution{Univariate,Discrete})
|
||||||
|
# * Search for help in the REPL
|
||||||
|
|
||||||
|
# II DataFrames
|
||||||
|
|
||||||
|
using DataFrames
|
||||||
|
|
||||||
|
# * Allows for creation of a flat data structure (rows and columns)
|
||||||
|
# * Columns are variables
|
||||||
|
# * Rows are subjects (examples)
|
||||||
|
|
||||||
|
# 1 Create a DataFrame
|
||||||
|
typeof(var2)
|
||||||
|
#Create an empty DataFrame
|
||||||
|
df = DataFrame()
|
||||||
|
|
||||||
|
# 2 Add a column with data point values (rows)
|
||||||
|
df[!, :Var2] = var2  # df[:col] indexing was removed from DataFrames.jl; `!` selects the column without copying
|
||||||
|
#View first five rows
|
||||||
|
first(df, 5)  # head() was removed from DataFrames.jl; first(df, n) returns the first n rows
|
||||||
|
|
||||||
|
# 3 Add another column
|
||||||
|
df[!, :Var3] = var3  # df[:col] indexing was removed from DataFrames.jl; use df[!, :col]
|
||||||
|
#View last three rows
|
||||||
|
last(df, 3)  # tail() was removed from DataFrames.jl; last(df, n) returns the last n rows
|
||||||
|
|
||||||
|
# 4 Dimensions of a DataFrame
|
||||||
|
size(df)
|
||||||
|
rows = size(df, 1)
|
||||||
|
columns = size(df, 2)
|
||||||
|
|
||||||
|
# 5 Inspect content
|
||||||
|
describe(df, :eltype, :nmissing, :first, :last)  # showcols() was removed; describe() with these stats gives the same per-column overview
|
||||||
|
#Data type only
|
||||||
|
eltype.(eachcol(df))  # eltypes() was removed; broadcast eltype over the columns instead
|
||||||
|
#Descriptive statistics
|
||||||
|
describe(df)
|
||||||
|
#Print in console
|
||||||
|
print(describe(df))
|
||||||
|
|
||||||
|
# 6 Create a bigger DataFrame
|
||||||
|
df2 = DataFrame()
|
||||||
|
df2[!, :A] = collect(1:10)  # df[:col] indexing was removed; collect() stores a plain Vector rather than a range
|
||||||
|
df2[!, :B] = ["I", "II", "II", "I", "II", "I", "II", "II", "I", "II"]  # use df[!, :col] to add a column
|
||||||
|
df2[!, :C] = rand(Normal(), 10)  # use df[!, :col] to add a column
|
||||||
|
df2[!, :D] = rand(Chisq(1), 10)  # use df[!, :col] to add a column
|
||||||
|
|
||||||
|
# 7 Slicing
|
||||||
|
#First three rows
|
||||||
|
df2[1:3, :]
|
||||||
|
#All rows, columns 1 and 3
|
||||||
|
df2[:, [1, 3]]
|
||||||
|
#Different notation
|
||||||
|
df2[:, [:A, :C]]
|
||||||
|
|
||||||
|
# III Importing data files
|
||||||
|
|
||||||
|
# 1 Set working directory in Atom settings under Julia tab
|
||||||
|
pwd()
|
||||||
|
|
||||||
|
# 2 Import CSV
|
||||||
|
using CSV
|
||||||
|
|
||||||
|
# 3 Import csv file (in same directory)
|
||||||
|
data1 = CSV.File("CCS.csv") |> DataFrame  # CSV.read(file) without a sink no longer works; materialize the parsed file as a DataFrame
|
||||||
|
|
||||||
|
# 4 Explore the data
|
||||||
|
typeof(data1)
|
||||||
|
first(data1, 6)  # head() was removed from DataFrames.jl; 6 matches the old default row count
|
||||||
|
describe(data1, :eltype, :nmissing, :first, :last)  # showcols() was removed; describe() with these stats gives the same per-column overview
|
||||||
|
eltype.(eachcol(data1))  # eltypes() was removed; broadcast eltype over the columns instead
|
||||||
|
describe(data1)
|
||||||
|
|
||||||
|
# 5 Combining DataFrames
|
||||||
|
#Creating DataFrames
|
||||||
|
subjects = DataFrame(Number = [100, 101, 102, 103], Stage = ["I", "III", "II", "I"])
|
||||||
|
treatment = DataFrame(Number = [103, 102, 101, 100], Treatment = ["A", "B", "A", "B"])
|
||||||
|
subjects
|
||||||
|
treatment
|
||||||
|
#Joining
|
||||||
|
df3 = innerjoin(subjects, treatment, on = :Number);  # join() was removed; its default kind was an inner join
|
||||||
|
df3
|
||||||
|
#Adding a longer list of subjects
|
||||||
|
subjects = DataFrame(Number = [100, 101, 102, 103, 104, 105], Stage = ["I", "III", "II", "I", "II", "II"])
|
||||||
|
#Inner join
|
||||||
|
df4 = innerjoin(subjects, treatment, on = :Number);  # join(...; kind = :inner) was replaced by innerjoin()
|
||||||
|
df4
|
||||||
|
#Outer join: empty fields filled with missing
|
||||||
|
df5 = outerjoin(subjects, treatment, on = :Number);  # join(...; kind = :outer) was replaced by outerjoin()
|
||||||
|
df5
|
||||||
|
|
||||||
|
# 6 Grouping
|
||||||
|
#Creating a new DataFrame
|
||||||
|
df6 = DataFrame(Group = rand(["A", "B", "C"], 15), Variable1 = randn(15), Variable2 = rand(15));
|
||||||
|
df6
|
||||||
|
#Grouping using by()
|
||||||
|
combine(groupby(df6, :Group), sdf -> (Rows = size(sdf, 1), Columns = size(sdf, 2)))  # by() was removed; report each group's dimensions via groupby + combine
|
||||||
|
#Count unique data point values in :Group column
|
||||||
|
combine(groupby(df6, :Group), nrow => :Count)  # by() was removed; nrow counts the rows in each group
|
||||||
|
#Aggregate for descriptive statistics
|
||||||
|
print(combine(groupby(df6, :Group), [:Variable1, :Variable2] .=> mean, [:Variable1, :Variable2] .=> std))  # aggregate() was removed; apply mean and std to each value column per group
|
||||||
|
#Group
|
||||||
|
groupby(df6, :Group)
|
||||||
|
length(groupby(df6, :Group))
|
||||||
|
groupby(df6, :Group)[2]
|
||||||
|
|
||||||
|
# 7 Selection
|
||||||
|
df6.Variable1 .> 0  # df[:col] indexing was removed; property access returns the column vector
|
||||||
|
view(df6, df6.Variable1 .> 0, :)  # sub() no longer exists; view() gives a SubDataFrame over the selected rows
|
||||||
|
|
||||||
|
# 8 New DataFrame by selection
|
||||||
|
df6A = df6[df6.Group .== "A", :];  # sub() no longer exists; row indexing copies, so df6A is a real new DataFrame unaffected by later sorting of df6
|
||||||
|
df6A
|
||||||
|
|
||||||
|
# 9 Sorting
|
||||||
|
df6S = sort(df6, [:Group, :Variable1], rev = true);  # the cols keyword was removed; non-mutating sort keeps df6S independent of df6
|
||||||
|
df6S
|
||||||
|
df6S2 = sort(df6, [:Group, :Variable1, :Variable2], rev = [false, false, true]);  # the cols keyword was removed; rev takes one Bool per sort column
|
||||||
|
df6S2
|
||||||
|
|
||||||
|
# 10 Unique rows
|
||||||
|
#Creating a DataFrame with an obvious duplicate row
|
||||||
|
df7 = DataFrame(A = [1, 2, 2, 3, 4, 5], B = [11, 12, 12, 13, 14, 15], C = ["A", "B", "B", "C", "D", "E"]);
|
||||||
|
df7
|
||||||
|
#Only unique rows
|
||||||
|
unique(df7)
|
||||||
|
df7
|
||||||
|
#Permanent change
|
||||||
|
unique!(df7)
|
||||||
|
df7
|
||||||
|
|
||||||
|
# 11 Delete rows
|
||||||
|
#Permanently
|
||||||
|
deleteat!(df7, [1, 5])  # deleterows!() was removed; deleteat! drops the listed rows in place (DataFrames 1.3+; use delete! on older releases)
|
||||||
|
df7
|
Loading…
Reference in New Issue