[ADD] honors material

2020-05-30 15:58:12 -06:00 · 2020-05-30 15:58:12 -06:00 · 9c0c146183
commit 9c0c146183
parent f1f66b5d2e
7 changed files with 7720 additions and 0 deletions
--- a/Week4_Collections.ipynb
+++ b/Week4_Collections.ipynb
--- a/Week4_Collections.jl
+++ b/Week4_Collections.jl
@ -0,0 +1,244 @@
 # COLLECTIONS
 # -----------
 # * Collections are groups of elements
 # * Elements are values of different Julia data types
 # * Storing elements in collections is one of the most useful
 #    operations in computing
 # I ARRAYS
 # *  Arrays are collections of values separated with commas and
 #     placed inside of square brackets
 # * They are represented in column or in row form
 # 1 Like a column vector (click on the downward arrow)
 #Comma-separated values give a one-dimensional Vector
 array1 = [1, 2, 3]
 typeof(array1)
 # 2 Like a row vector (click on the downward arrow)
 #Space-separated values give a two-dimensional 1 x 3 Matrix
 array2 = [1 2 3]
 typeof(array2)
 # 3 The transpose converts between the two
 transpose(array1)
 #The apostrophe (adjoint) gives the same result for real-valued arrays
 array1'
 # 4 Boolean logic (==)
 #Confirms that both notations produce equal results
 transpose(array1) == array1'
 # 5 Type promotion
 #With a mix of types, all the elements are promoted to the "highest" type
 #Note that array2 is reassigned here and no longer holds the row vector
 array2 = [1, 2, 3.0]
 #Indexing one of the original integers now returns a Float64
 array2[1]
 # 6 Column-wise entry of multidimensional array
 #Space-separated vectors are joined side by side, so each one becomes a column
 array3 = [[1, 2, 3] [4, 5, 6] [7, 8, 9]]
 # 7 Row-wise entry of multidimensional array
 #Semicolon-separated row vectors are stacked, so each one becomes a row
 array4 = [[1 2 3]; [4 5 6]; [7 8 9]]
 # 8 Length of array
 #length() counts every element, regardless of the number of dimensions
 length(array3)
 length(array4)
 # 9 Index order of column-wise array
 #eachindex() yields every valid index of the array, so the loop does not
 # have to assume that indices run from 1 to length()
 #A single index walks down the first column, then the second, and so on
 for i in eachindex(array3)
    println("Element $(i) is ", array3[i])
 end
 # 10 Index order of row-wise array
 #The traversal is still column by column, whichever way the array was entered
 for i in eachindex(array4)
    println("Element $(i) is ", array4[i])
 end
 # 11 Using repeat() to repeat elements
 #The whole vector [1, 2] is repeated three times
 repeat([1, 2], 3)
 # 12 Using range(start, step, number of elements)
 #A range stores only its description; the values are not materialised
 range(1, step = 1, length = 10)
 typeof(range(1, step = 1, length = 10))
 # 13 Create collections using the collect() function
 #collect() turns the range into an array holding every value
 collect(range(1, step = 1, length = 10))
 #Short-hand (start:stop)
 collect(1:10)
 typeof(1:10)
 #Add step size (start:step:stop)
 collect(2:2:10)
 typeof(collect(2:2:10))
 # 14 Creating an array of missing values with two rows and three columns
 #The Union element type allows the entries to be replaced by integers later
 array5 = Array{Union{Missing, Int}}(missing, 2, 3)
 # 15 Reshaping
 #Returns the same six elements arranged as 3 x 2; array5 itself stays 2 x 3
 reshape(array5, 3, 2)
 # 16 Indexing (slicing)
 #Random integers drawn uniformly from the closed interval [10, 20]
 #Shape 10 x 5 (ten rows, five columns)
 array6 = rand(10:20, 10, 5)
 #All rows in first column
 array6[:, 1]
 #Rows two through five of second column
 array6[2:5, 2]
 #Values in rows 2, 4, 6, and in columns 1 and 5
 array6[[2, 4, 6], [1, 5]]
 #Values in row 1 from column 3 to the last column
 array6[1, 3:end]
 # Boolean logic (returning only true and false)
 #The dot applies the comparison to every element of the first column
 array6[:, 1] .> 12
 # 17 Changing element values
 array7 = [1, 2, 3, 4, 5]
 #Permanently append 10 to end of array (the ! marks a mutating function)
 push!(array7, 10)
 #Remove last element
 #Only the removed value will be displayed
 pop!(array7)
 #Display the array to confirm that it is back to five elements
 array7
 #Change second element value to 1000
 array7[2] = 1000
 array7
 # 18 Array comprehension
 array8 = [3 * i for i in 1:5]
 #Two iterators give a two-dimensional array: a selects the row and
 # b selects the column of each product
 array9 = [a * b for a in 1:3, b in 1:3]
 # 19 Arithmetic on arrays
 #Elementwise addition of a scalar using dot notation
 array8 .+ 1
 #Elementwise addition of arrays of the same size (five elements each here)
 array7 + array8
 # 20 Missing
 # * Missing is a Julia data type
 # * Provides a placeholder for missing data in a statistical sense
 # * Propagates automatically
 # * Whether two values are both missing can be tested with === or isequal()
 # * Sorting is possible since missing is seen as greater than other values
 #Propagation: any operation involving missing returns missing
 missing + 1
 missing > 1
 #Only the fourth element of the sum is missing
 [1, 2, 3, missing, 5] + [10, 20, 30, 40 ,50]
 #Cannot return true or false since value is not known (returns missing)
 missing == missing
 #Equality tests that do return true or false
 missing === missing
 isequal(missing, missing)
 #Sorting with isless(): every value, even Inf, is less than missing
 isless(1, missing)
 isless(Inf, missing)
 # 21 Array of integer zeros
 #The first argument sets the element type, the rest set the dimensions
 array11 = zeros(Int8, 3, 3)
 # 22 Array of floating point ones
 array12 = ones(Float16, 3, 3)
 # 23 Array of true (bit array) values
 array13 = trues(3, 3)
 # 24 Fill an array with n elements of value x
 #Here the value 10 fills a 3 x 3 array
 array14 = fill(10, 3, 3)
 # 25 Convert elements to a different data type
 #The dot applies convert() to each element; array14 itself is not changed
 convert.(Float16, array14)
 # 26 Concatenation
 #Concatenate along dimension 1 (stacked vertically into one six-element vector)
 array15 = [1, 2, 3]
 array16 = [10, 20, 30]
 cat(array15, array16, dims = 1)
 #Same as above
 vcat(array15, array16)
 #Concatenate along dimension 2 (side by side, making a 3 x 2 array of two columns)
 cat(array15, array16, dims = 2)
 #Same as above
 hcat(array15, array16)
 # II TUPLES
 # * Tuples are immutable collections
 # 1 Tuples with mixed types
 tuple1 = (1, 2, 3, 4, "Julia")
 #For loop to look at value and type of each element
 #enumerate() yields each position together with its element, so the tuple
 # does not have to be indexed twice per iteration
 for (i, element) in enumerate(tuple1)
    println(" The value of the tuple at index number $(i) is $(element) and the type is $(typeof(element)).")
 end
 # 2 Each element can be named
 #The tuple on the right is unpacked into the four variables on the left
 a, b, c, seven = (1, 3, 5, 7)
 a
 seven
 # 3 Reverse order index (can be done with arrays too)
 #The range end:-1:1 steps from the last element back to the first
 tuple1[end:-1:1]
 # 4 Mixed length tuples
 #Tuples can be nested inside other tuples
 tuple2 = ((1, 2, 3), 1, 2, (3, 100, 1))
 #Element 4
 tuple2[4]
 #Element 2 in element 4
 tuple2[4][2]
 # III DICTIONARIES
 # * Dictionaries are collections of key-value pairs
 # 1 Example of a dictionary
 dictionary1 = Dict(1 => 77, 2 => 66, 3 => 1)
 #The => is shorthand for the Pair() function
 dictionary2 = Dict(Pair(1,100), Pair(2,200), Pair(3,300))
 # 2 Specifying types
 #Dict{Any, Any} accepts keys and values of any type
 dictionary3 = Dict{Any, Any}(1 => 77, 2 => 66, 3 => "three")
 #We can get a bit crazy: a string key and a tuple key in the same dictionary
 dictionary4 = Dict{Any, Any}("a" => 1, (2, 3) => "hello")
 # 3 Using symbols as keys
 dictionary5 = Dict(:A => 300, :B => 305, :C => 309)
 #Look up the value stored under the key :A
 dictionary5[:A]
 # 4 Using in() to check on key-value pairs
 #True only when the key exists and its value matches
 in((:A => 300), dictionary5)
 # 5 Changing an existing value
 dictionary5[:C] = 1000
 dictionary5
 # 6 Using the delete!() function
 #Removes the key :A and its value from the dictionary in place
 delete!(dictionary5, :A)
 # 7 The keys of a dictionary
 keys(dictionary5)
 # 8 The values of a dictionary
 values(dictionary5)
 # 9 Creating a dictionary with automatic keys
 procedure_vals = ["Appendectomy", "Colectomy", "Cholecystectomy"]
 #Every key and value here is a String, so the concrete String type is used
 # for both parameters instead of the abstract AbstractString
 procedure_dict = Dict{String, String}()
 #enumerate() supplies the running number that is used to build each key
 for (s, n) in enumerate(procedure_vals)
    procedure_dict["x_$(s)"] = n
 end
 #Procedure_dict is now a dictionary
 procedure_dict
 # 10 Iterating through a dictionary by key and value
 #A dictionary is not ordered, so the pairs may print in any order
 for (k, v) in procedure_dict
    println(k, " is ",v)
 end
 # 11 Sorting
 dictionary6 = Dict("a"=> 1,"b"=>2 ,"c"=>3 ,"d"=>4 ,"e"=>5 ,"f"=>6)
 # Sorting using a for loop
 #collect() already returns a fresh vector of the keys, so it can be sorted
 # in place with sort!() instead of being copied a second time by sort()
 for k in sort!(collect(keys(dictionary6)))
    println("$(k) is $(dictionary6[k])")
 end
--- a/Week4_Functions.ipynb
+++ b/Week4_Functions.ipynb
--- a/Week4_Functions.jl
+++ b/Week4_Functions.jl
@ -0,0 +1,104 @@
 #The + operator called in function form with two arguments
 +(2, 2)
 #Print the Julia version and platform details of this session
 versioninfo()
 # FUNCTIONS IN JULIA 1.0 (0.7+)
 # ----------------------
 # * A function maps a tuple of arguments to a return value
 # I Creating basic Functions
 # 1 using the function keyword
 #Create a function named my_addition
 #Takes two arguments
 #Return the addition of the two values
 #Add the two arguments together and return the result
 function my_addition(x, y)
    total = x + y
    return total
 end
 # 2 Calling a function
 #Call the function with two argument values
 my_addition(3, 4)
 # 3 Built-in functions
 #The plus symbol, +, like the other arithmetic symbols, is a built-in function
 # and can be called with its arguments in parentheses
 +(3, 4)
 # 4 Using a Unicode symbol as a function name
 #Return the sum of the two arguments; the function name is a Unicode symbol
 function Σ(x, y)
    return +(x, y)
 end
 #Call the Unicode-named function like any other function
 Σ(3, 4)
 # II Anonymous functions
 # * Functions can be assigned to variables
 # * Functions can be used as arguments
 # * Functions can be returned as values
 # 1 An anonymous function
 #The arrow separates the argument (left) from the function body (right)
 x -> x^2 + 3
 # 2 A function as an argument
 #Passing the round() function as argument to the map() function
 map(round, [2.1, 3.4, 7.9])
 #Passing an anonymous function as an argument to the map() function
 map(x -> x^2, [2, 3, 4])
 # III Tuples and functions
 # * Tuples are immutable collections
 # 1 Examples of tuples
 my_tuple = (1, "Julia", 7)
 typeof(my_tuple)
 # * Single value tuple must have a comma
 #Without the trailing comma, (4) would simply be the integer 4
 my_second_tuple = (4,)
 typeof(my_second_tuple)
 # 2 Indexing a tuple
 length(my_tuple)
 my_tuple[2]
 # 3 Named tuples
 # * Named tuple creates a name for each element
 my_other_tuple = (a = 4, b = "Julia", c = 3)
 #Indexing tuple by name
 my_other_tuple.b
 # 4 Function returns
 # * Multiple return values of a function are tuples
 #Return both the sum and the difference of the two arguments as a tuple
 function my_function(a, b)
    total = a + b
    difference = a - b
    return (total, difference)
 end
 #Calling the function
 my_function(10, 5)
 #Looking up the type of the function return
 typeof(my_function(10, 5))
 #This allows for each element to be given a variable
 #r receives the sum (first element) and s the difference (second element)
 r, s = my_function(10, 5)
 r
 s
 # IV Functions with keyword arguments
 # * Keyword arguments are added after semi-colon
 # * Their order is not explicit
 # * Default values are added
 # 1 Creating a function with a keyword argument
 #Add the two positional arguments and the keyword argument z (3 when omitted)
 function my_keyword_function(x, y; z = 3)
    positional_sum = x + y
    return positional_sum + z
 end
 # * Omission of keyword argument uses default
 my_keyword_function(1, 2)
 # * Keyword argument names must be used
 #Here z is passed by name and overrides the default of 3
 my_keyword_function(1, 2, z = 10)
 # 2 Use of dot notation for functions
 # * Passes a collection elementwise to a function
 # * Use instead of map()
 #NOTE(review): the last element repeats π; 2π may have been intended to
 # complete the sequence 0, π/2, π, 3π/2 - confirm
 sin.([0., π/2., π, 3/2. * π, π])
--- a/Week4_PR_Template.ipynb
+++ b/Week4_PR_Template.ipynb
@ -0,0 +1,569 @@
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Week 4 Peer Review"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "2. In a code cell below, import the required packages: Distributions, DataFrames, and Random (install these packages via the REPL if required)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import the required packages\n",
    "using Distributions, DataFrames, Random"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seed the random number generator\n",
    "Random.seed!(1234);"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "3. In a code cell below, create a dataframe named df1, with 30 rows and 4 columns (variables). Call the first column ID. It should hold the values 1 through 30 (to make up 30 rows). Use three rand() function calls to generate three more columns named var1, var2, and var3. The second column (var1) should consist of 30 values from a standard normal distribution (mean of 0 and standard deviation of 1). The third column (var2) should consist of 30 random values from a normal distribution with a mean of 10 and a standard deviation of 2. The last column (var3) should contain 30 random values chosen from a range of integers between (and including) 5 and 15."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>ID</th><th>var1</th><th>var2</th><th>var3</th></tr><tr><th></th><th>Int64</th><th>Float64</th><th>Float64</th><th>Int64</th></tr></thead><tbody><p>30 rows × 4 columns</p><tr><th>1</th><td>1</td><td>0.867347</td><td>7.44066</td><td>14</td></tr><tr><th>2</th><td>2</td><td>-0.901744</td><td>11.9946</td><td>13</td></tr><tr><th>3</th><td>3</td><td>-0.494479</td><td>10.6048</td><td>12</td></tr><tr><th>4</th><td>4</td><td>-0.902914</td><td>9.92711</td><td>9</td></tr><tr><th>5</th><td>5</td><td>0.864401</td><td>10.2839</td><td>15</td></tr><tr><th>6</th><td>6</td><td>2.21188</td><td>11.0425</td><td>14</td></tr><tr><th>7</th><td>7</td><td>0.532813</td><td>11.7935</td><td>15</td></tr><tr><th>8</th><td>8</td><td>-0.271735</td><td>8.97294</td><td>9</td></tr><tr><th>9</th><td>9</td><td>0.502334</td><td>8.4704</td><td>9</td></tr><tr><th>10</th><td>10</td><td>-0.516984</td><td>6.91715</td><td>8</td></tr><tr><th>11</th><td>11</td><td>-0.560501</td><td>9.83968</td><td>15</td></tr><tr><th>12</th><td>12</td><td>-0.0192918</td><td>7.81756</td><td>14</td></tr><tr><th>13</th><td>13</td><td>0.128064</td><td>8.83897</td><td>11</td></tr><tr><th>14</th><td>14</td><td>1.85278</td><td>9.36913</td><td>10</td></tr><tr><th>15</th><td>15</td><td>-0.827763</td><td>7.2771</td><td>15</td></tr><tr><th>16</th><td>16</td><td>0.110096</td><td>9.77109</td><td>15</td></tr><tr><th>17</th><td>17</td><td>-0.251176</td><td>10.3317</td><td>6</td></tr><tr><th>18</th><td>18</td><td>0.369714</td><td>9.18312</td><td>5</td></tr><tr><th>19</th><td>19</td><td>0.0721164</td><td>7.98043</td><td>12</td></tr><tr><th>20</th><td>20</td><td>-1.50343</td><td>8.91239</td><td>13</td></tr><tr><th>21</th><td>21</td><td>1.56417</td><td>7.54655</td><td>14</td></tr><tr><th>22</th><td>22</td><td>-1.39674</td><td>8.91657</td><td>5</td></tr><tr><th>23</th><td>23</td><td>1.1055</td><td>8.62701</td><td>8</td></tr><tr><th>24</th><td>24</td><td>-1.10673</td><td>8.57414</td><td>9</t
d></tr><tr><th>25</th><td>25</td><td>-3.21136</td><td>9.34588</td><td>5</td></tr><tr><th>26</th><td>26</td><td>-0.0740145</td><td>11.0297</td><td>9</td></tr><tr><th>27</th><td>27</td><td>0.150976</td><td>14.8349</td><td>10</td></tr><tr><th>28</th><td>28</td><td>0.769278</td><td>9.38405</td><td>14</td></tr><tr><th>29</th><td>29</td><td>-0.310153</td><td>12.4906</td><td>15</td></tr><tr><th>30</th><td>30</td><td>-0.602707</td><td>9.9001</td><td>7</td></tr></tbody></table>"
      ],
      "text/latex": [
       "\\begin{tabular}{r|cccc}\n",
       "\t& ID & var1 & var2 & var3\\\\\n",
       "\t\\hline\n",
       "\t& Int64 & Float64 & Float64 & Int64\\\\\n",
       "\t\\hline\n",
       "\t1 & 1 & 0.867347 & 7.44066 & 14 \\\\\n",
       "\t2 & 2 & -0.901744 & 11.9946 & 13 \\\\\n",
       "\t3 & 3 & -0.494479 & 10.6048 & 12 \\\\\n",
       "\t4 & 4 & -0.902914 & 9.92711 & 9 \\\\\n",
       "\t5 & 5 & 0.864401 & 10.2839 & 15 \\\\\n",
       "\t6 & 6 & 2.21188 & 11.0425 & 14 \\\\\n",
       "\t7 & 7 & 0.532813 & 11.7935 & 15 \\\\\n",
       "\t8 & 8 & -0.271735 & 8.97294 & 9 \\\\\n",
       "\t9 & 9 & 0.502334 & 8.4704 & 9 \\\\\n",
       "\t10 & 10 & -0.516984 & 6.91715 & 8 \\\\\n",
       "\t11 & 11 & -0.560501 & 9.83968 & 15 \\\\\n",
       "\t12 & 12 & -0.0192918 & 7.81756 & 14 \\\\\n",
       "\t13 & 13 & 0.128064 & 8.83897 & 11 \\\\\n",
       "\t14 & 14 & 1.85278 & 9.36913 & 10 \\\\\n",
       "\t15 & 15 & -0.827763 & 7.2771 & 15 \\\\\n",
       "\t16 & 16 & 0.110096 & 9.77109 & 15 \\\\\n",
       "\t17 & 17 & -0.251176 & 10.3317 & 6 \\\\\n",
       "\t18 & 18 & 0.369714 & 9.18312 & 5 \\\\\n",
       "\t19 & 19 & 0.0721164 & 7.98043 & 12 \\\\\n",
       "\t20 & 20 & -1.50343 & 8.91239 & 13 \\\\\n",
       "\t21 & 21 & 1.56417 & 7.54655 & 14 \\\\\n",
       "\t22 & 22 & -1.39674 & 8.91657 & 5 \\\\\n",
       "\t23 & 23 & 1.1055 & 8.62701 & 8 \\\\\n",
       "\t24 & 24 & -1.10673 & 8.57414 & 9 \\\\\n",
       "\t25 & 25 & -3.21136 & 9.34588 & 5 \\\\\n",
       "\t26 & 26 & -0.0740145 & 11.0297 & 9 \\\\\n",
       "\t27 & 27 & 0.150976 & 14.8349 & 10 \\\\\n",
       "\t28 & 28 & 0.769278 & 9.38405 & 14 \\\\\n",
       "\t29 & 29 & -0.310153 & 12.4906 & 15 \\\\\n",
       "\t30 & 30 & -0.602707 & 9.9001 & 7 \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/plain": [
       "30×4 DataFrame\n",
       "│ Row │ ID    │ var1       │ var2    │ var3  │\n",
       "│     │ \u001b[90mInt64\u001b[39m │ \u001b[90mFloat64\u001b[39m    │ \u001b[90mFloat64\u001b[39m │ \u001b[90mInt64\u001b[39m │\n",
       "├─────┼───────┼────────────┼─────────┼───────┤\n",
       "│ 1   │ 1     │ 0.867347   │ 7.44066 │ 14    │\n",
       "│ 2   │ 2     │ -0.901744  │ 11.9946 │ 13    │\n",
       "│ 3   │ 3     │ -0.494479  │ 10.6048 │ 12    │\n",
       "│ 4   │ 4     │ -0.902914  │ 9.92711 │ 9     │\n",
       "│ 5   │ 5     │ 0.864401   │ 10.2839 │ 15    │\n",
       "│ 6   │ 6     │ 2.21188    │ 11.0425 │ 14    │\n",
       "│ 7   │ 7     │ 0.532813   │ 11.7935 │ 15    │\n",
       "│ 8   │ 8     │ -0.271735  │ 8.97294 │ 9     │\n",
       "│ 9   │ 9     │ 0.502334   │ 8.4704  │ 9     │\n",
       "│ 10  │ 10    │ -0.516984  │ 6.91715 │ 8     │\n",
       "⋮\n",
       "│ 20  │ 20    │ -1.50343   │ 8.91239 │ 13    │\n",
       "│ 21  │ 21    │ 1.56417    │ 7.54655 │ 14    │\n",
       "│ 22  │ 22    │ -1.39674   │ 8.91657 │ 5     │\n",
       "│ 23  │ 23    │ 1.1055     │ 8.62701 │ 8     │\n",
       "│ 24  │ 24    │ -1.10673   │ 8.57414 │ 9     │\n",
       "│ 25  │ 25    │ -3.21136   │ 9.34588 │ 5     │\n",
       "│ 26  │ 26    │ -0.0740145 │ 11.0297 │ 9     │\n",
       "│ 27  │ 27    │ 0.150976   │ 14.8349 │ 10    │\n",
       "│ 28  │ 28    │ 0.769278   │ 9.38405 │ 14    │\n",
       "│ 29  │ 29    │ -0.310153  │ 12.4906 │ 15    │\n",
       "│ 30  │ 30    │ -0.602707  │ 9.9001  │ 7     │"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = DataFrame(ID = 1:30, var1 = rand(Normal(0,1),30), var2 = rand(Normal(10,2),30), var3 = rand(5:15,30))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "4. In code cells below, write the code to calculate the mean and variance of each column in the dataframe. For example, for the first variable, this could be done using the println function and referring to each column (variable) by its symbol notation. Try to shorten the code with a for-loop, iterating over the variable names (in symbol format)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "┌ Warning: `getindex(df::DataFrame, col_ind::ColumnIndex)` is deprecated, use `df[!, col_ind]` instead.\n",
      "│   caller = top-level scope at In[4]:3\n",
      "└ @ Core ./In[4]:3\n",
      "┌ Warning: `getindex(df::DataFrame, col_ind::ColumnIndex)` is deprecated, use `df[!, col_ind]` instead.\n",
      "│   caller = top-level scope at In[4]:4\n",
      "└ @ Core ./In[4]:4\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The mean of var1 is: -0.061674963752526096, the variance is: 1.1790054448274625\n",
      "The mean of var2 is: 9.580613055613338, the variance is: 2.948790077536739\n",
      "The mean of var3 is: 11.0, the variance is: 11.724137931034482\n"
     ]
    }
   ],
   "source": [
    "for s in [:var1, :var2, :var3]\n",
    "    colname = String(s)\n",
    "    # df[!, s] selects the whole column; the bare df[s] form is deprecated\n",
    "    meancol = mean(df[!, s])\n",
    "    variancecol = var(df[!, s])\n",
    "    println(\"The mean of $colname is: $meancol, the variance is: $variancecol\")\n",
    "end"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "5. In a code cell below, create a new DataFrame named df2 from the last 20 rows of the original DataFrame, df1."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>ID</th><th>var1</th><th>var2</th><th>var3</th></tr><tr><th></th><th>Int64</th><th>Float64</th><th>Float64</th><th>Int64</th></tr></thead><tbody><p>20 rows × 4 columns</p><tr><th>1</th><td>11</td><td>-0.560501</td><td>9.83968</td><td>15</td></tr><tr><th>2</th><td>12</td><td>-0.0192918</td><td>7.81756</td><td>14</td></tr><tr><th>3</th><td>13</td><td>0.128064</td><td>8.83897</td><td>11</td></tr><tr><th>4</th><td>14</td><td>1.85278</td><td>9.36913</td><td>10</td></tr><tr><th>5</th><td>15</td><td>-0.827763</td><td>7.2771</td><td>15</td></tr><tr><th>6</th><td>16</td><td>0.110096</td><td>9.77109</td><td>15</td></tr><tr><th>7</th><td>17</td><td>-0.251176</td><td>10.3317</td><td>6</td></tr><tr><th>8</th><td>18</td><td>0.369714</td><td>9.18312</td><td>5</td></tr><tr><th>9</th><td>19</td><td>0.0721164</td><td>7.98043</td><td>12</td></tr><tr><th>10</th><td>20</td><td>-1.50343</td><td>8.91239</td><td>13</td></tr><tr><th>11</th><td>21</td><td>1.56417</td><td>7.54655</td><td>14</td></tr><tr><th>12</th><td>22</td><td>-1.39674</td><td>8.91657</td><td>5</td></tr><tr><th>13</th><td>23</td><td>1.1055</td><td>8.62701</td><td>8</td></tr><tr><th>14</th><td>24</td><td>-1.10673</td><td>8.57414</td><td>9</td></tr><tr><th>15</th><td>25</td><td>-3.21136</td><td>9.34588</td><td>5</td></tr><tr><th>16</th><td>26</td><td>-0.0740145</td><td>11.0297</td><td>9</td></tr><tr><th>17</th><td>27</td><td>0.150976</td><td>14.8349</td><td>10</td></tr><tr><th>18</th><td>28</td><td>0.769278</td><td>9.38405</td><td>14</td></tr><tr><th>19</th><td>29</td><td>-0.310153</td><td>12.4906</td><td>15</td></tr><tr><th>20</th><td>30</td><td>-0.602707</td><td>9.9001</td><td>7</td></tr></tbody></table>"
      ],
      "text/latex": [
       "\\begin{tabular}{r|cccc}\n",
       "\t& ID & var1 & var2 & var3\\\\\n",
       "\t\\hline\n",
       "\t& Int64 & Float64 & Float64 & Int64\\\\\n",
       "\t\\hline\n",
       "\t1 & 11 & -0.560501 & 9.83968 & 15 \\\\\n",
       "\t2 & 12 & -0.0192918 & 7.81756 & 14 \\\\\n",
       "\t3 & 13 & 0.128064 & 8.83897 & 11 \\\\\n",
       "\t4 & 14 & 1.85278 & 9.36913 & 10 \\\\\n",
       "\t5 & 15 & -0.827763 & 7.2771 & 15 \\\\\n",
       "\t6 & 16 & 0.110096 & 9.77109 & 15 \\\\\n",
       "\t7 & 17 & -0.251176 & 10.3317 & 6 \\\\\n",
       "\t8 & 18 & 0.369714 & 9.18312 & 5 \\\\\n",
       "\t9 & 19 & 0.0721164 & 7.98043 & 12 \\\\\n",
       "\t10 & 20 & -1.50343 & 8.91239 & 13 \\\\\n",
       "\t11 & 21 & 1.56417 & 7.54655 & 14 \\\\\n",
       "\t12 & 22 & -1.39674 & 8.91657 & 5 \\\\\n",
       "\t13 & 23 & 1.1055 & 8.62701 & 8 \\\\\n",
       "\t14 & 24 & -1.10673 & 8.57414 & 9 \\\\\n",
       "\t15 & 25 & -3.21136 & 9.34588 & 5 \\\\\n",
       "\t16 & 26 & -0.0740145 & 11.0297 & 9 \\\\\n",
       "\t17 & 27 & 0.150976 & 14.8349 & 10 \\\\\n",
       "\t18 & 28 & 0.769278 & 9.38405 & 14 \\\\\n",
       "\t19 & 29 & -0.310153 & 12.4906 & 15 \\\\\n",
       "\t20 & 30 & -0.602707 & 9.9001 & 7 \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/plain": [
       "20×4 DataFrame\n",
       "│ Row │ ID    │ var1       │ var2    │ var3  │\n",
       "│     │ \u001b[90mInt64\u001b[39m │ \u001b[90mFloat64\u001b[39m    │ \u001b[90mFloat64\u001b[39m │ \u001b[90mInt64\u001b[39m │\n",
       "├─────┼───────┼────────────┼─────────┼───────┤\n",
       "│ 1   │ 11    │ -0.560501  │ 9.83968 │ 15    │\n",
       "│ 2   │ 12    │ -0.0192918 │ 7.81756 │ 14    │\n",
       "│ 3   │ 13    │ 0.128064   │ 8.83897 │ 11    │\n",
       "│ 4   │ 14    │ 1.85278    │ 9.36913 │ 10    │\n",
       "│ 5   │ 15    │ -0.827763  │ 7.2771  │ 15    │\n",
       "│ 6   │ 16    │ 0.110096   │ 9.77109 │ 15    │\n",
       "│ 7   │ 17    │ -0.251176  │ 10.3317 │ 6     │\n",
       "│ 8   │ 18    │ 0.369714   │ 9.18312 │ 5     │\n",
       "│ 9   │ 19    │ 0.0721164  │ 7.98043 │ 12    │\n",
       "│ 10  │ 20    │ -1.50343   │ 8.91239 │ 13    │\n",
       "│ 11  │ 21    │ 1.56417    │ 7.54655 │ 14    │\n",
       "│ 12  │ 22    │ -1.39674   │ 8.91657 │ 5     │\n",
       "│ 13  │ 23    │ 1.1055     │ 8.62701 │ 8     │\n",
       "│ 14  │ 24    │ -1.10673   │ 8.57414 │ 9     │\n",
       "│ 15  │ 25    │ -3.21136   │ 9.34588 │ 5     │\n",
       "│ 16  │ 26    │ -0.0740145 │ 11.0297 │ 9     │\n",
       "│ 17  │ 27    │ 0.150976   │ 14.8349 │ 10    │\n",
       "│ 18  │ 28    │ 0.769278   │ 9.38405 │ 14    │\n",
       "│ 19  │ 29    │ -0.310153  │ 12.4906 │ 15    │\n",
       "│ 20  │ 30    │ -0.602707  │ 9.9001  │ 7     │"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2 = df[11:end,:]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "6. In a code cell below, show the results of computing simple descriptive statistics on this new DataFrame using the describe() function."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>variable</th><th>mean</th><th>min</th><th>median</th><th>max</th><th>nunique</th><th>nmissing</th><th>eltype</th></tr><tr><th></th><th>Symbol</th><th>Float64</th><th>Real</th><th>Float64</th><th>Real</th><th>Nothing</th><th>Nothing</th><th>DataType</th></tr></thead><tbody><p>4 rows × 8 columns</p><tr><th>1</th><td>ID</td><td>20.5</td><td>11</td><td>20.5</td><td>30</td><td></td><td></td><td>Int64</td></tr><tr><th>2</th><td>var1</td><td>-0.187058</td><td>-3.21136</td><td>-0.0466532</td><td>1.85278</td><td></td><td></td><td>Float64</td></tr><tr><th>3</th><td>var2</td><td>9.49853</td><td>7.2771</td><td>9.2645</td><td>14.8349</td><td></td><td></td><td>Float64</td></tr><tr><th>4</th><td>var3</td><td>10.6</td><td>5</td><td>10.5</td><td>15</td><td></td><td></td><td>Int64</td></tr></tbody></table>"
      ],
      "text/latex": [
       "\\begin{tabular}{r|cccccccc}\n",
       "\t& variable & mean & min & median & max & nunique & nmissing & eltype\\\\\n",
       "\t\\hline\n",
       "\t& Symbol & Float64 & Real & Float64 & Real & Nothing & Nothing & DataType\\\\\n",
       "\t\\hline\n",
       "\t1 & ID & 20.5 & 11 & 20.5 & 30 &  &  & Int64 \\\\\n",
       "\t2 & var1 & -0.187058 & -3.21136 & -0.0466532 & 1.85278 &  &  & Float64 \\\\\n",
       "\t3 & var2 & 9.49853 & 7.2771 & 9.2645 & 14.8349 &  &  & Float64 \\\\\n",
       "\t4 & var3 & 10.6 & 5 & 10.5 & 15 &  &  & Int64 \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/plain": [
       "4×8 DataFrame. Omitted printing of 2 columns\n",
       "│ Row │ variable │ mean      │ min      │ median     │ max     │ nunique │\n",
       "│     │ \u001b[90mSymbol\u001b[39m   │ \u001b[90mFloat64\u001b[39m   │ \u001b[90mReal\u001b[39m     │ \u001b[90mFloat64\u001b[39m    │ \u001b[90mReal\u001b[39m    │ \u001b[90mNothing\u001b[39m │\n",
       "├─────┼──────────┼───────────┼──────────┼────────────┼─────────┼─────────┤\n",
       "│ 1   │ ID       │ 20.5      │ 11       │ 20.5       │ 30      │         │\n",
       "│ 2   │ var1     │ -0.187058 │ -3.21136 │ -0.0466532 │ 1.85278 │         │\n",
       "│ 3   │ var2     │ 9.49853   │ 7.2771   │ 9.2645     │ 14.8349 │         │\n",
       "│ 4   │ var3     │ 10.6      │ 5        │ 10.5       │ 15      │         │"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "describe(df2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "7. In a code cell below, add a column named cat1 to the df2 DataFrame consisting of a random selection of 20 values from the sample space GroupA and GroupB."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>ID</th><th>var1</th><th>var2</th><th>var3</th><th>Col1</th></tr><tr><th></th><th>Int64</th><th>Float64</th><th>Float64</th><th>Int64</th><th>String</th></tr></thead><tbody><p>20 rows × 5 columns</p><tr><th>1</th><td>11</td><td>-0.560501</td><td>9.83968</td><td>15</td><td>GroupA</td></tr><tr><th>2</th><td>12</td><td>-0.0192918</td><td>7.81756</td><td>14</td><td>GroupB</td></tr><tr><th>3</th><td>13</td><td>0.128064</td><td>8.83897</td><td>11</td><td>GroupB</td></tr><tr><th>4</th><td>14</td><td>1.85278</td><td>9.36913</td><td>10</td><td>GroupB</td></tr><tr><th>5</th><td>15</td><td>-0.827763</td><td>7.2771</td><td>15</td><td>GroupB</td></tr><tr><th>6</th><td>16</td><td>0.110096</td><td>9.77109</td><td>15</td><td>GroupA</td></tr><tr><th>7</th><td>17</td><td>-0.251176</td><td>10.3317</td><td>6</td><td>GroupB</td></tr><tr><th>8</th><td>18</td><td>0.369714</td><td>9.18312</td><td>5</td><td>GroupA</td></tr><tr><th>9</th><td>19</td><td>0.0721164</td><td>7.98043</td><td>12</td><td>GroupB</td></tr><tr><th>10</th><td>20</td><td>-1.50343</td><td>8.91239</td><td>13</td><td>GroupA</td></tr><tr><th>11</th><td>21</td><td>1.56417</td><td>7.54655</td><td>14</td><td>GroupB</td></tr><tr><th>12</th><td>22</td><td>-1.39674</td><td>8.91657</td><td>5</td><td>GroupB</td></tr><tr><th>13</th><td>23</td><td>1.1055</td><td>8.62701</td><td>8</td><td>GroupA</td></tr><tr><th>14</th><td>24</td><td>-1.10673</td><td>8.57414</td><td>9</td><td>GroupA</td></tr><tr><th>15</th><td>25</td><td>-3.21136</td><td>9.34588</td><td>5</td><td>GroupA</td></tr><tr><th>16</th><td>26</td><td>-0.0740145</td><td>11.0297</td><td>9</td><td>GroupA</td></tr><tr><th>17</th><td>27</td><td>0.150976</td><td>14.8349</td><td>10</td><td>GroupA</td></tr><tr><th>18</th><td>28</td><td>0.769278</td><td>9.38405</td><td>14</td><td>GroupA</td></tr><tr><th>19</th><td>29</td><td>-0.310153</td><td>12.4906</td><td>15</td><td>GroupA</td></tr><tr><th>20</th><td>30</td><td>-0.602707</td>
<td>9.9001</td><td>7</td><td>GroupA</td></tr></tbody></table>"
      ],
      "text/latex": [
       "\\begin{tabular}{r|ccccc}\n",
       "\t& ID & var1 & var2 & var3 & Col1\\\\\n",
       "\t\\hline\n",
       "\t& Int64 & Float64 & Float64 & Int64 & String\\\\\n",
       "\t\\hline\n",
       "\t1 & 11 & -0.560501 & 9.83968 & 15 & GroupA \\\\\n",
       "\t2 & 12 & -0.0192918 & 7.81756 & 14 & GroupB \\\\\n",
       "\t3 & 13 & 0.128064 & 8.83897 & 11 & GroupB \\\\\n",
       "\t4 & 14 & 1.85278 & 9.36913 & 10 & GroupB \\\\\n",
       "\t5 & 15 & -0.827763 & 7.2771 & 15 & GroupB \\\\\n",
       "\t6 & 16 & 0.110096 & 9.77109 & 15 & GroupA \\\\\n",
       "\t7 & 17 & -0.251176 & 10.3317 & 6 & GroupB \\\\\n",
       "\t8 & 18 & 0.369714 & 9.18312 & 5 & GroupA \\\\\n",
       "\t9 & 19 & 0.0721164 & 7.98043 & 12 & GroupB \\\\\n",
       "\t10 & 20 & -1.50343 & 8.91239 & 13 & GroupA \\\\\n",
       "\t11 & 21 & 1.56417 & 7.54655 & 14 & GroupB \\\\\n",
       "\t12 & 22 & -1.39674 & 8.91657 & 5 & GroupB \\\\\n",
       "\t13 & 23 & 1.1055 & 8.62701 & 8 & GroupA \\\\\n",
       "\t14 & 24 & -1.10673 & 8.57414 & 9 & GroupA \\\\\n",
       "\t15 & 25 & -3.21136 & 9.34588 & 5 & GroupA \\\\\n",
       "\t16 & 26 & -0.0740145 & 11.0297 & 9 & GroupA \\\\\n",
       "\t17 & 27 & 0.150976 & 14.8349 & 10 & GroupA \\\\\n",
       "\t18 & 28 & 0.769278 & 9.38405 & 14 & GroupA \\\\\n",
       "\t19 & 29 & -0.310153 & 12.4906 & 15 & GroupA \\\\\n",
       "\t20 & 30 & -0.602707 & 9.9001 & 7 & GroupA \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/plain": [
       "20×5 DataFrame\n",
       "│ Row │ ID    │ var1       │ var2    │ var3  │ Col1   │\n",
       "│     │ \u001b[90mInt64\u001b[39m │ \u001b[90mFloat64\u001b[39m    │ \u001b[90mFloat64\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mString\u001b[39m │\n",
       "├─────┼───────┼────────────┼─────────┼───────┼────────┤\n",
       "│ 1   │ 11    │ -0.560501  │ 9.83968 │ 15    │ GroupA │\n",
       "│ 2   │ 12    │ -0.0192918 │ 7.81756 │ 14    │ GroupB │\n",
       "│ 3   │ 13    │ 0.128064   │ 8.83897 │ 11    │ GroupB │\n",
       "│ 4   │ 14    │ 1.85278    │ 9.36913 │ 10    │ GroupB │\n",
       "│ 5   │ 15    │ -0.827763  │ 7.2771  │ 15    │ GroupB │\n",
       "│ 6   │ 16    │ 0.110096   │ 9.77109 │ 15    │ GroupA │\n",
       "│ 7   │ 17    │ -0.251176  │ 10.3317 │ 6     │ GroupB │\n",
       "│ 8   │ 18    │ 0.369714   │ 9.18312 │ 5     │ GroupA │\n",
       "│ 9   │ 19    │ 0.0721164  │ 7.98043 │ 12    │ GroupB │\n",
       "│ 10  │ 20    │ -1.50343   │ 8.91239 │ 13    │ GroupA │\n",
       "│ 11  │ 21    │ 1.56417    │ 7.54655 │ 14    │ GroupB │\n",
       "│ 12  │ 22    │ -1.39674   │ 8.91657 │ 5     │ GroupB │\n",
       "│ 13  │ 23    │ 1.1055     │ 8.62701 │ 8     │ GroupA │\n",
       "│ 14  │ 24    │ -1.10673   │ 8.57414 │ 9     │ GroupA │\n",
       "│ 15  │ 25    │ -3.21136   │ 9.34588 │ 5     │ GroupA │\n",
       "│ 16  │ 26    │ -0.0740145 │ 11.0297 │ 9     │ GroupA │\n",
       "│ 17  │ 27    │ 0.150976   │ 14.8349 │ 10    │ GroupA │\n",
       "│ 18  │ 28    │ 0.769278   │ 9.38405 │ 14    │ GroupA │\n",
       "│ 19  │ 29    │ -0.310153  │ 12.4906 │ 15    │ GroupA │\n",
       "│ 20  │ 30    │ -0.602707  │ 9.9001  │ 7     │ GroupA │"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "insertcols!(df2,:Col1 => rand([\"GroupA\",\"GroupB\"],20))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "8. In a code cell below, create a DataFrame named df3 with columns named *id*, var4 and var5 such that id contains the values 11 through 30, var4 contains the values 21 through 40 and var5 contains the values 41 through 60."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>ID</th><th>var4</th><th>var5</th></tr><tr><th></th><th>Int64</th><th>Int64</th><th>Int64</th></tr></thead><tbody><p>20 rows × 3 columns</p><tr><th>1</th><td>11</td><td>21</td><td>41</td></tr><tr><th>2</th><td>12</td><td>22</td><td>42</td></tr><tr><th>3</th><td>13</td><td>23</td><td>43</td></tr><tr><th>4</th><td>14</td><td>24</td><td>44</td></tr><tr><th>5</th><td>15</td><td>25</td><td>45</td></tr><tr><th>6</th><td>16</td><td>26</td><td>46</td></tr><tr><th>7</th><td>17</td><td>27</td><td>47</td></tr><tr><th>8</th><td>18</td><td>28</td><td>48</td></tr><tr><th>9</th><td>19</td><td>29</td><td>49</td></tr><tr><th>10</th><td>20</td><td>30</td><td>50</td></tr><tr><th>11</th><td>21</td><td>31</td><td>51</td></tr><tr><th>12</th><td>22</td><td>32</td><td>52</td></tr><tr><th>13</th><td>23</td><td>33</td><td>53</td></tr><tr><th>14</th><td>24</td><td>34</td><td>54</td></tr><tr><th>15</th><td>25</td><td>35</td><td>55</td></tr><tr><th>16</th><td>26</td><td>36</td><td>56</td></tr><tr><th>17</th><td>27</td><td>37</td><td>57</td></tr><tr><th>18</th><td>28</td><td>38</td><td>58</td></tr><tr><th>19</th><td>29</td><td>39</td><td>59</td></tr><tr><th>20</th><td>30</td><td>40</td><td>60</td></tr></tbody></table>"
      ],
      "text/latex": [
       "\\begin{tabular}{r|ccc}\n",
       "\t& ID & var4 & var5\\\\\n",
       "\t\\hline\n",
       "\t& Int64 & Int64 & Int64\\\\\n",
       "\t\\hline\n",
       "\t1 & 11 & 21 & 41 \\\\\n",
       "\t2 & 12 & 22 & 42 \\\\\n",
       "\t3 & 13 & 23 & 43 \\\\\n",
       "\t4 & 14 & 24 & 44 \\\\\n",
       "\t5 & 15 & 25 & 45 \\\\\n",
       "\t6 & 16 & 26 & 46 \\\\\n",
       "\t7 & 17 & 27 & 47 \\\\\n",
       "\t8 & 18 & 28 & 48 \\\\\n",
       "\t9 & 19 & 29 & 49 \\\\\n",
       "\t10 & 20 & 30 & 50 \\\\\n",
       "\t11 & 21 & 31 & 51 \\\\\n",
       "\t12 & 22 & 32 & 52 \\\\\n",
       "\t13 & 23 & 33 & 53 \\\\\n",
       "\t14 & 24 & 34 & 54 \\\\\n",
       "\t15 & 25 & 35 & 55 \\\\\n",
       "\t16 & 26 & 36 & 56 \\\\\n",
       "\t17 & 27 & 37 & 57 \\\\\n",
       "\t18 & 28 & 38 & 58 \\\\\n",
       "\t19 & 29 & 39 & 59 \\\\\n",
       "\t20 & 30 & 40 & 60 \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/plain": [
       "20×3 DataFrame\n",
       "│ Row │ ID    │ var4  │ var5  │\n",
       "│     │ \u001b[90mInt64\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mInt64\u001b[39m │\n",
       "├─────┼───────┼───────┼───────┤\n",
       "│ 1   │ 11    │ 21    │ 41    │\n",
       "│ 2   │ 12    │ 22    │ 42    │\n",
       "│ 3   │ 13    │ 23    │ 43    │\n",
       "│ 4   │ 14    │ 24    │ 44    │\n",
       "│ 5   │ 15    │ 25    │ 45    │\n",
       "│ 6   │ 16    │ 26    │ 46    │\n",
       "│ 7   │ 17    │ 27    │ 47    │\n",
       "│ 8   │ 18    │ 28    │ 48    │\n",
       "│ 9   │ 19    │ 29    │ 49    │\n",
       "│ 10  │ 20    │ 30    │ 50    │\n",
       "│ 11  │ 21    │ 31    │ 51    │\n",
       "│ 12  │ 22    │ 32    │ 52    │\n",
       "│ 13  │ 23    │ 33    │ 53    │\n",
       "│ 14  │ 24    │ 34    │ 54    │\n",
       "│ 15  │ 25    │ 35    │ 55    │\n",
       "│ 16  │ 26    │ 36    │ 56    │\n",
       "│ 17  │ 27    │ 37    │ 57    │\n",
       "│ 18  │ 28    │ 38    │ 58    │\n",
       "│ 19  │ 29    │ 39    │ 59    │\n",
       "│ 20  │ 30    │ 40    │ 60    │"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df3 = DataFrame(ID = collect(11:30), var4 = collect(21:40), var5 = collect(41:60))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "9. In a code cell below, do a join of DataFrames df2 and df3 on the id column and save the result as a new DataFrame called df4."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>ID</th><th>var1</th><th>var2</th><th>var3</th><th>Col1</th><th>var4</th><th>var5</th></tr><tr><th></th><th>Int64</th><th>Float64</th><th>Float64</th><th>Int64</th><th>String</th><th>Int64</th><th>Int64</th></tr></thead><tbody><p>20 rows × 7 columns</p><tr><th>1</th><td>11</td><td>-0.560501</td><td>9.83968</td><td>15</td><td>GroupA</td><td>21</td><td>41</td></tr><tr><th>2</th><td>12</td><td>-0.0192918</td><td>7.81756</td><td>14</td><td>GroupB</td><td>22</td><td>42</td></tr><tr><th>3</th><td>13</td><td>0.128064</td><td>8.83897</td><td>11</td><td>GroupB</td><td>23</td><td>43</td></tr><tr><th>4</th><td>14</td><td>1.85278</td><td>9.36913</td><td>10</td><td>GroupB</td><td>24</td><td>44</td></tr><tr><th>5</th><td>15</td><td>-0.827763</td><td>7.2771</td><td>15</td><td>GroupB</td><td>25</td><td>45</td></tr><tr><th>6</th><td>16</td><td>0.110096</td><td>9.77109</td><td>15</td><td>GroupA</td><td>26</td><td>46</td></tr><tr><th>7</th><td>17</td><td>-0.251176</td><td>10.3317</td><td>6</td><td>GroupB</td><td>27</td><td>47</td></tr><tr><th>8</th><td>18</td><td>0.369714</td><td>9.18312</td><td>5</td><td>GroupA</td><td>28</td><td>48</td></tr><tr><th>9</th><td>19</td><td>0.0721164</td><td>7.98043</td><td>12</td><td>GroupB</td><td>29</td><td>49</td></tr><tr><th>10</th><td>20</td><td>-1.50343</td><td>8.91239</td><td>13</td><td>GroupA</td><td>30</td><td>50</td></tr><tr><th>11</th><td>21</td><td>1.56417</td><td>7.54655</td><td>14</td><td>GroupB</td><td>31</td><td>51</td></tr><tr><th>12</th><td>22</td><td>-1.39674</td><td>8.91657</td><td>5</td><td>GroupB</td><td>32</td><td>52</td></tr><tr><th>13</th><td>23</td><td>1.1055</td><td>8.62701</td><td>8</td><td>GroupA</td><td>33</td><td>53</td></tr><tr><th>14</th><td>24</td><td>-1.10673</td><td>8.57414</td><td>9</td><td>GroupA</td><td>34</td><td>54</td></tr><tr><th>15</th><td>25</td><td>-3.21136</td><td>9.34588</td><td>5</td><td>GroupA</td><td>35</td><td>55</td></tr><tr><th>16</th><td>26<
/td><td>-0.0740145</td><td>11.0297</td><td>9</td><td>GroupA</td><td>36</td><td>56</td></tr><tr><th>17</th><td>27</td><td>0.150976</td><td>14.8349</td><td>10</td><td>GroupA</td><td>37</td><td>57</td></tr><tr><th>18</th><td>28</td><td>0.769278</td><td>9.38405</td><td>14</td><td>GroupA</td><td>38</td><td>58</td></tr><tr><th>19</th><td>29</td><td>-0.310153</td><td>12.4906</td><td>15</td><td>GroupA</td><td>39</td><td>59</td></tr><tr><th>20</th><td>30</td><td>-0.602707</td><td>9.9001</td><td>7</td><td>GroupA</td><td>40</td><td>60</td></tr></tbody></table>"
      ],
      "text/latex": [
       "\\begin{tabular}{r|ccccccc}\n",
       "\t& ID & var1 & var2 & var3 & Col1 & var4 & var5\\\\\n",
       "\t\\hline\n",
       "\t& Int64 & Float64 & Float64 & Int64 & String & Int64 & Int64\\\\\n",
       "\t\\hline\n",
       "\t1 & 11 & -0.560501 & 9.83968 & 15 & GroupA & 21 & 41 \\\\\n",
       "\t2 & 12 & -0.0192918 & 7.81756 & 14 & GroupB & 22 & 42 \\\\\n",
       "\t3 & 13 & 0.128064 & 8.83897 & 11 & GroupB & 23 & 43 \\\\\n",
       "\t4 & 14 & 1.85278 & 9.36913 & 10 & GroupB & 24 & 44 \\\\\n",
       "\t5 & 15 & -0.827763 & 7.2771 & 15 & GroupB & 25 & 45 \\\\\n",
       "\t6 & 16 & 0.110096 & 9.77109 & 15 & GroupA & 26 & 46 \\\\\n",
       "\t7 & 17 & -0.251176 & 10.3317 & 6 & GroupB & 27 & 47 \\\\\n",
       "\t8 & 18 & 0.369714 & 9.18312 & 5 & GroupA & 28 & 48 \\\\\n",
       "\t9 & 19 & 0.0721164 & 7.98043 & 12 & GroupB & 29 & 49 \\\\\n",
       "\t10 & 20 & -1.50343 & 8.91239 & 13 & GroupA & 30 & 50 \\\\\n",
       "\t11 & 21 & 1.56417 & 7.54655 & 14 & GroupB & 31 & 51 \\\\\n",
       "\t12 & 22 & -1.39674 & 8.91657 & 5 & GroupB & 32 & 52 \\\\\n",
       "\t13 & 23 & 1.1055 & 8.62701 & 8 & GroupA & 33 & 53 \\\\\n",
       "\t14 & 24 & -1.10673 & 8.57414 & 9 & GroupA & 34 & 54 \\\\\n",
       "\t15 & 25 & -3.21136 & 9.34588 & 5 & GroupA & 35 & 55 \\\\\n",
       "\t16 & 26 & -0.0740145 & 11.0297 & 9 & GroupA & 36 & 56 \\\\\n",
       "\t17 & 27 & 0.150976 & 14.8349 & 10 & GroupA & 37 & 57 \\\\\n",
       "\t18 & 28 & 0.769278 & 9.38405 & 14 & GroupA & 38 & 58 \\\\\n",
       "\t19 & 29 & -0.310153 & 12.4906 & 15 & GroupA & 39 & 59 \\\\\n",
       "\t20 & 30 & -0.602707 & 9.9001 & 7 & GroupA & 40 & 60 \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/plain": [
       "20×7 DataFrame\n",
       "│ Row │ ID    │ var1       │ var2    │ var3  │ Col1   │ var4  │ var5  │\n",
       "│     │ \u001b[90mInt64\u001b[39m │ \u001b[90mFloat64\u001b[39m    │ \u001b[90mFloat64\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mInt64\u001b[39m │\n",
       "├─────┼───────┼────────────┼─────────┼───────┼────────┼───────┼───────┤\n",
       "│ 1   │ 11    │ -0.560501  │ 9.83968 │ 15    │ GroupA │ 21    │ 41    │\n",
       "│ 2   │ 12    │ -0.0192918 │ 7.81756 │ 14    │ GroupB │ 22    │ 42    │\n",
       "│ 3   │ 13    │ 0.128064   │ 8.83897 │ 11    │ GroupB │ 23    │ 43    │\n",
       "│ 4   │ 14    │ 1.85278    │ 9.36913 │ 10    │ GroupB │ 24    │ 44    │\n",
       "│ 5   │ 15    │ -0.827763  │ 7.2771  │ 15    │ GroupB │ 25    │ 45    │\n",
       "│ 6   │ 16    │ 0.110096   │ 9.77109 │ 15    │ GroupA │ 26    │ 46    │\n",
       "│ 7   │ 17    │ -0.251176  │ 10.3317 │ 6     │ GroupB │ 27    │ 47    │\n",
       "│ 8   │ 18    │ 0.369714   │ 9.18312 │ 5     │ GroupA │ 28    │ 48    │\n",
       "│ 9   │ 19    │ 0.0721164  │ 7.98043 │ 12    │ GroupB │ 29    │ 49    │\n",
       "│ 10  │ 20    │ -1.50343   │ 8.91239 │ 13    │ GroupA │ 30    │ 50    │\n",
       "│ 11  │ 21    │ 1.56417    │ 7.54655 │ 14    │ GroupB │ 31    │ 51    │\n",
       "│ 12  │ 22    │ -1.39674   │ 8.91657 │ 5     │ GroupB │ 32    │ 52    │\n",
       "│ 13  │ 23    │ 1.1055     │ 8.62701 │ 8     │ GroupA │ 33    │ 53    │\n",
       "│ 14  │ 24    │ -1.10673   │ 8.57414 │ 9     │ GroupA │ 34    │ 54    │\n",
       "│ 15  │ 25    │ -3.21136   │ 9.34588 │ 5     │ GroupA │ 35    │ 55    │\n",
       "│ 16  │ 26    │ -0.0740145 │ 11.0297 │ 9     │ GroupA │ 36    │ 56    │\n",
       "│ 17  │ 27    │ 0.150976   │ 14.8349 │ 10    │ GroupA │ 37    │ 57    │\n",
       "│ 18  │ 28    │ 0.769278   │ 9.38405 │ 14    │ GroupA │ 38    │ 58    │\n",
       "│ 19  │ 29    │ -0.310153  │ 12.4906 │ 15    │ GroupA │ 39    │ 59    │\n",
       "│ 20  │ 30    │ -0.602707  │ 9.9001  │ 7     │ GroupA │ 40    │ 60    │"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df4 = innerjoin(df2,df3,on = :ID)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Julia 1.2.0",
   "language": "julia",
   "name": "julia-1.2"
  },
  "language_info": {
   "file_extension": ".jl",
   "mimetype": "application/julia",
   "name": "julia",
   "version": "1.2.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
 }
--- a/data.ipynb
+++ b/data.ipynb
--- a/Week4_Working_with_data.jl
+++ b/Week4_Working_with_data.jl
@ -0,0 +1,201 @@
 # WORKING WITH DATA
 # -----------------
 # I Distributions
 # * Data point values for a variable usually follow a pattern
 # * Such patterns are called distributions
 # * Distributions are either discrete or continuous
 # * The Distributions.jl package contains most of the common
 #       data distributions
 # 1 Importing Distributions.jl
 using Distributions
 using Random
 #InteractiveUtils provides subtypes(); the REPL loads it automatically,
 #  but running this file as a plain script does not
 using InteractiveUtils
 # 2 The standard normal distribution
 #Seed the pseudo-random number generator so the draws below are reproducible
 Random.seed!(1234)
 #Saving the standard normal distribution as an object
 n = Normal()
 #Parameter values of the standard normal distribution
 params(n)
 #Select 10 elements at random from n
 var1 = rand(n, 10)
 #Calculating the mean and standard deviation of var1
 mean(var1)
 std(var1)
 #Probability density function value at x = 0.3
 pdf(n, 0.3)
 #Cumulative distribution function at x = 0.25
 cdf(n, 0.25)
 #Quantiles
 quantile(n, 0.025)
 quantile(n, 0.975)
 # 3 The normal distribution
 #Returning the field (parameter) names of the Normal type
 fieldnames(Normal)
 #Creating 100 data point values from a normal distribution
 #  with a mean of 100 and a standard deviation of 10
 var2 = rand(Normal(100, 10), 100)
 #Calculating the mean and standard deviation of var2
 mean(var2)
 std(var2)
 #Using fit() to estimate the parameters of a distribution from data
 fit(Normal, var2)
 # 4 Skewness and kurtosis
 skewness(var2)
 kurtosis(var2)
 # 5 Beta distribution
 b = Beta(1, 1)
 params(b)
 var3 = rand(b, 100)
 fit(Beta, var3)
 # 6 χ2 distribution
 c = Chisq(1)
 var4 = rand(c, 100)
 fieldnames(Chisq) # Degrees of freedom
 # 7 Distribution types are hierarchical
 supertype(Normal)
 subtypes(Distribution{Univariate,Continuous})
 subtypes(Distribution{Univariate,Discrete})
 # * Search for help in the REPL
 # II DataFrames
 using DataFrames
 # * Allows for creation of a flat data structure (rows and columns)
 # * Columns are variables
 # * Rows are subjects (examples)
 # 1 Create a DataFrame
 typeof(var2)
 #Create an empty DataFrame
 df = DataFrame()
 # 2 Add a column with data point values (rows)
 #df[!, col] = v replaces the removed df[col] = v column syntax
 df[!, :Var2] = var2
 #View the first six rows (first() replaces the removed head(),
 #  whose default was six rows)
 first(df, 6)
 # 3 Add another column
 df[!, :Var3] = var3
 #View last three rows (last() replaces the removed tail())
 last(df, 3)
 # 4 Dimensions of a DataFrame
 size(df)
 rows = size(df, 1)
 columns = size(df, 2)
 # 5 Inspect content
 #Column names (showcols() no longer exists)
 names(df)
 #Data type only (replaces the removed eltypes())
 eltype.(eachcol(df))
 #Descriptive statistics
 describe(df)
 #Print in console
 print(describe(df))
 # 6 Create a bigger DataFrame
 df2 = DataFrame()
 df2[!, :A] = 1:10
 df2[!, :B] = ["I", "II", "II", "I", "II", "I", "II", "II", "I", "II"]
 df2[!, :C] = rand(Normal(), 10)
 df2[!, :D] = rand(Chisq(1), 10)
 # 7 Slicing
 #First three rows
 df2[1:3, :]
 #All rows, columns 1 and 3
 df2[:, [1, 3]]
 #Same selection by column name
 df2[:, [:A, :C]]
 # III Importing data files
 # 1 Set working directory in Atom settings under Julia tab
 pwd()
 # 2 Import CSV
 using CSV
 # 3 Import csv file (in same directory)
 #CSV.read(path) without a sink type is rejected by current CSV.jl;
 #  materialising a CSV.File into a DataFrame works on old and new versions
 data1 = DataFrame(CSV.File("CCS.csv"))
 # 4 Explore the data
 typeof(data1)
 #First six rows (first() replaces the removed head())
 first(data1, 6)
 #Column names (showcols() no longer exists)
 names(data1)
 #Column element types (replaces the removed eltypes())
 eltype.(eachcol(data1))
 describe(data1)
 # 5 Combining DataFrames
 #Creating DataFrames
 subjects = DataFrame(Number = [100, 101, 102, 103], Stage = ["I", "III", "II", "I"])
 treatment  = DataFrame(Number = [103, 102, 101, 100], Treatment = ["A", "B", "A", "B"])
 subjects
 treatment
 #Joining (join() was removed; each join kind now has its own function)
 df3 = innerjoin(subjects, treatment, on = :Number);
 df3
 #Adding a longer list of subjects
 subjects = DataFrame(Number = [100, 101, 102, 103, 104, 105], Stage = ["I", "III", "II", "I", "II", "II"])
 #Inner join: only subjects present in both DataFrames are kept
 df4 = innerjoin(subjects, treatment, on = :Number);
 df4
 #Outer join: empty fields filled with missing
 df5  = outerjoin(subjects, treatment, on = :Number);
 df5
 # 6 Grouping
 #Creating a new DataFrame
 df6 = DataFrame(Group = rand(["A", "B", "C"], 15), Variable1 = randn(15), Variable2 = rand(15));
 df6
 #Split-apply-combine: by() was removed, so group first and then combine
 #Apply size to the sub-DataFrame of each group
 combine(size, groupby(df6, :Group))
 #Count the rows belonging to each value in the :Group column
 combine(groupby(df6, :Group), nrow => :Count)
 #Descriptive statistics per group (replaces the removed aggregate());
 #  result columns are named Variable1_mean, Variable2_mean, Variable1_std, Variable2_std
 print(combine(groupby(df6, :Group),
               [:Variable1, :Variable2] .=> mean,
               [:Variable1, :Variable2] .=> std))
 #Group
 groupby(df6, :Group)
 #Number of groups
 length(groupby(df6, :Group))
 #Second group only
 groupby(df6, :Group)[2]
 # 7 Selection
 #Boolean mask: true where Variable1 is positive
 df6.Variable1 .> 0
 #Rows selected by the mask (sub() no longer exists; row indexing returns a copy)
 df6[df6.Variable1 .> 0, :]
 # 8 New DataFrame by selection
 df6A = df6[df6.Group .== "A", :];
 df6A
 # 9 Sorting
 #sort() returns a sorted copy, so df6S and df6S2 are independent results;
 #  the in-place sort!() made both names aliases of df6 itself
 #Descending by Group, then by Variable1
 df6S = sort(df6, [:Group, :Variable1], rev = true);
 df6S
 #Ascending by Group and Variable1, descending by Variable2
 df6S2 = sort(df6, [:Group, :Variable1, :Variable2], rev = [false, false, true]);
 df6S2
 # 10 Unique rows
 #Creating a DataFrame with an obvious duplicate row
 df7 = DataFrame(A = [1, 2, 2, 3, 4, 5],  B = [11, 12, 12, 13, 14, 15], C = ["A", "B", "B", "C", "D", "E"]);
 df7
 #Only unique rows (returns a new DataFrame; df7 itself is unchanged)
 unique(df7)
 df7
 #Permanent change (drops the duplicate row from df7 in place)
 unique!(df7)
 df7
 # 11 Delete rows
 #Permanently remove rows 1 and 5 of the de-duplicated, five-row df7
 #  (deleteat! replaces the removed deleterows!; on DataFrames older
 #  than 1.3 the same call is spelled delete!)
 deleteat!(df7, [1, 5])
 df7