diff --git a/Week4_Collections.ipynb b/Week4_Collections.ipynb
new file mode 100644
index 0000000..a889bf6
--- /dev/null
+++ b/Week4_Collections.ipynb
@@ -0,0 +1,2223 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Collections"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## In this lecture"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- [Introduction](#Introduction)\n",
+ "- [Arrays](#Arrays)\n",
+ "- [Tuples](#Tuples)\n",
+ "- [Dictionaries](#Dictionaries)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Introduction"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Collections are groups of elements. These elements are values of different Julia types. Storing elements in collections is one of the most useful operations in computing."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Arrays"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Arrays are collections of values separated with commas and placed inside of a set of square brackets. They can be represented in column or in row form."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3-element Array{Int64,1}:\n",
+ " 1\n",
+ " 2\n",
+ " 3"
+ ]
+ },
+ "execution_count": 1,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# A column vector\n",
+ "array1 = [1, 2, 3]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `typeof()` function shows that `array1` is an instance of an array object, containing integer values."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Array{Int64,1}"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# The type of the object array1\n",
+ "typeof(array1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Below we create `array2`. Note that there are only spaces between the elements."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1×3 Array{Int64,2}:\n",
+ " 1 2 3"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# A row vector\n",
+ "array2 = [1 2 3]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `transpose()` function will create a linear algebra transpose of our column vector, `array1`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1×3 LinearAlgebra.Transpose{Int64,Array{Int64,1}}:\n",
+ " 1 2 3"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# The transpose\n",
+ "transpose(array1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "When the types of the elements are not the same, all elements _inherit_ the _highest_ type."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3-element Array{Float64,1}:\n",
+ " 1.0\n",
+ " 2.0\n",
+ " 3.0"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# With a mix of types, all the elements inherit the \"highest\" type\n",
+ "array2 = [1, 2, 3.0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1.0"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Index for one of the original integers will be Float64\n",
+ "array2[1]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Arrays can have more than one _dimension_ (here dimension refers to the number of axes of the array, such as rows and columns, and not to the number of elements in a vector, as in the dimension of a vector space)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3×3 Array{Int64,2}:\n",
+ " 1 4 7\n",
+ " 2 5 8\n",
+ " 3 6 9"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Column-wise entry of multidimensional array\n",
+ "array3 = [[1, 2, 3] [4, 5, 6] [7, 8, 9]]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3×3 Array{Int64,2}:\n",
+ " 1 2 3\n",
+ " 4 5 6\n",
+ " 7 8 9"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Row-wise entry of multidimensional array\n",
+ "array4 = [[1 2 3]; [4 5 6]; [7 8 9]]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `length()` function returns the number of elements."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "9"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Length of array3\n",
+ "length(array3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "9"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "length(array4)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Since the two arrays above were created differently, let's take a look at the indices of their elements."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Element 1 is 1\n",
+ "Element 2 is 2\n",
+ "Element 3 is 3\n",
+ "Element 4 is 4\n",
+ "Element 5 is 5\n",
+ "Element 6 is 6\n",
+ "Element 7 is 7\n",
+ "Element 8 is 8\n",
+ "Element 9 is 9\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Index order of column-wise array\n",
+ "for i in 1:length(array3)\n",
+ " println(\"Element $(i) is \", array3[i])\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Element 1 is 1\n",
+ "Element 2 is 4\n",
+ "Element 3 is 7\n",
+ "Element 4 is 2\n",
+ "Element 5 is 5\n",
+ "Element 6 is 8\n",
+ "Element 7 is 3\n",
+ "Element 8 is 6\n",
+ "Element 9 is 9\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Index order of row-wise array\n",
+ "for i in 1:length(array4)\n",
+ " println(\"Element $(i) is \", array4[i])\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Elements can be repeated using the `repeat()` function."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "6-element Array{Int64,1}:\n",
+ " 1\n",
+ " 2\n",
+ " 1\n",
+ " 2\n",
+ " 1\n",
+ " 2"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Using repeat() to repeat column elements\n",
+ "repeat([1, 2], 3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3×2 Array{Int64,2}:\n",
+ " 1 2\n",
+ " 1 2\n",
+ " 1 2"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Using repeat() to repeat row elements\n",
+ "repeat([1 2], 3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `range()` function creates a range object. The first argument is the value of the first element. The `step = ` argument specifies the step size, and the `length =` argument specifies how many elements the range should have."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1:1:10"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Using range(start, step, number of elements)\n",
+ "range(1, step = 1, length = 10)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can change the range object into an array using the `collect()` function."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "10-element Array{Int64,1}:\n",
+ " 1\n",
+ " 2\n",
+ " 3\n",
+ " 4\n",
+ " 5\n",
+ " 6\n",
+ " 7\n",
+ " 8\n",
+ " 9\n",
+ " 10"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Create collections using the collect() function\n",
+ "collect(range(1, step = 1, length = 10))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "10-element Array{Int64,1}:\n",
+ " 1\n",
+ " 2\n",
+ " 3\n",
+ " 4\n",
+ " 5\n",
+ " 6\n",
+ " 7\n",
+ " 8\n",
+ " 9\n",
+ " 10"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Short-hand syntax\n",
+ "collect(1:10)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can create empty arrays as placeholders."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "2×3 Array{Union{Missing, Int64},2}:\n",
+ " missing missing missing\n",
+ " missing missing missing"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Creating empty array with two rows and three columns\n",
+ "array5 = Array{Union{Missing, Int}}(missing, 2, 3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Reshaping is achieved using the `reshape()` function."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3×2 reshape(::Array{Union{Missing, Int64},2}, 3, 2) with eltype Union{Missing, Int64}:\n",
+ " missing missing\n",
+ " missing missing\n",
+ " missing missing"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Reshaping\n",
+ "reshape(array5, 3, 2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Every element in an array has an index (address) value. We already saw this above when we created a for-loop to cycle through the values of our row vs. column created arrays."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "10×5 Array{Int64,2}:\n",
+ " 20 12 11 13 18\n",
+ " 10 17 10 15 19\n",
+ " 16 13 11 11 13\n",
+ " 10 17 17 11 17\n",
+ " 16 20 17 13 13\n",
+ " 15 11 15 20 20\n",
+ " 20 17 20 19 20\n",
+ " 10 10 19 15 17\n",
+ " 13 14 10 10 20\n",
+ " 15 18 18 10 13"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Creating a 10 x 5 array with each element drawn randomly from value 10 through 20\n",
+ "array6 = rand(10:20, 10, 5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Indexing is indicated with square brackets. For arrays with rows and columns, the index values will be in the form `[row, column]`. A colon serves as short-hand syntax indicating _all_ values."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "10-element Array{Int64,1}:\n",
+ " 20\n",
+ " 10\n",
+ " 16\n",
+ " 10\n",
+ " 16\n",
+ " 15\n",
+ " 20\n",
+ " 10\n",
+ " 13\n",
+ " 15"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# All rows in first column\n",
+ "array6[:, 1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "4-element Array{Int64,1}:\n",
+ " 17\n",
+ " 13\n",
+ " 17\n",
+ " 20"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Rows two through five of second column\n",
+ "array6[2:5, 2]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3×2 Array{Int64,2}:\n",
+ " 10 19\n",
+ " 10 17\n",
+ " 15 20"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Values in rows 2, 4, 6, and in columns 1 and 5\n",
+ "array6[[2, 4, 6], [1, 5]]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3-element Array{Int64,1}:\n",
+ " 11\n",
+ " 13\n",
+ " 18"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Values in row 1 from column 3 to the last column\n",
+ "array6[1, 3:end]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Boolean logic can be used to select values based on rules. Below we check if each value in column one is greater than $12$."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "10-element BitArray{1}:\n",
+ " 1\n",
+ " 0\n",
+ " 1\n",
+ " 0\n",
+ " 1\n",
+ " 1\n",
+ " 1\n",
+ " 0\n",
+ " 1\n",
+ " 1"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Boolean logic (returning only true and false)\n",
+ "array6[:, 1] .> 12"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can add values to an array using the `push!()` function. Many functions in Julia have an added exclamation mark, called a _bang_. By convention, it indicates that the function makes permanent changes to the values in a computer variable (it modifies its argument in place)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "6-element Array{Int64,1}:\n",
+ " 1\n",
+ " 2\n",
+ " 3\n",
+ " 4\n",
+ " 5\n",
+ " 10"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Creating a five element array\n",
+ "array7 = [1, 2, 3, 4, 5]\n",
+ "# Permanently append 10 to end of array\n",
+ "push!(array7, 10)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `pop!()` function removes the last element (the bang makes it permanent)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "10"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pop!(array7)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can also change the value of an element by using its index."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1000"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Change second element value to 1000\n",
+ "array7[2] = 1000"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "5-element Array{Int64,1}:\n",
+ " 1\n",
+ " 1000\n",
+ " 3\n",
+ " 4\n",
+ " 5"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Viewing the change\n",
+ "array7"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "_List comprehension_ is a term that refers to the creation of an array using a _recipe_. View the following example."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "5-element Array{Int64,1}:\n",
+ " 3\n",
+ " 6\n",
+ " 9\n",
+ " 12\n",
+ " 15"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# An example of list comprehension\n",
+ "array8 = [3 * i for i in 1:5]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The Julia syntax is very expressive, as the above example shows. Square brackets indicate that we are creating an array. The expression, `3 * i`, indicates what we want each element to look like. The for-loop uses the placeholder over which we wish to iterate, together with the range that we require."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This allows for very complex array creation, which makes it quite versatile."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3×3 Array{Int64,2}:\n",
+ " 1 2 3\n",
+ " 2 4 6\n",
+ " 3 6 9"
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Two-dimensional comprehension: a runs down the rows, b runs across the columns\n",
+ "array9 = [a * b for a in 1:3, b in 1:3]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Arithmetic operations on arrays are performed through the process of _broadcasting_. Below we add $1$ to each element in `array8`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "5-element Array{Int64,1}:\n",
+ " 4\n",
+ " 7\n",
+ " 10\n",
+ " 13\n",
+ " 16"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Elementwise addition of a scalar using dot notation\n",
+ "array8 .+ 1"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "When arrays are of the same shape, we can do element-wise addition."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "5-element Array{Int64,1}:\n",
+ " 4\n",
+ " 1006\n",
+ " 12\n",
+ " 16\n",
+ " 20"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Elementwise addition of similar sized arrays\n",
+ "array7 + array8"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "While it is nice to have a complete set of elements, data is often _missing_. `Missing` is a Julia data type that provides a placeholder for missing data in a statistical sense. It propagates automatically, and although `==` cannot tell whether two missing values are equal, the `===` operator and the `isequal()` function can be used to test for it. Sorting is possible since `isless()` treats missing as greater than all other values."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "missing"
+ ]
+ },
+ "execution_count": 34,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Propagation\n",
+ "missing + 1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "missing"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "missing > 1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "5-element Array{Union{Missing, Int64},1}:\n",
+ " 11 \n",
+ " 22 \n",
+ " 33 \n",
+ " missing\n",
+ " 55 "
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "[1, 2, 3, missing, 5] + [10, 20, 30, 40 ,50]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "missing"
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Checking equality of value using ==\n",
+ "# Cannot return true or false since value is not known\n",
+ "missing == missing"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "true"
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Checking identity with === (missing is a single, unique object)\n",
+ "missing === missing"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "true"
+ ]
+ },
+ "execution_count": 39,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# isequal() treats missing as equal to missing\n",
+ "isequal(missing, missing)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "true"
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Sorting with isless()\n",
+ "isless(1, missing)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "true"
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Checking on infinity\n",
+ "isless(Inf, missing)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can create an array of zeros."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3×3 Array{Int8,2}:\n",
+ " 0 0 0\n",
+ " 0 0 0\n",
+ " 0 0 0"
+ ]
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# A 3 x 3 array of integer zeros\n",
+ "array11 = zeros(Int8, 3, 3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Here is an array of ones."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3×3 Array{Float16,2}:\n",
+ " 1.0 1.0 1.0\n",
+ " 1.0 1.0 1.0\n",
+ " 1.0 1.0 1.0"
+ ]
+ },
+ "execution_count": 43,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# A 3 x 3 array of floating point ones\n",
+ "array12 = ones(Float16, 3, 3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Boolean values are also allowed."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3×3 BitArray{2}:\n",
+ " 1 1 1\n",
+ " 1 1 1\n",
+ " 1 1 1"
+ ]
+ },
+ "execution_count": 44,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Array of true (bit array) values\n",
+ "array13 = trues(3, 3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can even fill an array with a specified value."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3×3 Array{Int64,2}:\n",
+ " 10 10 10\n",
+ " 10 10 10\n",
+ " 10 10 10"
+ ]
+ },
+ "execution_count": 45,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Fill an array with elements of value x\n",
+ "array14 = fill(10, 3, 3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We have already seen that elements of different types all inherit the _highest_ type. We can, in fact, change the type manually with the `convert()` function. As elsewhere in Julia, the dot operator maps the function to each element of a collection."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3×3 Array{Float16,2}:\n",
+ " 10.0 10.0 10.0\n",
+ " 10.0 10.0 10.0\n",
+ " 10.0 10.0 10.0"
+ ]
+ },
+ "execution_count": 46,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Convert elements to a different data type\n",
+ "convert.(Float16, array14)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Arrays can be concatenated."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "6-element Array{Int64,1}:\n",
+ " 1\n",
+ " 2\n",
+ " 3\n",
+ " 10\n",
+ " 20\n",
+ " 30"
+ ]
+ },
+ "execution_count": 47,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Concatenate arrays along dimension 1 (stacks them vertically, adding rows)\n",
+ "array15 = [1, 2, 3]\n",
+ "array16 = [10, 20, 30]\n",
+ "cat(array15, array16, dims = 1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "6-element Array{Int64,1}:\n",
+ " 1\n",
+ " 2\n",
+ " 3\n",
+ " 10\n",
+ " 20\n",
+ " 30"
+ ]
+ },
+ "execution_count": 48,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Same as above\n",
+ "vcat(array15, array16)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3×2 Array{Int64,2}:\n",
+ " 1 10\n",
+ " 2 20\n",
+ " 3 30"
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Concatenate arrays along dimension 2 (places them side by side, adding columns)\n",
+ "cat(array15, array16, dims = 2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3×2 Array{Int64,2}:\n",
+ " 1 10\n",
+ " 2 20\n",
+ " 3 30"
+ ]
+ },
+ "execution_count": 50,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Same as above\n",
+ "hcat(array15, array16)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Tuples"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Tuples are immutable collections. Immutable refers to the fact that the values are set and cannot be changed. This type is indicated by the use of parentheses instead of square brackets."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(1, 2, 3, 4, \"Julia\")"
+ ]
+ },
+ "execution_count": 51,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Tuples with mixed types\n",
+ "tuple1 = (1, 2, 3, 4, \"Julia\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's check on the values and types of each element."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " The value of the tuple at index number 1 is 1 and the type is Int64.\n",
+ " The value of the tuple at index number 2 is 2 and the type is Int64.\n",
+ " The value of the tuple at index number 3 is 3 and the type is Int64.\n",
+ " The value of the tuple at index number 4 is 4 and the type is Int64.\n",
+ " The value of the tuple at index number 5 is Julia and the type is String.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# For loop to look at value and type of each element\n",
+ "for i in 1:length(tuple1)\n",
+ " println(\" The value of the tuple at index number $(i) is $(tuple1[i]) and the type is $(typeof(tuple1[i])).\")\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Tuples are useful as each element can be assigned to its own named variable."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1"
+ ]
+ },
+ "execution_count": 53,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Each element can be named\n",
+ "a, b, c, seven = (1, 3, 5, 7)\n",
+ "a"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "7"
+ ]
+ },
+ "execution_count": 54,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "seven"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A range can be used to reverse the order of a tuple."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(\"Julia\", 4, 3, 2, 1)"
+ ]
+ },
+ "execution_count": 55,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Reverse order index (can be done with arrays too)\n",
+ "tuple1[end:-1:1]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Tuples can be made up of elements of different length."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "((1, 2, 3), 1, 2, (3, 100, 1))"
+ ]
+ },
+ "execution_count": 56,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Mixed length tuples\n",
+ "tuple2 = ((1, 2, 3), 1, 2, (3, 100, 1))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(3, 100, 1)"
+ ]
+ },
+ "execution_count": 57,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Element 4\n",
+ "tuple2[4]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "100"
+ ]
+ },
+ "execution_count": 58,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Element 2 in element 4\n",
+ "tuple2[4][2]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Dictionaries"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Dictionaries are collections of key-value pairs."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Dict{Int64,Int64} with 3 entries:\n",
+ " 2 => 66\n",
+ " 3 => 1\n",
+ " 1 => 77"
+ ]
+ },
+ "execution_count": 59,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 1 Example of a dictionary\n",
+ "dictionary1 = Dict(1 => 77, 2 => 66, 3 => 1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In the example above we have the keys `1, 2, 3` with the corresponding values `77, 66, 1`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Dict{Int64,Int64} with 3 entries:\n",
+ " 2 => 200\n",
+ " 3 => 300\n",
+ " 1 => 100"
+ ]
+ },
+ "execution_count": 60,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# The => is shorthand for the Pair() function\n",
+ "dictionary2 = Dict(Pair(1,100), Pair(2,200), Pair(3,300))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can specify the types used in a dict."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 61,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Dict{Any,Any} with 3 entries:\n",
+ " 2 => 66\n",
+ " 3 => \"three\"\n",
+ " 1 => 77"
+ ]
+ },
+ "execution_count": 61,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 2 Specifying types\n",
+ "dictionary3 = Dict{Any, Any}(1 => 77, 2 => 66, 3 => \"three\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Dict{Any,Any} with 2 entries:\n",
+ " (2, 3) => \"hello\"\n",
+ " \"a\" => 1"
+ ]
+ },
+ "execution_count": 62,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# We can get a bit crazy\n",
+ "dictionary4 = Dict{Any, Any}(\"a\" => 1, (2, 3) => \"hello\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "It is perhaps more useful to use symbols (colon symbol and a name) as key values. We can then refer to the key-name when we want to inquire about its value."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "300"
+ ]
+ },
+ "execution_count": 63,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Using symbols as keys\n",
+ "dictionary5 = Dict(:A => 300, :B => 305, :C => 309)\n",
+ "dictionary5[:A]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can check on the key-value pairs in a dictionary."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "true"
+ ]
+ },
+ "execution_count": 64,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Using in() to check on key-value pairs\n",
+ "in((:A => 300), dictionary5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Changing a value using its key is easy to perform."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Dict{Symbol,Int64} with 3 entries:\n",
+ " :A => 300\n",
+ " :B => 305\n",
+ " :C => 1000"
+ ]
+ },
+ "execution_count": 65,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Changing an existing value\n",
+ "dictionary5[:C] = 1000\n",
+ "dictionary5"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `delete!()` function permanently deletes a key-value pair."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Dict{Symbol,Int64} with 2 entries:\n",
+ " :B => 305\n",
+ " :C => 1000"
+ ]
+ },
+ "execution_count": 66,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Using the delete!() function\n",
+ "delete!(dictionary5, :A)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can list both the keys and the values in a dictionary."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Base.KeySet for a Dict{Symbol,Int64} with 2 entries. Keys:\n",
+ " :B\n",
+ " :C"
+ ]
+ },
+ "execution_count": 67,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# The keys of a dictionary\n",
+ "keys(dictionary5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Base.ValueIterator for a Dict{Symbol,Int64} with 2 entries. Values:\n",
+ " 305\n",
+ " 1000"
+ ]
+ },
+ "execution_count": 68,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "values(dictionary5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Through the use of iteration, we can get creative in the creation and interrogation of a dictionary."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Creating a dictionary with automatic keys\n",
+ "procedure_vals = [\"Appendectomy\", \"Colectomy\", \"Cholecystectomy\"]\n",
+ "procedure_dict = Dict{AbstractString,AbstractString}()\n",
+ "for (s, n) in enumerate(procedure_vals)\n",
+ " procedure_dict[\"x_$(s)\"] = n\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 70,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Dict{AbstractString,AbstractString} with 3 entries:\n",
+ " \"x_1\" => \"Appendectomy\"\n",
+ " \"x_2\" => \"Colectomy\"\n",
+ " \"x_3\" => \"Cholecystectomy\""
+ ]
+ },
+ "execution_count": 70,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "procedure_dict"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 71,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "x_1 is Appendectomy\n",
+ "x_2 is Colectomy\n",
+ "x_3 is Cholecystectomy\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Iterating through a dictionary by key and value\n",
+ "for (k, v) in procedure_dict\n",
+ " println(k, \" is \",v)\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Lastly, we can sort using iteration."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 72,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "a is 1\n",
+ "b is 2\n",
+ "c is 3\n",
+ "d is 4\n",
+ "e is 5\n",
+ "f is 6\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Sorting\n",
+ "dictionary6 = Dict(\"a\"=> 1,\"b\"=>2 ,\"c\"=>3 ,\"d\"=>4 ,\"e\"=>5 ,\"f\"=>6)\n",
+ "# Sorting using a for loop\n",
+ "for k in sort(collect(keys(dictionary6)))\n",
+ " println(\"$(k) is $(dictionary6[k])\")\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3-element Array{Int64,1}:\n",
+ " 1\n",
+ " 2\n",
+ " 3"
+ ]
+ },
+ "execution_count": 73,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "a = [[1, 2, 3] [4, 5, 6] ]\n",
+ "b = [1,2,3]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 74,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "2-element Array{Int64,1}:\n",
+ " 14\n",
+ " 32"
+ ]
+ },
+ "execution_count": 74,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "transpose(a) * b"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 76,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "2×3 LinearAlgebra.Transpose{Int64,Array{Int64,2}}:\n",
+ " 1 2 3\n",
+ " 4 5 6"
+ ]
+ },
+ "execution_count": 76,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "transpose(a)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 80,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(6,)"
+ ]
+ },
+ "execution_count": 80,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "size(repeat([1, 2], 3))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Tuple{String,Int64,String,Int64,String,Int64}"
+ ]
+ },
+ "execution_count": 81,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "typeof((\"A\", 3, \"B\", 4, \"C\", 2))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Julia 1.2.0",
+ "language": "julia",
+ "name": "julia-1.2"
+ },
+ "language_info": {
+ "file_extension": ".jl",
+ "mimetype": "application/julia",
+ "name": "julia",
+ "version": "1.2.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/Week4_Collections.jl b/Week4_Collections.jl
new file mode 100644
index 0000000..ece239b
--- /dev/null
+++ b/Week4_Collections.jl
@@ -0,0 +1,244 @@
+# COLLECTIONS
+# -----------
+
+# * Collections are groups of elements
+# * Elements are values of different Julia data types
+# * Storing elements in collections is one of the most useful
+# operations in computing
+
+# I ARRAYS
+
+# * Arrays are collections of values separated with commas and
+#   placed inside of square brackets
+# * They are represented in column or in row form
+
+# 1 Like a column vector (click on the downward arrow)
+array1 = [1, 2, 3]
+typeof(array1)
+
+# 2 Like a row vector (click on the downward arrow)
+array2 = [1 2 3]
+typeof(array2)
+
+# 3 The transpose converts between the two
+transpose(array1)
+#The apostrophe (adjoint) is an alternative notation for real-valued arrays
+array1'
+
+# 4 Boolean logic (==)
+transpose(array1) == array1'
+
+# 5 Data type inheritance
+#With a mix of types, all the elements inherit the "highest" type (array2 is reassigned here)
+array2 = [1, 2, 3.0]
+#Indexing one of the original integers now returns a Float64
+array2[1]
+
+# 6 Column-wise entry of multidimensional array
+array3 = [[1, 2, 3] [4, 5, 6] [7, 8, 9]]
+
+# 7 Row-wise entry of multidimensional array
+array4 = [[1 2 3]; [4 5 6]; [7 8 9]]
+
+# 8 Length (total number of elements) of each array
+length(array3)
+length(array4)
+
+# 9 Index order of column-wise array
+for i in 1:length(array3)
+ println("Element $(i) is ", array3[i])
+end
+
+# 10 Index order of row-wise array
+for i in 1:length(array4)
+ println("Element $(i) is ", array4[i])
+end
+
+# 11 Using repeat() to repeat elements
+repeat([1, 2], 3)
+
+# 12 Using range(start, step = step size, length = number of elements)
+range(1, step = 1, length = 10)
+typeof(range(1, step = 1, length = 10))
+
+# 13 Create collections using the collect() function
+collect(range(1, step = 1, length = 10))
+#Short-hand
+collect(1:10)
+typeof(1:10)
+#Add step size
+collect(2:2:10)
+typeof(collect(2:2:10))
+
+# 14 Creating an array of missing values with two rows and three columns
+array5 = Array{Union{Missing, Int}}(missing, 2, 3)
+
+# 15 Reshaping (to three rows and two columns)
+reshape(array5, 3, 2)
+
+# 16 Indexing (slicing)
+#Random integers drawn uniformly from the closed range 10:20
+#Shape 10 x 5
+array6 = rand(10:20, 10, 5)
+#All rows in first column
+array6[:, 1]
+#Rows two through five of second column
+array6[2:5, 2]
+#Values in rows 2, 4, 6, and in columns 1 and 5
+array6[[2, 4, 6], [1, 5]]
+#Values in row 1 from column 3 to the last column
+array6[1, 3:end]
+# Boolean logic (returning only true and false)
+array6[:, 1] .> 12
+
+# 17 Changing element values
+array7 = [1, 2, 3, 4, 5]
+#Permanently append 10 to end of array
+push!(array7, 10)
+#Remove last element
+#Only the removed value will be displayed
+pop!(array7)
+array7
+#Change second element value to 1000
+array7[2] = 1000
+array7
+
+# 18 List comprehension
+array8 = [3 * i for i in 1:5]
+#Two iterators give a 3 x 3 matrix: rows follow a, columns follow b
+array9 = [a * b for a in 1:3, b in 1:3]
+
+
+# 19 Arithmetic on arrays
+#Elementwise addition of a scalar using dot notation
+array8 .+ 1
+#Elementwise addition of equally sized arrays (both have five elements)
+array7 + array8
+
+# 20 Missing
+# * Missing is a Julia data type
+# * Provides a placeholder for missing data in a statistical sense
+# * Propagates automatically
+# * Equality as a type can be tested
+# * Sorting is possible since missing is seen as greater than other values
+
+#Propagation
+missing + 1
+missing > 1
+[1, 2, 3, missing, 5] + [10, 20, 30, 40 ,50]
+#Cannot return true or false since value is not known
+missing == missing
+#Equality
+missing === missing
+isequal(missing, missing)
+#Sorting with isless()
+isless(1, missing)
+isless(Inf, missing)
+
+# 21 Array of integer zeros
+array11 = zeros(Int8, 3, 3)
+
+# 22 Array of floating point ones
+array12 = ones(Float16, 3, 3)
+
+# 23 Array of true (bit array) values
+array13 = trues(3, 3)
+
+# 24 Fill a 3 x 3 array with the value 10
+array14 = fill(10, 3, 3)
+
+# 25 Convert elements to a different data type
+convert.(Float16, array14)
+
+# 26 Concatenation
+#Concatenate along dimension 1 (stacks the vectors into one longer column)
+array15 = [1, 2, 3]
+array16 = [10, 20, 30]
+cat(array15, array16, dims = 1)
+#Same as above
+vcat(array15, array16)
+#Concatenate along dimension 2 (places the vectors side by side as columns)
+cat(array15, array16, dims = 2)
+#Same as above
+hcat(array15, array16)
+
+# II TUPLES
+
+# * Tuples are immutable collections
+
+# 1 Tuples with mixed types
+tuple1 = (1, 2, 3, 4, "Julia")
+#For loop to look at value and type of each element
+for i in 1:length(tuple1)
+ println(" The value of the tuple at index number $(i) is $(tuple1[i]) and the type is $(typeof(tuple1[i])).")
+end
+
+# 2 Each element can be assigned to its own variable name
+a, b, c, seven = (1, 3, 5, 7)
+a
+seven
+
+# 3 Reverse order index (can be done with arrays too)
+tuple1[end:-1:1]
+
+# 4 Nested tuples (elements that are themselves tuples)
+tuple2 = ((1, 2, 3), 1, 2, (3, 100, 1))
+#Element 4
+tuple2[4]
+#Element 2 in element 4
+tuple2[4][2]
+
+# III DICTIONARIES
+
+# * Dictionaries are collections of key-value pairs
+
+# 1 Example of a dictionary
+dictionary1 = Dict(1 => 77, 2 => 66, 3 => 1)
+#The => is shorthand for the Pair() function
+dictionary2 = Dict(Pair(1,100), Pair(2,200), Pair(3,300))
+
+# 2 Specifying types
+dictionary3 = Dict{Any, Any}(1 => 77, 2 => 66, 3 => "three")
+#With Any as key and value type, keys and values of mixed types are allowed
+dictionary4 = Dict{Any, Any}("a" => 1, (2, 3) => "hello")
+
+# 3 Using symbols as keys
+dictionary5 = Dict(:A => 300, :B => 305, :C => 309)
+dictionary5[:A]
+
+# 4 Using in() to check on key-value pairs
+in((:A => 300), dictionary5)
+
+# 5 Changing an existing value
+dictionary5[:C] = 1000
+dictionary5
+
+# 6 Using the delete!() function to permanently remove a key-value pair
+delete!(dictionary5, :A)
+
+# 7 The keys of a dictionary
+keys(dictionary5)
+
+# 8 The values of a dictionary
+values(dictionary5)
+
+# 9 Creating a dictionary with automatic keys
+procedure_vals = ["Appendectomy", "Colectomy", "Cholecystectomy"]
+procedure_dict = Dict{AbstractString,AbstractString}()
+for (s, n) in enumerate(procedure_vals)
+ procedure_dict["x_$(s)"] = n
+end
+#procedure_dict now holds the keys "x_1", "x_2", "x_3"
+procedure_dict
+
+# 10 Iterating through a dictionary by key and value
+for (k, v) in procedure_dict
+ println(k, " is ",v)
+end
+
+# 11 Sorting
+dictionary6 = Dict("a"=> 1,"b"=>2 ,"c"=>3 ,"d"=>4 ,"e"=>5 ,"f"=>6)
+# Printing the entries in sorted key order using a for loop
+for k in sort(collect(keys(dictionary6)))
+ println("$(k) is $(dictionary6[k])")
+end
diff --git a/Week4_Functions.ipynb b/Week4_Functions.ipynb
new file mode 100644
index 0000000..8e73847
--- /dev/null
+++ b/Week4_Functions.ipynb
@@ -0,0 +1,2252 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Functions"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## In this lesson"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- [Introduction](#Introduction)\n",
+ "- [Creating a simple single expression function](#Creating-a-simple-single-expression-function)\n",
+ "- [Multiple expression functions](#Multiple-expression-functions)\n",
+ "- [Flow control in a function](#Flow-control-in-a-function)\n",
+ "- [Using optional arguments](#Using-optional-arguments)\n",
+ "- [Using keyword arguments to bypass the order problem](#Using-keyword-arguments-to-bypass-the-order-problem)\n",
+ "- [Functions with a variable number of arguments](#Functions-with-a-variable-number-of-arguments)\n",
+ "- [Passing arrays as function arguments](#Passing-arrays-as-function-arguments)\n",
+ "- [Type parameters](#Type-parameters)\n",
+ "- [Stabby functions and do blocks](#Stabby-functions-and-do-blocks)\n",
+ "- [Using functions as arguments](#Using-functions-as-arguments)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Introduction"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Julia is a functional language. Given specific information (called arguments), a function is a keyword that executes a task according to rules designed specifically for that function. Think of arithmetical addition as a task (a function) and the values to be added as the arguments.\n",
+ "The term _multiple dispatch_ refers to calling the right implementation of a function based on the arguments. Note that only the positional arguments are used to look up the correct method. When the function is used again, but with different argument types, a new method is selected. This is called _overloading_."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "While we would usually think of the task of addition as a single task, adding numbers, it can in fact be seen as more than one function. One with rules for adding integers, one for adding real numbers, one for adding complex numbers, and so on. So, when we call a function (typing the specific keyword and adding the arguments is referred to as _calling the function_), we actually call a whole bunch of them. Julia decides which one it is going to use based on the argument types (there is a lookup table for every function, which is stored with the function). Julia generates low-level code based on your computer's instruction set. So, when you create a function such as...\n",
+ "```\n",
+ "function cbd(a)\n",
+ " return a^3\n",
+ "end\n",
+ "```\n",
+ "... a whole bunch of methods are created (the different implementations of a function are called _methods_). When the function is called with an integer argument, Julia will generate code that uses the CPU's integer multiplication instruction set and when a floating point value is used, the floating point multiplication instruction set will be targeted."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's have a look at a quintessential Julia function. You might not recognize it at first, but typing `2 + 3` is actually converted to a keyword with arguments when executed."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "5"
+ ]
+ },
+ "execution_count": 1,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Adding 2 and 3\n",
+ "2 + 3"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `+` symbol is actually a function name. The typical _architecture_ of a Julia function is then a keyword, with a set of arguments, separated by commas, all inside of a set of parentheses."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "5"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Addition as a function\n",
+ "+(2, 3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[Back to the top](#In-this-lesson)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Creating a simple single expression function"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Functions in Julia can be created much like a mathematical function. Below we create a function called `f` that takes a single argument. We use the character `x` as placeholder argument. The right-hand side of the equation stipulates the task that we want the function to perform, given a value for the argument."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "f (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# A function to square the argument value\n",
+ "f(x) = x^2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "1 method for generic function f:"
+ ],
+ "text/plain": [
+ "# 1 method for generic function \"f\":\n",
+ "[1] f(x) in Main at In[3]:2"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "methods(f)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can now call the function and provide an argument."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "100"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Squaring 10\n",
+ "f(10)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The answer is $100$ as expected."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Whilst our function seems algebraic in nature, we can create a similar function that will act on a string."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "p (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Creating a function to print input to the screen\n",
+ "p(x) = println(x, \" was entered!\") # The comma concatenates the two strings"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We now pass a string as argument and see the result."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Julia was entered!\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Passing the string \"Julia\"\n",
+ "p(\"Julia\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can use more than one argument too."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "g (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Simple replication of the + function for two arguments\n",
+ "g(x, y) = x + y"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Passing two numbers as arguments now adds the two values."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "7"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "g(3, 4)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[Back to the top](#In-this-lesson)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Multiple expression functions"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "With single expression functions, it was convenient to use the shortcut (almost mathematical) syntax we used above. If we want a function to do a few more things, even have flow control, we have to use function syntax. In the first example below we will have a function that takes two arguments and performs two tasks (has two expressions)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The creation of such a proper Julia function is achieved using the `function` keyword. This is followed by the name given to our new function. It is important to stick to conventions and not use illegal words and characters. The former includes reserved keywords that are already Julia functions and the latter includes leading numbers."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A list of placeholder symbols for our arguments follows. In the function below, we use two arguments. The first task we would like the function to perform is to print the two values that are entered as arguments. The second multiplies the values. All functions are completed with the `end` keyword."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "mltpl (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Declaring the block of code as a function using the function keyword, giving it a name,\n",
+ "# and listing the arguments\n",
+ "function mltpl(x, y)\n",
+ " print(\"The first value is $x and the second value is $y.\\n$x x $y is:\")\n",
+ " # The dollar signs are placeholders for the argument values\n",
+ " # The \\n combination indicates a new-line\n",
+ " x * y\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The first value is 3 and the second value is 4.\n",
+ "3 x 4 is:"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "12"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "mltpl(3, 4)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `return` keyword can be used to force a halt to the task being performed. It is not immediately obvious how this can be helpful. Below is a demonstration. (An example that shows the usefulness of the `return` keyword is shown in [Flow control in a function](#Flow-control-in-a-function) below.)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "mltpl_return (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# The expression (task) after the return keyword will be ignored\n",
+ "function mltpl_return(x, y)\n",
+ " print(\"The first value is $x and the second value is $y.\\n$x x $y is:\")\n",
+ " # The dollar signs are placeholders for the argument values\n",
+ " # The \\n combination indicates a new-line\n",
+ " return x * y\n",
+ " x + y # Adding addition of the two argument values after the return keyword\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The first value is 3 and the second value is 4.\n",
+ "3 x 4 is:"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "12"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "mltpl_return(3, 4)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The omission of the `return` keyword can lead to some unexpected behaviour. Below, we print a line in the first expression, then successively add, subtract, and multiply the two argument values."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "omit_return (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "function omit_return(x, y)\n",
+ " println(\"The argument values that were passed are $x and $y\")\n",
+ " x + y\n",
+ " x - y\n",
+ " x * y\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The argument values that were passed are 3 and 4\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "12"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "omit_return(3, 4)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "All of the expressions were executed, but only the value of the last expression was returned; the results of the addition and subtraction were discarded. We can correct this as shown below."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "multiple_return (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "function multiple_return(x , y)\n",
+ " println(\"The argument values that were passed are $x and $y\")\n",
+ " x + y, x - y, x * y\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The argument values that were passed are 3 and 4\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "(7, -1, 12)"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "multiple_return(3, 4)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We see that the results of the arithmetical operations are returned as a tuple. This can be useful as we can assign a computer variable name to each of the elements in the tuple."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The argument values that were passed are 3 and 4\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "(7, -1, 12)"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ans1, ans2, ans3 = multiple_return(3, 4)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "7"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Calling the value in ans1\n",
+ "ans1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "-1"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ans2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "12"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ans3"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[Back to the top](#In-this-lesson)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Flow control in a function"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A function can have flow control, i.e. `if-else` statements as tasks. Below is an example that also makes the benefits of the use of the `return` keyword more obvious."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The aim of the function is to return the absolute value of the difference between two numbers, without the use of the `abs()` function. The latter returns the absolute value of a value."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "abs_diff (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "function abs_diff(x, y)\n",
+ " if x >= y\n",
+ " return x - y\n",
+ " end\n",
+ " return y - x\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# The absolute value of 4 - 3\n",
+ "abs_diff(4, 3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "2"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# The absolute value of 10 - 12\n",
+ "abs_diff(10, 12)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[Back to the top](#In-this-lesson)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Using optional arguments"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Optional arguments can be declared when a function is being created. These are provided with default values. When they are not used when calling the function, these default values are used. They can be overwritten when the function is called, though."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "func (generic function with 2 methods)"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "function func(a, b, c = 100)\n",
+ " print(\" We have the values $a, $b, and $c.\")\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "When omitting to provide the third argument, the default of $100$ is used."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " We have the values 10, 20, and 100."
+ ]
+ }
+ ],
+ "source": [
+ "# Omitting the third argument\n",
+ "func(10, 20)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Below, we provide a different value to the third argument."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " We have the values 10, 20, and 1000."
+ ]
+ }
+ ],
+ "source": [
+ "func(10, 20, 1000)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[Back to the top](#In-this-lesson)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Using keyword arguments to bypass the order problem"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can create functions with many, many arguments. The problem is that we might forget the argument order when calling the function and passing values to it. To solve this problem, keyword arguments can be declared after a semicolon (;), usually following the ordered arguments. Keyword arguments are passed by name, so their order does not matter. Let's take a look."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "func2 (generic function with 2 methods)"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# A most ridiculously long print statement (apologies)\n",
+ "function func2(a, b, c = 100 ; p = 100, q = \"red\")\n",
+ " println(\"The first ordered argument value is $(a).\")\n",
+ " println(\"The second ordered argumnent is $(b).\")\n",
+ " println(\"The third ordered argument was optional.\")\n",
+ " println(\"If you see a value of 100 here, you either passed a value of 100 or omitted it: $(c).\")\n",
+ " println(\"Let's see what happend to the keyword p: $(p).\")\n",
+ " println(\"Let's see what happens to the keyword q: $(q).\")\n",
+ " println(\"Oh yes, let's also return something useful, like multiplying $(a) and $(b), yielding:\")\n",
+ " return a * b\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can now call the function with just the first two arguments."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The first ordered argument value is 3.\n",
+ "The second ordered argumnent is 4.\n",
+ "The third ordered argument was optional.\n",
+ "If you see a value of 100 here, you either passed a value of 100 or omitted it: 100.\n",
+ "Let's see what happend to the keyword p: 100.\n",
+ "Let's see what happens to the keyword q: red.\n",
+ "Oh yes, let's also return something useful, like multiplying 3 and 4, yielding:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "12"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Calling just the first two ordered arguments\n",
+ "func2(3, 4)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now, let's override the default value of the third argument and then also change some of the keyword arguments."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The first ordered argument value is 3.\n",
+ "The second ordered argumnent is 4.\n",
+ "The third ordered argument was optional.\n",
+ "If you see a value of 100 here, you either passed a value of 100 or omitted it: 5.\n",
+ "Let's see what happend to the keyword p: 100.\n",
+ "Let's see what happens to the keyword q: red.\n",
+ "Oh yes, let's also return something useful, like multiplying 3 and 4, yielding:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "12"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Calling something else for c\n",
+ "func2(3, 4, 5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The first ordered argument value is 3.\n",
+ "The second ordered argumnent is 4.\n",
+ "The third ordered argument was optional.\n",
+ "If you see a value of 100 here, you either passed a value of 100 or omitted it: 100.\n",
+ "Let's see what happend to the keyword p: π.\n",
+ "Let's see what happens to the keyword q: red.\n",
+ "Oh yes, let's also return something useful, like multiplying 3 and 4, yielding:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "12"
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Now let's have some fun with the keyword arguments\n",
+ "func2(3, 4, p = pi) # Using Julia's built-in constant pi"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The first ordered argument value is 3.\n",
+ "The second ordered argumnent is 4.\n",
+ "The third ordered argument was optional.\n",
+ "If you see a value of 100 here, you either passed a value of 100 or omitted it: 2.\n",
+ "Let's see what happend to the keyword p: 100.\n",
+ "Let's see what happens to the keyword q: Hello!.\n",
+ "Oh yes, let's also return something useful, like multiplying 3 and 4, yielding:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "12"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Now for q\n",
+ "func2(3, 4, 2, q = \"Hello!\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The order of the keyword arguments can now be changed when calling the function, as long as we remember to use their names."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The first ordered argument value is 3.\n",
+ "The second ordered argumnent is 4.\n",
+ "The third ordered argument was optional.\n",
+ "If you see a value of 100 here, you either passed a value of 100 or omitted it: 2.\n",
+ "Let's see what happend to the keyword p: 2.718281828459045.\n",
+ "Let's see what happens to the keyword q: It works!.\n",
+ "Oh yes, let's also return something useful, like multiplying 3 and 4, yielding:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "12"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Mixing the keyword arguments around\n",
+ "func2(3, 4, 2, q = \"It works!\", p = exp(1))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The keyword arguments can indeed be placed anywhere; simply use their names. The ordered arguments (those declared before the semicolon), though, must still be passed in the correct order relative to one another, even when keyword arguments are interspersed between them."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The first ordered argument value is 3.\n",
+ "The second ordered argumnent is 4.\n",
+ "The third ordered argument was optional.\n",
+ "If you see a value of 100 here, you either passed a value of 100 or omitted it: 2.\n",
+ "Let's see what happend to the keyword p: 1.7320508075688772.\n",
+ "Let's see what happens to the keyword q: Bananas!.\n",
+ "Oh yes, let's also return something useful, like multiplying 3 and 4, yielding:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "12"
+ ]
+ },
+ "execution_count": 34,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# And finally, we go bananas!\n",
+ "func2(q = \"Bananas!\", 3, 4, p = sqrt(3), 2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[Back to the top](#In-this-lesson)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Functions with a variable number of arguments"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can use three dots, as in ..., (called a splat or ellipsis) to indicate none, one, or many arguments. Let's take a look."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "func3 (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "function func3(args...)\n",
+ " print(\"I can tell you how many arguments you passed: $(length(args)).\")\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The function simply counts the number of arguments passed."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "I can tell you how many arguments you passed: 0."
+ ]
+ }
+ ],
+ "source": [
+ "# Calling nothing, nothing, nothing. Hello! Is anyone home?\n",
+ "func3()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Below, we take a look at what happens when we pass a variety of arguments."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "I can tell you how many arguments you passed: 1."
+ ]
+ }
+ ],
+ "source": [
+ "# Now someone's home!\n",
+ "func3(1000000)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "I can tell you how many arguments you passed: 1."
+ ]
+ }
+ ],
+ "source": [
+ "# It's Julia!\n",
+ "func3(\"Julia\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "I can tell you how many arguments you passed: 2."
+ ]
+ }
+ ],
+ "source": [
+ "# Passing two arguments\n",
+ "func3(\"Hello\", \"Julia\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "I can tell you how many arguments you passed: 7."
+ ]
+ }
+ ],
+ "source": [
+ "# Passing multiple arguments of different types\n",
+ "func3(\"Julia\", \"is\", 1, \"in\", \"a\", 1000000, \"!\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The splat or ellipsis, as an indicator that any number of arguments may be passed, can solve some problems. In the example below, we will first write a function that takes an array of strings as its single argument and see what happens."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "surgery (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# A function that joins strings\n",
+ "function surgery(string_array)\n",
+ " string_items = join(string_array, \", \", \" and \") # Creating a computer variable to hold\n",
+ " # the arguments and concatenate a comma and the word and\n",
+ " print(\"Today I performed the following operations: $string_items\", \"!\")\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Today I performed the following operations: colonic resection and appendectomy!"
+ ]
+ }
+ ],
+ "source": [
+ "# Passing a single array that holds two strings\n",
+ "surgery([\"colonic resection\", \"appendectomy\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Today I performed the following operations: a, p, p, e, n, d, e, c, t, o, m and y!"
+ ]
+ }
+ ],
+ "source": [
+ "# What if I forget the square brackets []\n",
+ "# The join() function will act on the characters in the string\n",
+ "surgery(\"appendectomy\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "splat_surgery (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 44,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Now we don't restrict the number of arguments\n",
+ "function splat_surgery(stringsss...)\n",
+ " string_items = join(stringsss, \", \", \" and \")\n",
+ " print(\"Today I performed the following operations: $string_items\", \"!\")\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Today I performed the following operations: appendectomy!"
+ ]
+ }
+ ],
+ "source": [
+ "splat_surgery(\"appendectomy\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For the sake of clarity, look at the following example to see what Julia does to the args... arguments. You will note that it is actually managed as a tuple."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "argues (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 46,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "function argues(a, b, s...)\n",
+ " print(\"The argument values are: $a, $b, and $s\")\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The argument values are: 3, 4, and (5, 6, 7, 8, \"Julia\")"
+ ]
+ }
+ ],
+ "source": [
+ "# The first two values, 3 and 4, have proper assignment, but the rest will be in a tuple\n",
+ "argues(3, 4, 5, 6, 7, 8, \"Julia\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The argument values are: 3, 4, and ()"
+ ]
+ }
+ ],
+ "source": [
+ "# Now for an empty tuple\n",
+ "argues(3, 4)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now for some real fun. We can combine keywords and splats. Have a look at this."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "fun_func (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Creating a function that only contains keywords, but they are\n",
+ "# splats\n",
+ "function fun_func(; a...)\n",
+ " a\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "pairs(::NamedTuple) with 3 entries:\n",
+ " :var1 => \"Julia\"\n",
+ " :var2 => \"Language\"\n",
+ " :val1 => 3"
+ ]
+ },
+ "execution_count": 50,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Calling the fun_func() function, remembering to give the keywords names\n",
+ "fun_func(var1 = \"Julia\", var2 = \"Language\", val1 = 3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We now have a collection of (key, value) tuples, with the key coming from the name we gave the keyword argument. Moreover, it is actually a symbol which you will note by the colon (:) preceding it."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[Back to the top](#In-this-lesson)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Passing arrays as function arguments"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Once a function is defined, an array of values can be passed to it using the `map()` function."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Creating an array\n",
+ "xvals = [-3, -2.5, -2, -1.5, -1, -0.5, 0, 0.5, 1, 1.5, 2, 2.5, 3];"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "sqr (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 52,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Creating the function\n",
+ "function sqr(a)\n",
+ " return a^2\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `map()` function will now map the function to each value in the array."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "13-element Array{Float64,1}:\n",
+ " 9.0 \n",
+ " 6.25\n",
+ " 4.0 \n",
+ " 2.25\n",
+ " 1.0 \n",
+ " 0.25\n",
+ " 0.0 \n",
+ " 0.25\n",
+ " 1.0 \n",
+ " 2.25\n",
+ " 4.0 \n",
+ " 6.25\n",
+ " 9.0 "
+ ]
+ },
+ "execution_count": 53,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Mapping the array to the function\n",
+ "map(sqr, xvals)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The dot notation after a function achieves the same results."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "13-element Array{Float64,1}:\n",
+ " 9.0 \n",
+ " 6.25\n",
+ " 4.0 \n",
+ " 2.25\n",
+ " 1.0 \n",
+ " 0.25\n",
+ " 0.0 \n",
+ " 0.25\n",
+ " 1.0 \n",
+ " 2.25\n",
+ " 4.0 \n",
+ " 6.25\n",
+ " 9.0 "
+ ]
+ },
+ "execution_count": 54,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sqr.(xvals)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[Back to the top](#In-this-lesson)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Type parameters"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "It is possible to limit a function to accepting only certain argument types."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "m (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 55,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "function m(x::Int)\n",
+ " return 3 * x\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Using the `methods()` function, we can now see that only integer argument values are allowed."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "1 method for generic function <b>m</b>:<ul><li> m(x::<b>Int64</b>) in Main at In[55]:2</li> </ul>"
+ ],
+ "text/plain": [
+ "# 1 method for generic function \"m\":\n",
+ "[1] m(x::Int64) in Main at In[55]:2"
+ ]
+ },
+ "execution_count": 56,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "methods(m)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "9"
+ ]
+ },
+ "execution_count": 57,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Calling the function with an integer\n",
+ "m(3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A floating point value, as in `m(3.)`, will result in an error.\n",
+ "```\n",
+ "MethodError: no method matching m(::Float64)\n",
+ "Closest candidates are:\n",
+ " m(!Matched::Int64) at In[58]:2\n",
+ "\n",
+ "Stacktrace:\n",
+ " [1] top-level scope at In[62]:1\n",
+ " ```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[Back to the top](#In-this-lesson)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Stabby functions and do blocks"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Stabby lambda functions as they are called, are quick-and-dirty functions. They are examples of anonymous functions, the latter referring to the fact that they don't have a name. The do block is also a form of anonymous function. Let's look at some examples."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "#5 (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 58,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# The Julia syntax uses the -> character combinations, hence stabby!\n",
+ "x -> 2x^2 + 3x - 2"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can now use the `map()` function to apply this stabby function to each value in an array. Note that, because it has not been given a name, the stabby function created above cannot be called later."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "5-element Array{Int64,1}:\n",
+ " 3\n",
+ " 12\n",
+ " 25\n",
+ " 42\n",
+ " 63"
+ ]
+ },
+ "execution_count": 59,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "map(x -> 2x^2 + 3x - 2, [1, 2, 3, 4, 5])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "There is another way of achieving this using `do` blocks."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "5-element Array{Int64,1}:\n",
+ " 3\n",
+ " 12\n",
+ " 25\n",
+ " 42\n",
+ " 63"
+ ]
+ },
+ "execution_count": 60,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Let's do something\n",
+ "map([1, 2, 3, 4, 5]) do x\n",
+ " 2x^2 + 3x - 2\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `do` block can do some more!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 61,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "5-element Array{Int64,1}:\n",
+ " 300\n",
+ " 600\n",
+ " 900\n",
+ " 2000\n",
+ " 3300"
+ ]
+ },
+ "execution_count": 61,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "map([3, 6, 9, 10, 11]) do x\n",
+ " if mod(x, 3) == 0 # If the value is divisible by 3\n",
+ " 100x\n",
+ " elseif mod(x, 3) == 1 # If the remainder after dividing by 3 is 1\n",
+ " 200x\n",
+ " else\n",
+ " # Otherwise, the remainder is 2\n",
+ " 300x\n",
+ " end\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[Back to the top](#In-this-lesson)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Using functions as arguments"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As the title of this section implies, we can pass a function as an argument. The receiving function can then call the function that was passed to it."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "luv (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 62,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# First function\n",
+ "function string_func(s)\n",
+ " str = s()\n",
+ " print(\"I love $str\", \"!\")\n",
+ "end\n",
+ "\n",
+ "# Second function\n",
+ "function luv()\n",
+ " return(\"Julia\")\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "I love Julia!"
+ ]
+ }
+ ],
+ "source": [
+ "string_func(luv)\n",
+ "# Calling the function string_func\n",
+ "# Passing a function as an argument, which then calls that function\n",
+ "# The called luv function returns the string Julia, which is now the argument of the originally called function"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[Back to the top](#In-this-lesson)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "simple_addition (generic function with 2 methods)"
+ ]
+ },
+ "execution_count": 58,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "function simple_addition(a, b = 1; c = 3)\n",
+ " return a + b + c\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "10"
+ ]
+ },
+ "execution_count": 59,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "simple_addition(6)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "8"
+ ]
+ },
+ "execution_count": 60,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "simple_addition(3,2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 61,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "simple_addition (generic function with 3 methods)"
+ ]
+ },
+ "execution_count": 61,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "function simple_addition(x::Int64, y::Int64)\n",
+ " return x + y\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "MethodError",
+ "evalue": "MethodError: no method matching simple_addition(; x=5, y=5)\nClosest candidates are:\n simple_addition(!Matched::Int64, !Matched::Int64) at In[61]:2 got unsupported keyword arguments \"x\", \"y\"\n simple_addition(!Matched::Any) at In[58]:2 got unsupported keyword arguments \"x\", \"y\"\n simple_addition(!Matched::Any, !Matched::Any; c) at In[58]:2 got unsupported keyword arguments \"x\", \"y\"",
+ "output_type": "error",
+ "traceback": [
+ "MethodError: no method matching simple_addition(; x=5, y=5)\nClosest candidates are:\n simple_addition(!Matched::Int64, !Matched::Int64) at In[61]:2 got unsupported keyword arguments \"x\", \"y\"\n simple_addition(!Matched::Any) at In[58]:2 got unsupported keyword arguments \"x\", \"y\"\n simple_addition(!Matched::Any, !Matched::Any; c) at In[58]:2 got unsupported keyword arguments \"x\", \"y\"",
+ "",
+ "Stacktrace:",
+ " [1] top-level scope at In[62]:1"
+ ]
+ }
+ ],
+ "source": [
+ "simple_addition(x=5,y=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "10"
+ ]
+ },
+ "execution_count": 63,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "simple_addition(5,5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "13.0"
+ ]
+ },
+ "execution_count": 64,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "simple_addition(5.,5.)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "5-element Array{Int64,1}:\n",
+ " 3\n",
+ " 25\n",
+ " 63\n",
+ " 117\n",
+ " 187"
+ ]
+ },
+ "execution_count": 65,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "map(x -> 2x^2 + 3x - 2, 1:2:9)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "my_function (generic function with 2 methods)"
+ ]
+ },
+ "execution_count": 67,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "function my_function(a::Float64, b::Float64, c::Float64 = 2 + 8im)\n",
+ " return a - (b * c)\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "MethodError",
+ "evalue": "MethodError: no method matching my_function(::Float64, ::Float64, ::Int64)\nClosest candidates are:\n my_function(::Float64, ::Float64, !Matched::Float64) at In[67]:2\n my_function(::Float64, ::Float64) at In[67]:2",
+ "output_type": "error",
+ "traceback": [
+ "MethodError: no method matching my_function(::Float64, ::Float64, ::Int64)\nClosest candidates are:\n my_function(::Float64, ::Float64, !Matched::Float64) at In[67]:2\n my_function(::Float64, ::Float64) at In[67]:2",
+ "",
+ "Stacktrace:",
+ " [1] top-level scope at In[68]:1"
+ ]
+ }
+ ],
+ "source": [
+ "my_function(4.,3.5,8)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Julia 1.2.0",
+ "language": "julia",
+ "name": "julia-1.2"
+ },
+ "language_info": {
+ "file_extension": ".jl",
+ "mimetype": "application/julia",
+ "name": "julia",
+ "version": "1.2.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/Week4_Functions.jl b/Week4_Functions.jl
new file mode 100644
index 0000000..e97b31f
--- /dev/null
+++ b/Week4_Functions.jl
@@ -0,0 +1,104 @@
++(2, 2)
+versioninfo()
+# FUNCTIONS IN JULIA 1.0 (0.7+)
+# ----------------------
+
+# * A function maps a tuple of arguments to a return value
+
+# I Creating basic Functions
+
+# 1 using the function keyword
+#Create a function named my_addition
+#Takes two arguments
+#Return the addition of the two values
+function my_addition(x, y)
+    x + y  # the last expression is the return value: the sum of both arguments
+end
+
+# 2 Calling a function
+#Call the function with two argument values
+my_addition(3, 4)
+
+# 3 Built-in functions
+#The plus symbol, +, (like the other arithmetic operators) is a built-in function
++(3, 4)
+
+# 4 Using a Unicode symbol as a function name
+function Σ(x, y)
+    return +(x, y)  # prefix-call form of the built-in + function; same as x + y
+end
+
+Σ(3, 4)
+
+# II Anonymous functions
+
+# * Functions can be assigned to variables
+# * Functions can be used as arguments
+# * Functions can be returned as values
+
+# 1 An anonymous function
+x -> x^2 + 3
+
+# 2 A function as an argument
+#Passing the round() function as argument to the map() function
+map(round, [2.1, 3.4, 7.9])
+#Passing an anonymous function as an argument to the map() function
+map(x -> x^2, [2, 3, 4])
+
+# III Tuples and functions
+
+# * Tuples are immutable collections
+
+# 1 Examples of tuples
+my_tuple = (1, "Julia", 7)
+typeof(my_tuple)
+
+# * Single value tuple must have a comma
+my_second_tuple = (4,)
+typeof(my_second_tuple)
+
+# 2 Indexing a tuple
+length(my_tuple)
+my_tuple[2]
+
+# 3 Named tuples
+# * Named tuple creates a name for each element
+my_other_tuple = (a = 4, b = "Julia", c = 3)
+#Indexing tuple by name
+my_other_tuple.b
+
+# 4 Function returns
+# * Multiple return values of a function are tuples
+function my_function(a, b)
+    return (a + b, a - b)  # one tuple holding (sum, difference)
+end
+#Calling the function
+my_function(10, 5)
+#Looking up the type of the function return
+typeof(my_function(10, 5))
+#This allows for each element to be given a variable
+r, s = my_function(10, 5)
+r
+s
+
+# IV Functions with keyword arguments
+
+# * Keyword arguments are added after semi-colon
+# * Their order is not explicit
+# * Default values are added
+
+# 1 Creating a function with a keyword argument
+function my_keyword_function(x, y; z = 3)
+    return +(x, y, z)  # x + y + z; z falls back to 3 when the keyword is omitted
+end
+
+# * Omission of keyword argument uses default
+my_keyword_function(1, 2)
+# * Keyword argument names must be used
+my_keyword_function(1, 2, z = 10)
+
+# 4 Use of dot notation for functions
+
+# * Passes a collection elementwise to a function
+# * Use instead of map()
+sin.([0., π/2., π, 3/2. * π, 2π])  # one full period: 0, π/2, π, 3π/2, 2π
diff --git a/Week4_PR_Template.ipynb b/Week4_PR_Template.ipynb
new file mode 100644
index 0000000..d2ea5f5
--- /dev/null
+++ b/Week4_PR_Template.ipynb
@@ -0,0 +1,569 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Week 4 Peer Review"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "2. In a code cell below, import the required packages: Distributions, DataFrames, and Random (install these packages via the REPL if required)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Import the required packages\n",
+ "using Distributions, DataFrames, Random"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Seed the random number generator\n",
+ "Random.seed!(1234);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "3. In a code cell below, create a dataframe named df1, with 30 rows and 4 columns (variables). Call the first column ID. It should hold the values 1 through 30 (to make up 30 rows). Use three rand() function calls to generate three more columns named var1, var2, and var3. The second column (var1) should consist of 30 values from a standard normal distribution (mean of 0 and standard deviation of 1). The third column (var2) should consist of 30 random values from a normal distribution with a mean of 10 and a standard deviation of 2. The last column (var3) should contain 30 random values chosen from a range of integers between (and including) 5 and 15."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | ID | var1 | var2 | var3 |
---|
| Int64 | Float64 | Float64 | Int64 |
---|
30 rows × 4 columns
1 | 1 | 0.867347 | 7.44066 | 14 |
---|
2 | 2 | -0.901744 | 11.9946 | 13 |
---|
3 | 3 | -0.494479 | 10.6048 | 12 |
---|
4 | 4 | -0.902914 | 9.92711 | 9 |
---|
5 | 5 | 0.864401 | 10.2839 | 15 |
---|
6 | 6 | 2.21188 | 11.0425 | 14 |
---|
7 | 7 | 0.532813 | 11.7935 | 15 |
---|
8 | 8 | -0.271735 | 8.97294 | 9 |
---|
9 | 9 | 0.502334 | 8.4704 | 9 |
---|
10 | 10 | -0.516984 | 6.91715 | 8 |
---|
11 | 11 | -0.560501 | 9.83968 | 15 |
---|
12 | 12 | -0.0192918 | 7.81756 | 14 |
---|
13 | 13 | 0.128064 | 8.83897 | 11 |
---|
14 | 14 | 1.85278 | 9.36913 | 10 |
---|
15 | 15 | -0.827763 | 7.2771 | 15 |
---|
16 | 16 | 0.110096 | 9.77109 | 15 |
---|
17 | 17 | -0.251176 | 10.3317 | 6 |
---|
18 | 18 | 0.369714 | 9.18312 | 5 |
---|
19 | 19 | 0.0721164 | 7.98043 | 12 |
---|
20 | 20 | -1.50343 | 8.91239 | 13 |
---|
21 | 21 | 1.56417 | 7.54655 | 14 |
---|
22 | 22 | -1.39674 | 8.91657 | 5 |
---|
23 | 23 | 1.1055 | 8.62701 | 8 |
---|
24 | 24 | -1.10673 | 8.57414 | 9 |
---|
25 | 25 | -3.21136 | 9.34588 | 5 |
---|
26 | 26 | -0.0740145 | 11.0297 | 9 |
---|
27 | 27 | 0.150976 | 14.8349 | 10 |
---|
28 | 28 | 0.769278 | 9.38405 | 14 |
---|
29 | 29 | -0.310153 | 12.4906 | 15 |
---|
30 | 30 | -0.602707 | 9.9001 | 7 |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|cccc}\n",
+ "\t& ID & var1 & var2 & var3\\\\\n",
+ "\t\\hline\n",
+ "\t& Int64 & Float64 & Float64 & Int64\\\\\n",
+ "\t\\hline\n",
+ "\t1 & 1 & 0.867347 & 7.44066 & 14 \\\\\n",
+ "\t2 & 2 & -0.901744 & 11.9946 & 13 \\\\\n",
+ "\t3 & 3 & -0.494479 & 10.6048 & 12 \\\\\n",
+ "\t4 & 4 & -0.902914 & 9.92711 & 9 \\\\\n",
+ "\t5 & 5 & 0.864401 & 10.2839 & 15 \\\\\n",
+ "\t6 & 6 & 2.21188 & 11.0425 & 14 \\\\\n",
+ "\t7 & 7 & 0.532813 & 11.7935 & 15 \\\\\n",
+ "\t8 & 8 & -0.271735 & 8.97294 & 9 \\\\\n",
+ "\t9 & 9 & 0.502334 & 8.4704 & 9 \\\\\n",
+ "\t10 & 10 & -0.516984 & 6.91715 & 8 \\\\\n",
+ "\t11 & 11 & -0.560501 & 9.83968 & 15 \\\\\n",
+ "\t12 & 12 & -0.0192918 & 7.81756 & 14 \\\\\n",
+ "\t13 & 13 & 0.128064 & 8.83897 & 11 \\\\\n",
+ "\t14 & 14 & 1.85278 & 9.36913 & 10 \\\\\n",
+ "\t15 & 15 & -0.827763 & 7.2771 & 15 \\\\\n",
+ "\t16 & 16 & 0.110096 & 9.77109 & 15 \\\\\n",
+ "\t17 & 17 & -0.251176 & 10.3317 & 6 \\\\\n",
+ "\t18 & 18 & 0.369714 & 9.18312 & 5 \\\\\n",
+ "\t19 & 19 & 0.0721164 & 7.98043 & 12 \\\\\n",
+ "\t20 & 20 & -1.50343 & 8.91239 & 13 \\\\\n",
+ "\t21 & 21 & 1.56417 & 7.54655 & 14 \\\\\n",
+ "\t22 & 22 & -1.39674 & 8.91657 & 5 \\\\\n",
+ "\t23 & 23 & 1.1055 & 8.62701 & 8 \\\\\n",
+ "\t24 & 24 & -1.10673 & 8.57414 & 9 \\\\\n",
+ "\t25 & 25 & -3.21136 & 9.34588 & 5 \\\\\n",
+ "\t26 & 26 & -0.0740145 & 11.0297 & 9 \\\\\n",
+ "\t27 & 27 & 0.150976 & 14.8349 & 10 \\\\\n",
+ "\t28 & 28 & 0.769278 & 9.38405 & 14 \\\\\n",
+ "\t29 & 29 & -0.310153 & 12.4906 & 15 \\\\\n",
+ "\t30 & 30 & -0.602707 & 9.9001 & 7 \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "30×4 DataFrame\n",
+ "│ Row │ ID │ var1 │ var2 │ var3 │\n",
+ "│ │ \u001b[90mInt64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mInt64\u001b[39m │\n",
+ "├─────┼───────┼────────────┼─────────┼───────┤\n",
+ "│ 1 │ 1 │ 0.867347 │ 7.44066 │ 14 │\n",
+ "│ 2 │ 2 │ -0.901744 │ 11.9946 │ 13 │\n",
+ "│ 3 │ 3 │ -0.494479 │ 10.6048 │ 12 │\n",
+ "│ 4 │ 4 │ -0.902914 │ 9.92711 │ 9 │\n",
+ "│ 5 │ 5 │ 0.864401 │ 10.2839 │ 15 │\n",
+ "│ 6 │ 6 │ 2.21188 │ 11.0425 │ 14 │\n",
+ "│ 7 │ 7 │ 0.532813 │ 11.7935 │ 15 │\n",
+ "│ 8 │ 8 │ -0.271735 │ 8.97294 │ 9 │\n",
+ "│ 9 │ 9 │ 0.502334 │ 8.4704 │ 9 │\n",
+ "│ 10 │ 10 │ -0.516984 │ 6.91715 │ 8 │\n",
+ "⋮\n",
+ "│ 20 │ 20 │ -1.50343 │ 8.91239 │ 13 │\n",
+ "│ 21 │ 21 │ 1.56417 │ 7.54655 │ 14 │\n",
+ "│ 22 │ 22 │ -1.39674 │ 8.91657 │ 5 │\n",
+ "│ 23 │ 23 │ 1.1055 │ 8.62701 │ 8 │\n",
+ "│ 24 │ 24 │ -1.10673 │ 8.57414 │ 9 │\n",
+ "│ 25 │ 25 │ -3.21136 │ 9.34588 │ 5 │\n",
+ "│ 26 │ 26 │ -0.0740145 │ 11.0297 │ 9 │\n",
+ "│ 27 │ 27 │ 0.150976 │ 14.8349 │ 10 │\n",
+ "│ 28 │ 28 │ 0.769278 │ 9.38405 │ 14 │\n",
+ "│ 29 │ 29 │ -0.310153 │ 12.4906 │ 15 │\n",
+ "│ 30 │ 30 │ -0.602707 │ 9.9001 │ 7 │"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Build the 30-row frame: ID holds 1 through 30, var1 is drawn from Normal(0, 1), var2 from Normal(10, 2), var3 from the integers 5:15\n",
+ "# NOTE(review): the task text names this frame df1, but this cell and the later cells refer to it as df\n",
+ "df = DataFrame(ID = 1:30, var1 = rand(Normal(0,1),30), var2 = rand(Normal(10,2),30), var3 = rand(5:15,30))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "4. In code cells below, write the code to calculate the mean and variance of each column in the dataframe. For example, for the first variable this could be done using the println function and referring to each column (variable) by its symbol notation. Try to shorten the code with a for-loop, iterating over the variable names (in symbol format)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "┌ Warning: `getindex(df::DataFrame, col_ind::ColumnIndex)` is deprecated, use `df[!, col_ind]` instead.\n",
+ "│ caller = top-level scope at In[4]:3\n",
+ "└ @ Core ./In[4]:3\n",
+ "┌ Warning: `getindex(df::DataFrame, col_ind::ColumnIndex)` is deprecated, use `df[!, col_ind]` instead.\n",
+ "│ caller = top-level scope at In[4]:4\n",
+ "└ @ Core ./In[4]:4\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The mean of var1 is: -0.061674963752526096, the variance is: 1.1790054448274625\n",
+ "The mean of var2 is: 9.580613055613338, the variance is: 2.948790077536739\n",
+ "The mean of var3 is: 11.0, the variance is: 11.724137931034482\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print the mean and variance of each random column (names(df) would also include ID)\n",
+ "for s in [:var1, :var2, :var3]\n",
+ "    colname = String(s)\n",
+ "    # df[!, s] selects column s; the bare df[s] form is deprecated and emits a warning\n",
+ "    meancol = mean(df[!, s])\n",
+ "    variancecol = var(df[!, s])\n",
+ "    println(\"The mean of $colname is: $meancol, the variance is: $variancecol\")\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "5. In a code cell below, create a new DataFrame named df2 from the last 20 rows of the original DataFrame, df1."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | ID | var1 | var2 | var3 |
---|
| Int64 | Float64 | Float64 | Int64 |
---|
20 rows × 4 columns
1 | 11 | -0.560501 | 9.83968 | 15 |
---|
2 | 12 | -0.0192918 | 7.81756 | 14 |
---|
3 | 13 | 0.128064 | 8.83897 | 11 |
---|
4 | 14 | 1.85278 | 9.36913 | 10 |
---|
5 | 15 | -0.827763 | 7.2771 | 15 |
---|
6 | 16 | 0.110096 | 9.77109 | 15 |
---|
7 | 17 | -0.251176 | 10.3317 | 6 |
---|
8 | 18 | 0.369714 | 9.18312 | 5 |
---|
9 | 19 | 0.0721164 | 7.98043 | 12 |
---|
10 | 20 | -1.50343 | 8.91239 | 13 |
---|
11 | 21 | 1.56417 | 7.54655 | 14 |
---|
12 | 22 | -1.39674 | 8.91657 | 5 |
---|
13 | 23 | 1.1055 | 8.62701 | 8 |
---|
14 | 24 | -1.10673 | 8.57414 | 9 |
---|
15 | 25 | -3.21136 | 9.34588 | 5 |
---|
16 | 26 | -0.0740145 | 11.0297 | 9 |
---|
17 | 27 | 0.150976 | 14.8349 | 10 |
---|
18 | 28 | 0.769278 | 9.38405 | 14 |
---|
19 | 29 | -0.310153 | 12.4906 | 15 |
---|
20 | 30 | -0.602707 | 9.9001 | 7 |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|cccc}\n",
+ "\t& ID & var1 & var2 & var3\\\\\n",
+ "\t\\hline\n",
+ "\t& Int64 & Float64 & Float64 & Int64\\\\\n",
+ "\t\\hline\n",
+ "\t1 & 11 & -0.560501 & 9.83968 & 15 \\\\\n",
+ "\t2 & 12 & -0.0192918 & 7.81756 & 14 \\\\\n",
+ "\t3 & 13 & 0.128064 & 8.83897 & 11 \\\\\n",
+ "\t4 & 14 & 1.85278 & 9.36913 & 10 \\\\\n",
+ "\t5 & 15 & -0.827763 & 7.2771 & 15 \\\\\n",
+ "\t6 & 16 & 0.110096 & 9.77109 & 15 \\\\\n",
+ "\t7 & 17 & -0.251176 & 10.3317 & 6 \\\\\n",
+ "\t8 & 18 & 0.369714 & 9.18312 & 5 \\\\\n",
+ "\t9 & 19 & 0.0721164 & 7.98043 & 12 \\\\\n",
+ "\t10 & 20 & -1.50343 & 8.91239 & 13 \\\\\n",
+ "\t11 & 21 & 1.56417 & 7.54655 & 14 \\\\\n",
+ "\t12 & 22 & -1.39674 & 8.91657 & 5 \\\\\n",
+ "\t13 & 23 & 1.1055 & 8.62701 & 8 \\\\\n",
+ "\t14 & 24 & -1.10673 & 8.57414 & 9 \\\\\n",
+ "\t15 & 25 & -3.21136 & 9.34588 & 5 \\\\\n",
+ "\t16 & 26 & -0.0740145 & 11.0297 & 9 \\\\\n",
+ "\t17 & 27 & 0.150976 & 14.8349 & 10 \\\\\n",
+ "\t18 & 28 & 0.769278 & 9.38405 & 14 \\\\\n",
+ "\t19 & 29 & -0.310153 & 12.4906 & 15 \\\\\n",
+ "\t20 & 30 & -0.602707 & 9.9001 & 7 \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "20×4 DataFrame\n",
+ "│ Row │ ID │ var1 │ var2 │ var3 │\n",
+ "│ │ \u001b[90mInt64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mInt64\u001b[39m │\n",
+ "├─────┼───────┼────────────┼─────────┼───────┤\n",
+ "│ 1 │ 11 │ -0.560501 │ 9.83968 │ 15 │\n",
+ "│ 2 │ 12 │ -0.0192918 │ 7.81756 │ 14 │\n",
+ "│ 3 │ 13 │ 0.128064 │ 8.83897 │ 11 │\n",
+ "│ 4 │ 14 │ 1.85278 │ 9.36913 │ 10 │\n",
+ "│ 5 │ 15 │ -0.827763 │ 7.2771 │ 15 │\n",
+ "│ 6 │ 16 │ 0.110096 │ 9.77109 │ 15 │\n",
+ "│ 7 │ 17 │ -0.251176 │ 10.3317 │ 6 │\n",
+ "│ 8 │ 18 │ 0.369714 │ 9.18312 │ 5 │\n",
+ "│ 9 │ 19 │ 0.0721164 │ 7.98043 │ 12 │\n",
+ "│ 10 │ 20 │ -1.50343 │ 8.91239 │ 13 │\n",
+ "│ 11 │ 21 │ 1.56417 │ 7.54655 │ 14 │\n",
+ "│ 12 │ 22 │ -1.39674 │ 8.91657 │ 5 │\n",
+ "│ 13 │ 23 │ 1.1055 │ 8.62701 │ 8 │\n",
+ "│ 14 │ 24 │ -1.10673 │ 8.57414 │ 9 │\n",
+ "│ 15 │ 25 │ -3.21136 │ 9.34588 │ 5 │\n",
+ "│ 16 │ 26 │ -0.0740145 │ 11.0297 │ 9 │\n",
+ "│ 17 │ 27 │ 0.150976 │ 14.8349 │ 10 │\n",
+ "│ 18 │ 28 │ 0.769278 │ 9.38405 │ 14 │\n",
+ "│ 19 │ 29 │ -0.310153 │ 12.4906 │ 15 │\n",
+ "│ 20 │ 30 │ -0.602707 │ 9.9001 │ 7 │"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df2 = df[11:end,:]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "6. In a code cell below, show the results of computing simple descriptive statistics on this new DataFrame using the describe() function."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | variable | mean | min | median | max | nunique | nmissing | eltype |
---|
| Symbol | Float64 | Real | Float64 | Real | Nothing | Nothing | DataType |
---|
4 rows × 8 columns
1 | ID | 20.5 | 11 | 20.5 | 30 | | | Int64 |
---|
2 | var1 | -0.187058 | -3.21136 | -0.0466532 | 1.85278 | | | Float64 |
---|
3 | var2 | 9.49853 | 7.2771 | 9.2645 | 14.8349 | | | Float64 |
---|
4 | var3 | 10.6 | 5 | 10.5 | 15 | | | Int64 |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|cccccccc}\n",
+ "\t& variable & mean & min & median & max & nunique & nmissing & eltype\\\\\n",
+ "\t\\hline\n",
+ "\t& Symbol & Float64 & Real & Float64 & Real & Nothing & Nothing & DataType\\\\\n",
+ "\t\\hline\n",
+ "\t1 & ID & 20.5 & 11 & 20.5 & 30 & & & Int64 \\\\\n",
+ "\t2 & var1 & -0.187058 & -3.21136 & -0.0466532 & 1.85278 & & & Float64 \\\\\n",
+ "\t3 & var2 & 9.49853 & 7.2771 & 9.2645 & 14.8349 & & & Float64 \\\\\n",
+ "\t4 & var3 & 10.6 & 5 & 10.5 & 15 & & & Int64 \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "4×8 DataFrame. Omitted printing of 2 columns\n",
+ "│ Row │ variable │ mean │ min │ median │ max │ nunique │\n",
+ "│ │ \u001b[90mSymbol\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mReal\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mReal\u001b[39m │ \u001b[90mNothing\u001b[39m │\n",
+ "├─────┼──────────┼───────────┼──────────┼────────────┼─────────┼─────────┤\n",
+ "│ 1 │ ID │ 20.5 │ 11 │ 20.5 │ 30 │ │\n",
+ "│ 2 │ var1 │ -0.187058 │ -3.21136 │ -0.0466532 │ 1.85278 │ │\n",
+ "│ 3 │ var2 │ 9.49853 │ 7.2771 │ 9.2645 │ 14.8349 │ │\n",
+ "│ 4 │ var3 │ 10.6 │ 5 │ 10.5 │ 15 │ │"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "describe(df2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "7. In a code cell below, add a column named cat1 to the df2 DataFrame consisting of a random selection of 20 values from the sample space GroupA and GroupB."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | ID | var1 | var2 | var3 | Col1 |
---|
| Int64 | Float64 | Float64 | Int64 | String |
---|
20 rows × 5 columns
1 | 11 | -0.560501 | 9.83968 | 15 | GroupA |
---|
2 | 12 | -0.0192918 | 7.81756 | 14 | GroupB |
---|
3 | 13 | 0.128064 | 8.83897 | 11 | GroupB |
---|
4 | 14 | 1.85278 | 9.36913 | 10 | GroupB |
---|
5 | 15 | -0.827763 | 7.2771 | 15 | GroupB |
---|
6 | 16 | 0.110096 | 9.77109 | 15 | GroupA |
---|
7 | 17 | -0.251176 | 10.3317 | 6 | GroupB |
---|
8 | 18 | 0.369714 | 9.18312 | 5 | GroupA |
---|
9 | 19 | 0.0721164 | 7.98043 | 12 | GroupB |
---|
10 | 20 | -1.50343 | 8.91239 | 13 | GroupA |
---|
11 | 21 | 1.56417 | 7.54655 | 14 | GroupB |
---|
12 | 22 | -1.39674 | 8.91657 | 5 | GroupB |
---|
13 | 23 | 1.1055 | 8.62701 | 8 | GroupA |
---|
14 | 24 | -1.10673 | 8.57414 | 9 | GroupA |
---|
15 | 25 | -3.21136 | 9.34588 | 5 | GroupA |
---|
16 | 26 | -0.0740145 | 11.0297 | 9 | GroupA |
---|
17 | 27 | 0.150976 | 14.8349 | 10 | GroupA |
---|
18 | 28 | 0.769278 | 9.38405 | 14 | GroupA |
---|
19 | 29 | -0.310153 | 12.4906 | 15 | GroupA |
---|
20 | 30 | -0.602707 | 9.9001 | 7 | GroupA |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|ccccc}\n",
+ "\t& ID & var1 & var2 & var3 & Col1\\\\\n",
+ "\t\\hline\n",
+ "\t& Int64 & Float64 & Float64 & Int64 & String\\\\\n",
+ "\t\\hline\n",
+ "\t1 & 11 & -0.560501 & 9.83968 & 15 & GroupA \\\\\n",
+ "\t2 & 12 & -0.0192918 & 7.81756 & 14 & GroupB \\\\\n",
+ "\t3 & 13 & 0.128064 & 8.83897 & 11 & GroupB \\\\\n",
+ "\t4 & 14 & 1.85278 & 9.36913 & 10 & GroupB \\\\\n",
+ "\t5 & 15 & -0.827763 & 7.2771 & 15 & GroupB \\\\\n",
+ "\t6 & 16 & 0.110096 & 9.77109 & 15 & GroupA \\\\\n",
+ "\t7 & 17 & -0.251176 & 10.3317 & 6 & GroupB \\\\\n",
+ "\t8 & 18 & 0.369714 & 9.18312 & 5 & GroupA \\\\\n",
+ "\t9 & 19 & 0.0721164 & 7.98043 & 12 & GroupB \\\\\n",
+ "\t10 & 20 & -1.50343 & 8.91239 & 13 & GroupA \\\\\n",
+ "\t11 & 21 & 1.56417 & 7.54655 & 14 & GroupB \\\\\n",
+ "\t12 & 22 & -1.39674 & 8.91657 & 5 & GroupB \\\\\n",
+ "\t13 & 23 & 1.1055 & 8.62701 & 8 & GroupA \\\\\n",
+ "\t14 & 24 & -1.10673 & 8.57414 & 9 & GroupA \\\\\n",
+ "\t15 & 25 & -3.21136 & 9.34588 & 5 & GroupA \\\\\n",
+ "\t16 & 26 & -0.0740145 & 11.0297 & 9 & GroupA \\\\\n",
+ "\t17 & 27 & 0.150976 & 14.8349 & 10 & GroupA \\\\\n",
+ "\t18 & 28 & 0.769278 & 9.38405 & 14 & GroupA \\\\\n",
+ "\t19 & 29 & -0.310153 & 12.4906 & 15 & GroupA \\\\\n",
+ "\t20 & 30 & -0.602707 & 9.9001 & 7 & GroupA \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "20×5 DataFrame\n",
+ "│ Row │ ID │ var1 │ var2 │ var3 │ Col1 │\n",
+ "│ │ \u001b[90mInt64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mString\u001b[39m │\n",
+ "├─────┼───────┼────────────┼─────────┼───────┼────────┤\n",
+ "│ 1 │ 11 │ -0.560501 │ 9.83968 │ 15 │ GroupA │\n",
+ "│ 2 │ 12 │ -0.0192918 │ 7.81756 │ 14 │ GroupB │\n",
+ "│ 3 │ 13 │ 0.128064 │ 8.83897 │ 11 │ GroupB │\n",
+ "│ 4 │ 14 │ 1.85278 │ 9.36913 │ 10 │ GroupB │\n",
+ "│ 5 │ 15 │ -0.827763 │ 7.2771 │ 15 │ GroupB │\n",
+ "│ 6 │ 16 │ 0.110096 │ 9.77109 │ 15 │ GroupA │\n",
+ "│ 7 │ 17 │ -0.251176 │ 10.3317 │ 6 │ GroupB │\n",
+ "│ 8 │ 18 │ 0.369714 │ 9.18312 │ 5 │ GroupA │\n",
+ "│ 9 │ 19 │ 0.0721164 │ 7.98043 │ 12 │ GroupB │\n",
+ "│ 10 │ 20 │ -1.50343 │ 8.91239 │ 13 │ GroupA │\n",
+ "│ 11 │ 21 │ 1.56417 │ 7.54655 │ 14 │ GroupB │\n",
+ "│ 12 │ 22 │ -1.39674 │ 8.91657 │ 5 │ GroupB │\n",
+ "│ 13 │ 23 │ 1.1055 │ 8.62701 │ 8 │ GroupA │\n",
+ "│ 14 │ 24 │ -1.10673 │ 8.57414 │ 9 │ GroupA │\n",
+ "│ 15 │ 25 │ -3.21136 │ 9.34588 │ 5 │ GroupA │\n",
+ "│ 16 │ 26 │ -0.0740145 │ 11.0297 │ 9 │ GroupA │\n",
+ "│ 17 │ 27 │ 0.150976 │ 14.8349 │ 10 │ GroupA │\n",
+ "│ 18 │ 28 │ 0.769278 │ 9.38405 │ 14 │ GroupA │\n",
+ "│ 19 │ 29 │ -0.310153 │ 12.4906 │ 15 │ GroupA │\n",
+ "│ 20 │ 30 │ -0.602707 │ 9.9001 │ 7 │ GroupA │"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Add the categorical column cat1 (the name the task asks for): 20 random draws from the two group labels\n",
+ "insertcols!(df2, :cat1 => rand([\"GroupA\", \"GroupB\"], 20))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "8. In a code cell below, create a DataFrame named df3 with columns named *id*, var4 and var5 such that id contains the values 11 through 30, var4 contains the values 21 through 40 and var5 contains the values 41 through 60."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | ID | var4 | var5 |
---|
| Int64 | Int64 | Int64 |
---|
20 rows × 3 columns
1 | 11 | 21 | 41 |
---|
2 | 12 | 22 | 42 |
---|
3 | 13 | 23 | 43 |
---|
4 | 14 | 24 | 44 |
---|
5 | 15 | 25 | 45 |
---|
6 | 16 | 26 | 46 |
---|
7 | 17 | 27 | 47 |
---|
8 | 18 | 28 | 48 |
---|
9 | 19 | 29 | 49 |
---|
10 | 20 | 30 | 50 |
---|
11 | 21 | 31 | 51 |
---|
12 | 22 | 32 | 52 |
---|
13 | 23 | 33 | 53 |
---|
14 | 24 | 34 | 54 |
---|
15 | 25 | 35 | 55 |
---|
16 | 26 | 36 | 56 |
---|
17 | 27 | 37 | 57 |
---|
18 | 28 | 38 | 58 |
---|
19 | 29 | 39 | 59 |
---|
20 | 30 | 40 | 60 |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|ccc}\n",
+ "\t& ID & var4 & var5\\\\\n",
+ "\t\\hline\n",
+ "\t& Int64 & Int64 & Int64\\\\\n",
+ "\t\\hline\n",
+ "\t1 & 11 & 21 & 41 \\\\\n",
+ "\t2 & 12 & 22 & 42 \\\\\n",
+ "\t3 & 13 & 23 & 43 \\\\\n",
+ "\t4 & 14 & 24 & 44 \\\\\n",
+ "\t5 & 15 & 25 & 45 \\\\\n",
+ "\t6 & 16 & 26 & 46 \\\\\n",
+ "\t7 & 17 & 27 & 47 \\\\\n",
+ "\t8 & 18 & 28 & 48 \\\\\n",
+ "\t9 & 19 & 29 & 49 \\\\\n",
+ "\t10 & 20 & 30 & 50 \\\\\n",
+ "\t11 & 21 & 31 & 51 \\\\\n",
+ "\t12 & 22 & 32 & 52 \\\\\n",
+ "\t13 & 23 & 33 & 53 \\\\\n",
+ "\t14 & 24 & 34 & 54 \\\\\n",
+ "\t15 & 25 & 35 & 55 \\\\\n",
+ "\t16 & 26 & 36 & 56 \\\\\n",
+ "\t17 & 27 & 37 & 57 \\\\\n",
+ "\t18 & 28 & 38 & 58 \\\\\n",
+ "\t19 & 29 & 39 & 59 \\\\\n",
+ "\t20 & 30 & 40 & 60 \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "20×3 DataFrame\n",
+ "│ Row │ ID │ var4 │ var5 │\n",
+ "│ │ \u001b[90mInt64\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mInt64\u001b[39m │\n",
+ "├─────┼───────┼───────┼───────┤\n",
+ "│ 1 │ 11 │ 21 │ 41 │\n",
+ "│ 2 │ 12 │ 22 │ 42 │\n",
+ "│ 3 │ 13 │ 23 │ 43 │\n",
+ "│ 4 │ 14 │ 24 │ 44 │\n",
+ "│ 5 │ 15 │ 25 │ 45 │\n",
+ "│ 6 │ 16 │ 26 │ 46 │\n",
+ "│ 7 │ 17 │ 27 │ 47 │\n",
+ "│ 8 │ 18 │ 28 │ 48 │\n",
+ "│ 9 │ 19 │ 29 │ 49 │\n",
+ "│ 10 │ 20 │ 30 │ 50 │\n",
+ "│ 11 │ 21 │ 31 │ 51 │\n",
+ "│ 12 │ 22 │ 32 │ 52 │\n",
+ "│ 13 │ 23 │ 33 │ 53 │\n",
+ "│ 14 │ 24 │ 34 │ 54 │\n",
+ "│ 15 │ 25 │ 35 │ 55 │\n",
+ "│ 16 │ 26 │ 36 │ 56 │\n",
+ "│ 17 │ 27 │ 37 │ 57 │\n",
+ "│ 18 │ 28 │ 38 │ 58 │\n",
+ "│ 19 │ 29 │ 39 │ 59 │\n",
+ "│ 20 │ 30 │ 40 │ 60 │"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Ranges can be passed directly as columns, as is done for ID = 1:30 when df is built, so collect() is not needed\n",
+ "df3 = DataFrame(ID = 11:30, var4 = 21:40, var5 = 41:60)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "9. In a code cell below, do a join of DataFrames df2 and df3 on the id column and save the result as a new dataframe called df4."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | ID | var1 | var2 | var3 | Col1 | var4 | var5 |
---|
| Int64 | Float64 | Float64 | Int64 | String | Int64 | Int64 |
---|
20 rows × 7 columns
1 | 11 | -0.560501 | 9.83968 | 15 | GroupA | 21 | 41 |
---|
2 | 12 | -0.0192918 | 7.81756 | 14 | GroupB | 22 | 42 |
---|
3 | 13 | 0.128064 | 8.83897 | 11 | GroupB | 23 | 43 |
---|
4 | 14 | 1.85278 | 9.36913 | 10 | GroupB | 24 | 44 |
---|
5 | 15 | -0.827763 | 7.2771 | 15 | GroupB | 25 | 45 |
---|
6 | 16 | 0.110096 | 9.77109 | 15 | GroupA | 26 | 46 |
---|
7 | 17 | -0.251176 | 10.3317 | 6 | GroupB | 27 | 47 |
---|
8 | 18 | 0.369714 | 9.18312 | 5 | GroupA | 28 | 48 |
---|
9 | 19 | 0.0721164 | 7.98043 | 12 | GroupB | 29 | 49 |
---|
10 | 20 | -1.50343 | 8.91239 | 13 | GroupA | 30 | 50 |
---|
11 | 21 | 1.56417 | 7.54655 | 14 | GroupB | 31 | 51 |
---|
12 | 22 | -1.39674 | 8.91657 | 5 | GroupB | 32 | 52 |
---|
13 | 23 | 1.1055 | 8.62701 | 8 | GroupA | 33 | 53 |
---|
14 | 24 | -1.10673 | 8.57414 | 9 | GroupA | 34 | 54 |
---|
15 | 25 | -3.21136 | 9.34588 | 5 | GroupA | 35 | 55 |
---|
16 | 26 | -0.0740145 | 11.0297 | 9 | GroupA | 36 | 56 |
---|
17 | 27 | 0.150976 | 14.8349 | 10 | GroupA | 37 | 57 |
---|
18 | 28 | 0.769278 | 9.38405 | 14 | GroupA | 38 | 58 |
---|
19 | 29 | -0.310153 | 12.4906 | 15 | GroupA | 39 | 59 |
---|
20 | 30 | -0.602707 | 9.9001 | 7 | GroupA | 40 | 60 |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|ccccccc}\n",
+ "\t& ID & var1 & var2 & var3 & Col1 & var4 & var5\\\\\n",
+ "\t\\hline\n",
+ "\t& Int64 & Float64 & Float64 & Int64 & String & Int64 & Int64\\\\\n",
+ "\t\\hline\n",
+ "\t1 & 11 & -0.560501 & 9.83968 & 15 & GroupA & 21 & 41 \\\\\n",
+ "\t2 & 12 & -0.0192918 & 7.81756 & 14 & GroupB & 22 & 42 \\\\\n",
+ "\t3 & 13 & 0.128064 & 8.83897 & 11 & GroupB & 23 & 43 \\\\\n",
+ "\t4 & 14 & 1.85278 & 9.36913 & 10 & GroupB & 24 & 44 \\\\\n",
+ "\t5 & 15 & -0.827763 & 7.2771 & 15 & GroupB & 25 & 45 \\\\\n",
+ "\t6 & 16 & 0.110096 & 9.77109 & 15 & GroupA & 26 & 46 \\\\\n",
+ "\t7 & 17 & -0.251176 & 10.3317 & 6 & GroupB & 27 & 47 \\\\\n",
+ "\t8 & 18 & 0.369714 & 9.18312 & 5 & GroupA & 28 & 48 \\\\\n",
+ "\t9 & 19 & 0.0721164 & 7.98043 & 12 & GroupB & 29 & 49 \\\\\n",
+ "\t10 & 20 & -1.50343 & 8.91239 & 13 & GroupA & 30 & 50 \\\\\n",
+ "\t11 & 21 & 1.56417 & 7.54655 & 14 & GroupB & 31 & 51 \\\\\n",
+ "\t12 & 22 & -1.39674 & 8.91657 & 5 & GroupB & 32 & 52 \\\\\n",
+ "\t13 & 23 & 1.1055 & 8.62701 & 8 & GroupA & 33 & 53 \\\\\n",
+ "\t14 & 24 & -1.10673 & 8.57414 & 9 & GroupA & 34 & 54 \\\\\n",
+ "\t15 & 25 & -3.21136 & 9.34588 & 5 & GroupA & 35 & 55 \\\\\n",
+ "\t16 & 26 & -0.0740145 & 11.0297 & 9 & GroupA & 36 & 56 \\\\\n",
+ "\t17 & 27 & 0.150976 & 14.8349 & 10 & GroupA & 37 & 57 \\\\\n",
+ "\t18 & 28 & 0.769278 & 9.38405 & 14 & GroupA & 38 & 58 \\\\\n",
+ "\t19 & 29 & -0.310153 & 12.4906 & 15 & GroupA & 39 & 59 \\\\\n",
+ "\t20 & 30 & -0.602707 & 9.9001 & 7 & GroupA & 40 & 60 \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "20×7 DataFrame\n",
+ "│ Row │ ID │ var1 │ var2 │ var3 │ Col1 │ var4 │ var5 │\n",
+ "│ │ \u001b[90mInt64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mInt64\u001b[39m │\n",
+ "├─────┼───────┼────────────┼─────────┼───────┼────────┼───────┼───────┤\n",
+ "│ 1 │ 11 │ -0.560501 │ 9.83968 │ 15 │ GroupA │ 21 │ 41 │\n",
+ "│ 2 │ 12 │ -0.0192918 │ 7.81756 │ 14 │ GroupB │ 22 │ 42 │\n",
+ "│ 3 │ 13 │ 0.128064 │ 8.83897 │ 11 │ GroupB │ 23 │ 43 │\n",
+ "│ 4 │ 14 │ 1.85278 │ 9.36913 │ 10 │ GroupB │ 24 │ 44 │\n",
+ "│ 5 │ 15 │ -0.827763 │ 7.2771 │ 15 │ GroupB │ 25 │ 45 │\n",
+ "│ 6 │ 16 │ 0.110096 │ 9.77109 │ 15 │ GroupA │ 26 │ 46 │\n",
+ "│ 7 │ 17 │ -0.251176 │ 10.3317 │ 6 │ GroupB │ 27 │ 47 │\n",
+ "│ 8 │ 18 │ 0.369714 │ 9.18312 │ 5 │ GroupA │ 28 │ 48 │\n",
+ "│ 9 │ 19 │ 0.0721164 │ 7.98043 │ 12 │ GroupB │ 29 │ 49 │\n",
+ "│ 10 │ 20 │ -1.50343 │ 8.91239 │ 13 │ GroupA │ 30 │ 50 │\n",
+ "│ 11 │ 21 │ 1.56417 │ 7.54655 │ 14 │ GroupB │ 31 │ 51 │\n",
+ "│ 12 │ 22 │ -1.39674 │ 8.91657 │ 5 │ GroupB │ 32 │ 52 │\n",
+ "│ 13 │ 23 │ 1.1055 │ 8.62701 │ 8 │ GroupA │ 33 │ 53 │\n",
+ "│ 14 │ 24 │ -1.10673 │ 8.57414 │ 9 │ GroupA │ 34 │ 54 │\n",
+ "│ 15 │ 25 │ -3.21136 │ 9.34588 │ 5 │ GroupA │ 35 │ 55 │\n",
+ "│ 16 │ 26 │ -0.0740145 │ 11.0297 │ 9 │ GroupA │ 36 │ 56 │\n",
+ "│ 17 │ 27 │ 0.150976 │ 14.8349 │ 10 │ GroupA │ 37 │ 57 │\n",
+ "│ 18 │ 28 │ 0.769278 │ 9.38405 │ 14 │ GroupA │ 38 │ 58 │\n",
+ "│ 19 │ 29 │ -0.310153 │ 12.4906 │ 15 │ GroupA │ 39 │ 59 │\n",
+ "│ 20 │ 30 │ -0.602707 │ 9.9001 │ 7 │ GroupA │ 40 │ 60 │"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df4 = innerjoin(df2,df3,on = :ID)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Julia 1.2.0",
+ "language": "julia",
+ "name": "julia-1.2"
+ },
+ "language_info": {
+ "file_extension": ".jl",
+ "mimetype": "application/julia",
+ "name": "julia",
+ "version": "1.2.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/Week4_Working with data.ipynb b/Week4_Working with data.ipynb
new file mode 100644
index 0000000..f6eba56
--- /dev/null
+++ b/Week4_Working with data.ipynb
@@ -0,0 +1,2127 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Working with data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## In this lecture"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- [Introduction](#Introduction)\n",
+ "- [Distributions](#Distributions)\n",
+ "- [Normal distribution](#The-normal-distribution)\n",
+ "- [Other distributions](#Other-distributions)\n",
+ "- [DataFrames](#DataFrames)\n",
+ " - [Combining dataframes](#Combining-dataframes)\n",
+ " - [Grouping](#Grouping)\n",
+ " - [Sorting](#Sorting)\n",
+ " - [Unique rows only](#Unique-rows-only)\n",
+ " - [Deleting rows](#Deleting-rows)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Introduction"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The ability to use data is fundamental to most modern computer coding tasks. In this lecture, we will have a brief introduction to the way in which the Julia language incorporates data through the use of the `Distributions.jl` and `DataFrames.jl` packages."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[Back to the top](#In-this-lecture)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Distributions"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Data point values for a variable usually follow a pattern. Such patterns are called distributions. Distributions are either discrete or continuous. The `Distributions.jl` package contains most of the common data distributions."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We will also use the `Random.jl` package to seed the pseudo-random number generator so that we can reproduce the random values that we are going to use in the lecture."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "using Distributions\n",
+ "using Random"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[Back to the top](#In-this-lecture)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### The normal distribution"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The normal distribution is the famous bell-shaped curve that we are familiar with. Values around the mean occur most frequently and as values get progressively further away from the mean, they occur less frequently."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(0.0, 1.0)"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#Seed the pseudo-random number generator\n",
+ "Random.seed!(1234)\n",
+ "#Saving the standard normal distribution as an object\n",
+ "n = Distributions.Normal() # This function is from the Distributions package\n",
+ "#Parameter values of the standard normal distribution\n",
+ "params(n)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Using the `params()` function, we note a mean of $0$ and a standard deviation of $1$. A normal distribution with these parameters is called the _standard normal distribution_."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `fieldnames()` function provides the names of the parameters of the given distribution. In the case of the normal distribution, these are the average and the standard deviation, namely $\\mu$ and $\\sigma$."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(:μ, :σ)"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Returning the parameters of the normal distribution\n",
+ "fieldnames(Normal)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now we create a variable called `var1` and use the `rand()` function to select $10$ random values from the standard normal distribution."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#Seed the pseudo-random number generator\n",
+ "Random.seed!(1234)\n",
+ "#Select 10 elements at random from n\n",
+ "var1 = rand(n, 10);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can calculate the average and standard deviation of our randomly selected values."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.18909179133831322"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Average\n",
+ "mean(var1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.9879593623730926"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Standard deviation\n",
+ "std(var1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `pdf()` function calculates the probability density function value of a given distribution at a specified point."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.38138781546052414"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#Probability density function value at x = 0.3\n",
+ "pdf(Normal(), 0.3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `cdf()` function calculates the cumulative distribution function value of a given distribution up until a specified point (from $- \\infty$)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.5987063256829237"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#Cumulative distribution function at x = 0.25\n",
+ "cdf(Normal(), 0.25)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The values for the average and standard deviation can be specified."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Creating 100 data point values from a normal distribution\n",
+ "# with a mean of 100 and a standard deviation of 10\n",
+ "Random.seed!(1234)\n",
+ "var2 = rand(Normal(100, 10), 100);"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "98.52365657772843"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Calculating the mean of var2\n",
+ "mean(var2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "9.580963685859091"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#Calculating the standard deviation of var2\n",
+ "std(var2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The parameters of a set of values for a specified distribution can be returned."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Normal{Float64}(μ=98.52365657772843, σ=9.532938502804532)"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#Using fit() to calculate the parameters of a distribution\n",
+ "fit(Normal, var2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `quantile()` function provides us with values for specific percentiles (provided as fractions). Below we calculate the $2.5$th and $97.5$th percentile values of the standard normal distribution."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "-1.9599639845400592"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Quantiles\n",
+ "quantile(Normal(), 0.025)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1.9599639845400576"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "quantile(Normal(), 0.975)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[Back to the top](#In-this-lecture)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Other distributions"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "There are many distributions in the `Distributions.jl` package. In the code below, a few of these are showcased by way of setting parameters, selecting random values, and fitting those values back to the distribution or returning the parameter field names."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Beta{Float64}(α=1.236721159927394, β=1.1368118923305863)"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Beta distribution\n",
+ "b = Beta(1, 1)\n",
+ "params(b)\n",
+ "Random.seed!(1234)\n",
+ "var3 = rand(b, 100);\n",
+ "fit(Beta, var3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(:ν,)"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# χ2 distribution\n",
+ "c = Chisq(1)\n",
+ "Random.seed!(1234)\n",
+ "var4 = rand(c, 100)\n",
+ "fieldnames(Chisq) # Degrees of freedom"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[Back to the top](#In-this-lecture)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## DataFrames"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `DataFrames.jl` package allows for creation of a flat data structure (rows and columns). Columns are variables and rows are subjects (examples)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "using DataFrames"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Below, we create an empty dataframe object that we call `df`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#Create an empty DataFrame\n",
+ "df = DataFrame();"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Column headers representing statistical variable names are entered in square brackets as symbols, i.e. preceded with a colon. We will attach the `var2` set of values as data point entries for this statistical variable."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Add a column with data point values (rows)\n",
+ "df[:Var2] = var2;"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can specify to print the first $5$ rows to the screen with the `first()` function."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | Var2 |
---|
| Float64 |
---|
5 rows × 1 columns
1 | 108.673 |
---|
2 | 90.9826 |
---|
3 | 95.0552 |
---|
4 | 90.9709 |
---|
5 | 108.644 |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|c}\n",
+ "\t& Var2\\\\\n",
+ "\t\\hline\n",
+ "\t& Float64\\\\\n",
+ "\t\\hline\n",
+ "\t1 & 108.673 \\\\\n",
+ "\t2 & 90.9826 \\\\\n",
+ "\t3 & 95.0552 \\\\\n",
+ "\t4 & 90.9709 \\\\\n",
+ "\t5 & 108.644 \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "5×1 DataFrame\n",
+ "│ Row │ Var2 │\n",
+ "│ │ \u001b[90mFloat64\u001b[39m │\n",
+ "├─────┼─────────┤\n",
+ "│ 1 │ 108.673 │\n",
+ "│ 2 │ 90.9826 │\n",
+ "│ 3 │ 95.0552 │\n",
+ "│ 4 │ 90.9709 │\n",
+ "│ 5 │ 108.644 │"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#View first five rows\n",
+ "first(df, 5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Below, we create another statistical variable with some data point values that we already have in the waiting."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Add another column\n",
+ "df[:Var3] = var3;"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `last()` function shows the last specified rows."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | Var2 | Var3 |
---|
| Float64 | Float64 |
---|
3 rows × 2 columns
1 | 95.5675 | 0.831916 |
---|
2 | 83.3677 | 0.221771 |
---|
3 | 94.7877 | 0.655592 |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|cc}\n",
+ "\t& Var2 & Var3\\\\\n",
+ "\t\\hline\n",
+ "\t& Float64 & Float64\\\\\n",
+ "\t\\hline\n",
+ "\t1 & 95.5675 & 0.831916 \\\\\n",
+ "\t2 & 83.3677 & 0.221771 \\\\\n",
+ "\t3 & 94.7877 & 0.655592 \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "3×2 DataFrame\n",
+ "│ Row │ Var2 │ Var3 │\n",
+ "│ │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │\n",
+ "├─────┼─────────┼──────────┤\n",
+ "│ 1 │ 95.5675 │ 0.831916 │\n",
+ "│ 2 │ 83.3677 │ 0.221771 │\n",
+ "│ 3 │ 94.7877 │ 0.655592 │"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# View last three rows\n",
+ "last(df, 3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `size()` function returns a tuple with the number of rows and columns."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(100, 2)"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Dimensions of a DataFrame\n",
+ "size(df)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `describe()` function attempts to provide summary statistics of the variables."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | variable | mean | min | median | max | nunique | nmissing | eltype |
---|
| Symbol | Float64 | Float64 | Float64 | Float64 | Nothing | Nothing | DataType |
---|
2 rows × 8 columns
1 | Var2 | 98.5237 | 67.8864 | 98.1718 | 124.175 | | | Float64 |
---|
2 | Var3 | 0.521047 | 0.00145384 | 0.522808 | 0.971161 | | | Float64 |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|cccccccc}\n",
+ "\t& variable & mean & min & median & max & nunique & nmissing & eltype\\\\\n",
+ "\t\\hline\n",
+ "\t& Symbol & Float64 & Float64 & Float64 & Float64 & Nothing & Nothing & DataType\\\\\n",
+ "\t\\hline\n",
+ "\t1 & Var2 & 98.5237 & 67.8864 & 98.1718 & 124.175 & & & Float64 \\\\\n",
+ "\t2 & Var3 & 0.521047 & 0.00145384 & 0.522808 & 0.971161 & & & Float64 \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "2×8 DataFrame. Omitted printing of 2 columns\n",
+ "│ Row │ variable │ mean │ min │ median │ max │ nunique │\n",
+ "│ │ \u001b[90mSymbol\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mNothing\u001b[39m │\n",
+ "├─────┼──────────┼──────────┼────────────┼──────────┼──────────┼─────────┤\n",
+ "│ 1 │ Var2 │ 98.5237 │ 67.8864 │ 98.1718 │ 124.175 │ │\n",
+ "│ 2 │ Var3 │ 0.521047 │ 0.00145384 │ 0.522808 │ 0.971161 │ │"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Summarize the content\n",
+ "describe(df)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The data type for each variable can be returned."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "2-element Array{DataType,1}:\n",
+ " Float64\n",
+ " Float64"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Data type only\n",
+ "eltypes(df)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Below we create a new instance of a dataframe object called `df2`. It contains four statistical variables. Note the use of symbol notation in creating the names of these variables."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Create a bigger DataFrame\n",
+ "df2 = DataFrame()\n",
+ "df2[:A] = 1:10\n",
+ "df2[:B] = [\"I\", \"II\", \"II\", \"I\", \"II\",\"I\", \"II\", \"II\", \"I\", \"II\"]\n",
+ "Random.seed!(1234)\n",
+ "df2[:C] = rand(Normal(), 10)\n",
+ "df2[:D] = rand(Chisq(1), 10);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "By using indexing (in square brackets), we can refer to row and column values (i.e. _row, column_). Below is an example of selecting data point values for rows one through three, showing all the columns. The colon symbol serves as shortcut syntax for this selection."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | A | B | C | D |
---|
| Int64 | String | Float64 | Float64 |
---|
3 rows × 4 columns
1 | 1 | I | 0.867347 | 0.0123688 |
---|
2 | 2 | II | -0.901744 | 0.213586 |
---|
3 | 3 | II | -0.494479 | 0.00899443 |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|cccc}\n",
+ "\t& A & B & C & D\\\\\n",
+ "\t\\hline\n",
+ "\t& Int64 & String & Float64 & Float64\\\\\n",
+ "\t\\hline\n",
+ "\t1 & 1 & I & 0.867347 & 0.0123688 \\\\\n",
+ "\t2 & 2 & II & -0.901744 & 0.213586 \\\\\n",
+ "\t3 & 3 & II & -0.494479 & 0.00899443 \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "3×4 DataFrame\n",
+ "│ Row │ A │ B │ C │ D │\n",
+ "│ │ \u001b[90mInt64\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │\n",
+ "├─────┼───────┼────────┼───────────┼────────────┤\n",
+ "│ 1 │ 1 │ I │ 0.867347 │ 0.0123688 │\n",
+ "│ 2 │ 2 │ II │ -0.901744 │ 0.213586 │\n",
+ "│ 3 │ 3 │ II │ -0.494479 │ 0.00899443 │"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# First three rows with all the columns\n",
+ "df2[1:3, :]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If we want only specific columns, that is to say, not a contiguous range such as the rows one through three that we selected above, but rather only columns one and three, we create a list to indicate this."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | A | C |
---|
| Int64 | Float64 |
---|
10 rows × 2 columns
1 | 1 | 0.867347 |
---|
2 | 2 | -0.901744 |
---|
3 | 3 | -0.494479 |
---|
4 | 4 | -0.902914 |
---|
5 | 5 | 0.864401 |
---|
6 | 6 | 2.21188 |
---|
7 | 7 | 0.532813 |
---|
8 | 8 | -0.271735 |
---|
9 | 9 | 0.502334 |
---|
10 | 10 | -0.516984 |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|cc}\n",
+ "\t& A & C\\\\\n",
+ "\t\\hline\n",
+ "\t& Int64 & Float64\\\\\n",
+ "\t\\hline\n",
+ "\t1 & 1 & 0.867347 \\\\\n",
+ "\t2 & 2 & -0.901744 \\\\\n",
+ "\t3 & 3 & -0.494479 \\\\\n",
+ "\t4 & 4 & -0.902914 \\\\\n",
+ "\t5 & 5 & 0.864401 \\\\\n",
+ "\t6 & 6 & 2.21188 \\\\\n",
+ "\t7 & 7 & 0.532813 \\\\\n",
+ "\t8 & 8 & -0.271735 \\\\\n",
+ "\t9 & 9 & 0.502334 \\\\\n",
+ "\t10 & 10 & -0.516984 \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "10×2 DataFrame\n",
+ "│ Row │ A │ C │\n",
+ "│ │ \u001b[90mInt64\u001b[39m │ \u001b[90mFloat64\u001b[39m │\n",
+ "├─────┼───────┼───────────┤\n",
+ "│ 1 │ 1 │ 0.867347 │\n",
+ "│ 2 │ 2 │ -0.901744 │\n",
+ "│ 3 │ 3 │ -0.494479 │\n",
+ "│ 4 │ 4 │ -0.902914 │\n",
+ "│ 5 │ 5 │ 0.864401 │\n",
+ "│ 6 │ 6 │ 2.21188 │\n",
+ "│ 7 │ 7 │ 0.532813 │\n",
+ "│ 8 │ 8 │ -0.271735 │\n",
+ "│ 9 │ 9 │ 0.502334 │\n",
+ "│ 10 │ 10 │ -0.516984 │"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# All rows, columns 1 and 3\n",
+ "df2[:, [1, 3]]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Instead of indicating the column numbers, we can also reference the actual column names (statistical variable names), using symbol notation, i.e. `:A`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | A | C |
---|
| Int64 | Float64 |
---|
10 rows × 2 columns
1 | 1 | 0.867347 |
---|
2 | 2 | -0.901744 |
---|
3 | 3 | -0.494479 |
---|
4 | 4 | -0.902914 |
---|
5 | 5 | 0.864401 |
---|
6 | 6 | 2.21188 |
---|
7 | 7 | 0.532813 |
---|
8 | 8 | -0.271735 |
---|
9 | 9 | 0.502334 |
---|
10 | 10 | -0.516984 |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|cc}\n",
+ "\t& A & C\\\\\n",
+ "\t\\hline\n",
+ "\t& Int64 & Float64\\\\\n",
+ "\t\\hline\n",
+ "\t1 & 1 & 0.867347 \\\\\n",
+ "\t2 & 2 & -0.901744 \\\\\n",
+ "\t3 & 3 & -0.494479 \\\\\n",
+ "\t4 & 4 & -0.902914 \\\\\n",
+ "\t5 & 5 & 0.864401 \\\\\n",
+ "\t6 & 6 & 2.21188 \\\\\n",
+ "\t7 & 7 & 0.532813 \\\\\n",
+ "\t8 & 8 & -0.271735 \\\\\n",
+ "\t9 & 9 & 0.502334 \\\\\n",
+ "\t10 & 10 & -0.516984 \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "10×2 DataFrame\n",
+ "│ Row │ A │ C │\n",
+ "│ │ \u001b[90mInt64\u001b[39m │ \u001b[90mFloat64\u001b[39m │\n",
+ "├─────┼───────┼───────────┤\n",
+ "│ 1 │ 1 │ 0.867347 │\n",
+ "│ 2 │ 2 │ -0.901744 │\n",
+ "│ 3 │ 3 │ -0.494479 │\n",
+ "│ 4 │ 4 │ -0.902914 │\n",
+ "│ 5 │ 5 │ 0.864401 │\n",
+ "│ 6 │ 6 │ 2.21188 │\n",
+ "│ 7 │ 7 │ 0.532813 │\n",
+ "│ 8 │ 8 │ -0.271735 │\n",
+ "│ 9 │ 9 │ 0.502334 │\n",
+ "│ 10 │ 10 │ -0.516984 │"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Different notation\n",
+ "df2[:, [:A, :C]]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `CSV.jl` package's `read()` function can import a comma separated values data file."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Make sure to install the package in the REPL first\n",
+ "using CSV"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The file is saved in the same directory / folder as this notebook file."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Import csv file (in same directory / folder)\n",
+ "data1 = CSV.read(\"CCS.csv\");"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Using the `typeof()` function, we note that we now have an instance of a dataframe object."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "DataFrame"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "typeof(data1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's view the first five rows of data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | PatientID | Cat1 | Cat2 | Var1 | Var2 | Var3 |
---|
| Int64⍰ | String⍰ | String⍰ | Float64⍰ | Float64⍰ | Float64⍰ |
---|
5 rows × 6 columns
1 | 1 | A | C | 38.2568 | 5.93913 | 35.0579 |
---|
2 | 2 | A | C | 17.8317 | 5.34754 | 21.131 |
---|
3 | 8 | A | B | 16.0218 | 6.60709 | 60.9436 |
---|
4 | 9 | A | C | 45.1158 | 6.00733 | 21.8797 |
---|
5 | 16 | A | C | 20.448 | 8.54819 | 20.6623 |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|cccccc}\n",
+ "\t& PatientID & Cat1 & Cat2 & Var1 & Var2 & Var3\\\\\n",
+ "\t\\hline\n",
+ "\t& Int64⍰ & String⍰ & String⍰ & Float64⍰ & Float64⍰ & Float64⍰\\\\\n",
+ "\t\\hline\n",
+ "\t1 & 1 & A & C & 38.2568 & 5.93913 & 35.0579 \\\\\n",
+ "\t2 & 2 & A & C & 17.8317 & 5.34754 & 21.131 \\\\\n",
+ "\t3 & 8 & A & B & 16.0218 & 6.60709 & 60.9436 \\\\\n",
+ "\t4 & 9 & A & C & 45.1158 & 6.00733 & 21.8797 \\\\\n",
+ "\t5 & 16 & A & C & 20.448 & 8.54819 & 20.6623 \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "5×6 DataFrame\n",
+ "│ Row │ PatientID │ Cat1 │ Cat2 │ Var1 │ Var2 │ Var3 │\n",
+ "│ │ \u001b[90mInt64⍰\u001b[39m │ \u001b[90mString⍰\u001b[39m │ \u001b[90mString⍰\u001b[39m │ \u001b[90mFloat64⍰\u001b[39m │ \u001b[90mFloat64⍰\u001b[39m │ \u001b[90mFloat64⍰\u001b[39m │\n",
+ "├─────┼───────────┼─────────┼─────────┼──────────┼──────────┼──────────┤\n",
+ "│ 1 │ 1 │ A │ C │ 38.2568 │ 5.93913 │ 35.0579 │\n",
+ "│ 2 │ 2 │ A │ C │ 17.8317 │ 5.34754 │ 21.131 │\n",
+ "│ 3 │ 8 │ A │ B │ 16.0218 │ 6.60709 │ 60.9436 │\n",
+ "│ 4 │ 9 │ A │ C │ 45.1158 │ 6.00733 │ 21.8797 │\n",
+ "│ 5 │ 16 │ A │ C │ 20.448 │ 8.54819 │ 20.6623 │"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "first(data1, 5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `describe()` function will attempt to summarize all the variables. In the case of categorical variables, an alphabetical arrangement for minimum and maximum values will be stated."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | variable | mean | min | median | max | nunique | nmissing | eltype |
---|
| Symbol | Union… | Any | Union… | Any | Union… | Int64 | DataType |
---|
6 rows × 8 columns
1 | PatientID | 60.5 | 1 | 60.5 | 120 | | 0 | Int64 |
---|
2 | Cat1 | | A | | B | 2 | 0 | String |
---|
3 | Cat2 | | B | | X | 6 | 0 | String |
---|
4 | Var1 | 27.9679 | 15.2356 | 22.6801 | 84.2378 | | 0 | Float64 |
---|
5 | Var2 | 5.92121 | 3.01173 | 5.64241 | 15.5826 | | 0 | Float64 |
---|
6 | Var3 | 51.95 | 20.3153 | 44.3042 | 147.397 | | 0 | Float64 |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|cccccccc}\n",
+ "\t& variable & mean & min & median & max & nunique & nmissing & eltype\\\\\n",
+ "\t\\hline\n",
+ "\t& Symbol & Union… & Any & Union… & Any & Union… & Int64 & DataType\\\\\n",
+ "\t\\hline\n",
+ "\t1 & PatientID & 60.5 & 1 & 60.5 & 120 & & 0 & Int64 \\\\\n",
+ "\t2 & Cat1 & & A & & B & 2 & 0 & String \\\\\n",
+ "\t3 & Cat2 & & B & & X & 6 & 0 & String \\\\\n",
+ "\t4 & Var1 & 27.9679 & 15.2356 & 22.6801 & 84.2378 & & 0 & Float64 \\\\\n",
+ "\t5 & Var2 & 5.92121 & 3.01173 & 5.64241 & 15.5826 & & 0 & Float64 \\\\\n",
+ "\t6 & Var3 & 51.95 & 20.3153 & 44.3042 & 147.397 & & 0 & Float64 \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "6×8 DataFrame. Omitted printing of 1 columns\n",
+ "│ Row │ variable │ mean │ min │ median │ max │ nunique │ nmissing │\n",
+ "│ │ \u001b[90mSymbol\u001b[39m │ \u001b[90mUnion…\u001b[39m │ \u001b[90mAny\u001b[39m │ \u001b[90mUnion…\u001b[39m │ \u001b[90mAny\u001b[39m │ \u001b[90mUnion…\u001b[39m │ \u001b[90mInt64\u001b[39m │\n",
+ "├─────┼───────────┼─────────┼─────────┼─────────┼─────────┼─────────┼──────────┤\n",
+ "│ 1 │ PatientID │ 60.5 │ 1 │ 60.5 │ 120 │ │ 0 │\n",
+ "│ 2 │ Cat1 │ │ A │ │ B │ 2 │ 0 │\n",
+ "│ 3 │ Cat2 │ │ B │ │ X │ 6 │ 0 │\n",
+ "│ 4 │ Var1 │ 27.9679 │ 15.2356 │ 22.6801 │ 84.2378 │ │ 0 │\n",
+ "│ 5 │ Var2 │ 5.92121 │ 3.01173 │ 5.64241 │ 15.5826 │ │ 0 │\n",
+ "│ 6 │ Var3 │ 51.95 │ 20.3153 │ 44.3042 │ 147.397 │ │ 0 │"
+ ]
+ },
+ "execution_count": 34,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "describe(data1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[Back to the top](#In-this-lecture)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Combining dataframes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Combining dataframes on a common variable is a very useful operation. Below we create two dataframe instances. Note that both have a `Number` variable."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Creating DataFrames\n",
+ "subjects = DataFrame(Number = [100, 101, 102, 103], Stage = [\"I\", \"III\", \"II\", \"I\"])\n",
+ "treatment = DataFrame(Number = [103, 102, 101, 100], Treatment = [\"A\", \"B\", \"A\", \"B\"]);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `join()` function takes the dataframe objects that require joining as arguments. The `on =` argument (in symbol form), specifies the variable on which to join. In this default mode, only values for the stated variable that appear in both dataframes will be included."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | Number | Stage | Treatment |
---|
| Int64 | String | String |
---|
4 rows × 3 columns
1 | 100 | I | B |
---|
2 | 101 | III | A |
---|
3 | 102 | II | B |
---|
4 | 103 | I | A |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|ccc}\n",
+ "\t& Number & Stage & Treatment\\\\\n",
+ "\t\\hline\n",
+ "\t& Int64 & String & String\\\\\n",
+ "\t\\hline\n",
+ "\t1 & 100 & I & B \\\\\n",
+ "\t2 & 101 & III & A \\\\\n",
+ "\t3 & 102 & II & B \\\\\n",
+ "\t4 & 103 & I & A \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "4×3 DataFrame\n",
+ "│ Row │ Number │ Stage │ Treatment │\n",
+ "│ │ \u001b[90mInt64\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mString\u001b[39m │\n",
+ "├─────┼────────┼────────┼───────────┤\n",
+ "│ 1 │ 100 │ I │ B │\n",
+ "│ 2 │ 101 │ III │ A │\n",
+ "│ 3 │ 102 │ II │ B │\n",
+ "│ 4 │ 103 │ I │ A │"
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#Joining\n",
+ "df3 = join(subjects, treatment, on = :Number);\n",
+ "df3"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Adding a longer list of subjects\n",
+ "subjects = DataFrame(Number = [100, 101, 102, 103, 104, 105], Stage = [\"I\", \"III\", \"II\", \"I\", \"II\", \"II\"]);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `kind =` argument allows for more control. An inner join is the default (same as above)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | Number | Stage | Treatment |
---|
| Int64 | String | String |
---|
4 rows × 3 columns
1 | 100 | I | B |
---|
2 | 101 | III | A |
---|
3 | 102 | II | B |
---|
4 | 103 | I | A |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|ccc}\n",
+ "\t& Number & Stage & Treatment\\\\\n",
+ "\t\\hline\n",
+ "\t& Int64 & String & String\\\\\n",
+ "\t\\hline\n",
+ "\t1 & 100 & I & B \\\\\n",
+ "\t2 & 101 & III & A \\\\\n",
+ "\t3 & 102 & II & B \\\\\n",
+ "\t4 & 103 & I & A \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "4×3 DataFrame\n",
+ "│ Row │ Number │ Stage │ Treatment │\n",
+ "│ │ \u001b[90mInt64\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mString\u001b[39m │\n",
+ "├─────┼────────┼────────┼───────────┤\n",
+ "│ 1 │ 100 │ I │ B │\n",
+ "│ 2 │ 101 │ III │ A │\n",
+ "│ 3 │ 102 │ II │ B │\n",
+ "│ 4 │ 103 │ I │ A │"
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Inner join\n",
+ " df4 = join(subjects, treatment, on = :Number, kind = :inner);\n",
+ " df4"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "An outer join keeps every row from both dataframes and fills the fields that have no match with `missing`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | Number | Stage | Treatment |
---|
| Int64⍰ | String⍰ | String⍰ |
---|
6 rows × 3 columns
1 | 100 | I | B |
---|
2 | 101 | III | A |
---|
3 | 102 | II | B |
---|
4 | 103 | I | A |
---|
5 | 104 | II | missing |
---|
6 | 105 | II | missing |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|ccc}\n",
+ "\t& Number & Stage & Treatment\\\\\n",
+ "\t\\hline\n",
+ "\t& Int64⍰ & String⍰ & String⍰\\\\\n",
+ "\t\\hline\n",
+ "\t1 & 100 & I & B \\\\\n",
+ "\t2 & 101 & III & A \\\\\n",
+ "\t3 & 102 & II & B \\\\\n",
+ "\t4 & 103 & I & A \\\\\n",
+ "\t5 & 104 & II & \\\\\n",
+ "\t6 & 105 & II & \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "6×3 DataFrame\n",
+ "│ Row │ Number │ Stage │ Treatment │\n",
+ "│ │ \u001b[90mInt64⍰\u001b[39m │ \u001b[90mString⍰\u001b[39m │ \u001b[90mString⍰\u001b[39m │\n",
+ "├─────┼────────┼─────────┼───────────┤\n",
+ "│ 1 │ 100 │ I │ B │\n",
+ "│ 2 │ 101 │ III │ A │\n",
+ "│ 3 │ 102 │ II │ B │\n",
+ "│ 4 │ 103 │ I │ A │\n",
+ "│ 5 │ 104 │ II │ \u001b[90mmissing\u001b[39m │\n",
+ "│ 6 │ 105 │ II │ \u001b[90mmissing\u001b[39m │"
+ ]
+ },
+ "execution_count": 39,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Outer join: empty fields filled with missing\n",
+ "df5 = join(subjects, treatment, on = :Number, kind = :outer);\n",
+ "df5"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[Back to the top](#In-this-lecture)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Grouping"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A dataframe can be _split_ into groups of rows according to the values in a variable."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | Group | Variable1 | Variable2 |
---|
| String | Float64 | Float64 |
---|
3 rows × 3 columns
1 | B | 0.447358 | 0.137658 |
---|
2 | B | -0.396211 | 0.60808 |
---|
3 | B | 0.366773 | 0.255054 |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|ccc}\n",
+ "\t& Group & Variable1 & Variable2\\\\\n",
+ "\t\\hline\n",
+ "\t& String & Float64 & Float64\\\\\n",
+ "\t\\hline\n",
+ "\t1 & B & 0.447358 & 0.137658 \\\\\n",
+ "\t2 & B & -0.396211 & 0.60808 \\\\\n",
+ "\t3 & B & 0.366773 & 0.255054 \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "3×3 DataFrame\n",
+ "│ Row │ Group │ Variable1 │ Variable2 │\n",
+ "│ │ \u001b[90mString\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │\n",
+ "├─────┼────────┼───────────┼───────────┤\n",
+ "│ 1 │ B │ 0.447358 │ 0.137658 │\n",
+ "│ 2 │ B │ -0.396211 │ 0.60808 │\n",
+ "│ 3 │ B │ 0.366773 │ 0.255054 │"
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Creating a new DataFrame\n",
+ "df6 = DataFrame(Group = rand([\"A\", \"B\", \"C\"], 15), Variable1 = randn(15), Variable2 = rand(15));\n",
+ "first(df6, 3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `by()` function takes a dataframe object as first argument. This is followed by the column (variable) to group by. Below we pass the `size` function as the third argument, so that for each unique value found in the specified variable we get the number of rows and columns of the corresponding group."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | Group | x1 |
---|
| String | Tuple… |
---|
3 rows × 2 columns
1 | B | (9, 3) |
---|
2 | A | (4, 3) |
---|
3 | C | (2, 3) |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|cc}\n",
+ "\t& Group & x1\\\\\n",
+ "\t\\hline\n",
+ "\t& String & Tuple…\\\\\n",
+ "\t\\hline\n",
+ "\t1 & B & (9, 3) \\\\\n",
+ "\t2 & A & (4, 3) \\\\\n",
+ "\t3 & C & (2, 3) \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "3×2 DataFrame\n",
+ "│ Row │ Group │ x1 │\n",
+ "│ │ \u001b[90mString\u001b[39m │ \u001b[90mTuple…\u001b[39m │\n",
+ "├─────┼────────┼────────┤\n",
+ "│ 1 │ B │ (9, 3) │\n",
+ "│ 2 │ A │ (4, 3) │\n",
+ "│ 3 │ C │ (2, 3) │"
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#Grouping using by()\n",
+ "by(df6, :Group, size)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Since the dataframe has three columns, we note that as the second value in each `size` tuple returned above. The first value shows the number of rows that contain each unique value of the specified variable."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Below we create a dataframe instance that shows only the count of the unique values."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | Group | Count |
---|
| String | Int64 |
---|
3 rows × 2 columns
1 | B | 9 |
---|
2 | A | 4 |
---|
3 | C | 2 |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|cc}\n",
+ "\t& Group & Count\\\\\n",
+ "\t\\hline\n",
+ "\t& String & Int64\\\\\n",
+ "\t\\hline\n",
+ "\t1 & B & 9 \\\\\n",
+ "\t2 & A & 4 \\\\\n",
+ "\t3 & C & 2 \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "3×2 DataFrame\n",
+ "│ Row │ Group │ Count │\n",
+ "│ │ \u001b[90mString\u001b[39m │ \u001b[90mInt64\u001b[39m │\n",
+ "├─────┼────────┼───────┤\n",
+ "│ 1 │ B │ 9 │\n",
+ "│ 2 │ A │ 4 │\n",
+ "│ 3 │ C │ 2 │"
+ ]
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Count unique data point values in :Group column\n",
+ "by(df6, :Group, dfc -> DataFrame(Count = size(dfc, 1)))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `aggregate()` function also groups a dataframe by the unique values of a specified column, but then applies a list of summary functions (here `mean` and `std`) to each of the remaining columns."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "3×5 DataFrame\n",
+ "│ Row │ Group │ Variable1_mean │ Variable2_mean │ Variable1_std │ Variable2_std │\n",
+ "│ │ \u001b[90mString\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │\n",
+ "├─────┼────────┼────────────────┼────────────────┼───────────────┼───────────────┤\n",
+ "│ 1 │ B │ 0.127675 │ 0.446397 │ 0.973237 │ 0.268476 │\n",
+ "│ 2 │ A │ -0.33429 │ 0.339451 │ 1.04503 │ 0.352194 │\n",
+ "│ 3 │ C │ -0.902111 │ 0.373207 │ 1.51729 │ 0.368007 │"
+ ]
+ }
+ ],
+ "source": [
+ "# Aggregate for descriptive statistics\n",
+ "print(aggregate(df6, :Group, [mean, std]))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `groupby()` function actually creates sub-dataframes based on the unique values found in the specified variable."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "GroupedDataFrame with 3 groups based on key: Group
First Group (9 rows): Group = \"B\"
| Group | Variable1 | Variable2 |
---|
| String | Float64 | Float64 |
---|
1 | B | 0.447358 | 0.137658 |
---|
2 | B | -0.396211 | 0.60808 |
---|
3 | B | 0.366773 | 0.255054 |
---|
4 | B | 0.621673 | 0.498734 |
---|
5 | B | 2.06353 | 0.52509 |
---|
6 | B | -1.41453 | 0.265511 |
---|
7 | B | 0.134475 | 0.110096 |
---|
8 | B | -0.750421 | 0.834362 |
---|
9 | B | 0.076418 | 0.78299 |
---|
⋮
Last Group (2 rows): Group = \"C\"
| Group | Variable1 | Variable2 |
---|
| String | Float64 | Float64 |
---|
1 | C | 0.170778 | 0.633427 |
---|
2 | C | -1.975 | 0.112987 |
---|
"
+ ],
+ "text/latex": [
+ "GroupedDataFrame with 3 groups based on key: Group\n",
+ "\n",
+ "First Group (9 rows): Group = \"B\"\n",
+ "\n",
+ "\\begin{tabular}{r|ccc}\n",
+ "\t& Group & Variable1 & Variable2\\\\\n",
+ "\t\\hline\n",
+ "\t& String & Float64 & Float64\\\\\n",
+ "\t\\hline\n",
+ "\t1 & B & 0.447358 & 0.137658 \\\\\n",
+ "\t2 & B & -0.396211 & 0.60808 \\\\\n",
+ "\t3 & B & 0.366773 & 0.255054 \\\\\n",
+ "\t4 & B & 0.621673 & 0.498734 \\\\\n",
+ "\t5 & B & 2.06353 & 0.52509 \\\\\n",
+ "\t6 & B & -1.41453 & 0.265511 \\\\\n",
+ "\t7 & B & 0.134475 & 0.110096 \\\\\n",
+ "\t8 & B & -0.750421 & 0.834362 \\\\\n",
+ "\t9 & B & 0.076418 & 0.78299 \\\\\n",
+ "\\end{tabular}\n",
+ "\n",
+ "$\\dots$\n",
+ "\n",
+ "Last Group (2 rows): Group = \"C\"\n",
+ "\n",
+ "\\begin{tabular}{r|ccc}\n",
+ "\t& Group & Variable1 & Variable2\\\\\n",
+ "\t\\hline\n",
+ "\t& String & Float64 & Float64\\\\\n",
+ "\t\\hline\n",
+ "\t1 & C & 0.170778 & 0.633427 \\\\\n",
+ "\t2 & C & -1.975 & 0.112987 \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "GroupedDataFrame with 3 groups based on key: Group\n",
+ "First Group (9 rows): Group = \"B\"\n",
+ "│ Row │ Group │ Variable1 │ Variable2 │\n",
+ "│ │ \u001b[90mString\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │\n",
+ "├─────┼────────┼───────────┼───────────┤\n",
+ "│ 1 │ B │ 0.447358 │ 0.137658 │\n",
+ "│ 2 │ B │ -0.396211 │ 0.60808 │\n",
+ "│ 3 │ B │ 0.366773 │ 0.255054 │\n",
+ "│ 4 │ B │ 0.621673 │ 0.498734 │\n",
+ "│ 5 │ B │ 2.06353 │ 0.52509 │\n",
+ "│ 6 │ B │ -1.41453 │ 0.265511 │\n",
+ "│ 7 │ B │ 0.134475 │ 0.110096 │\n",
+ "│ 8 │ B │ -0.750421 │ 0.834362 │\n",
+ "│ 9 │ B │ 0.076418 │ 0.78299 │\n",
+ "⋮\n",
+ "Last Group (2 rows): Group = \"C\"\n",
+ "│ Row │ Group │ Variable1 │ Variable2 │\n",
+ "│ │ \u001b[90mString\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │\n",
+ "├─────┼────────┼───────────┼───────────┤\n",
+ "│ 1 │ C │ 0.170778 │ 0.633427 │\n",
+ "│ 2 │ C │ -1.975 │ 0.112987 │"
+ ]
+ },
+ "execution_count": 44,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#Group\n",
+ "groupby(df6, :Group)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "By calling the `length()` function, we note that there are indeed three sub-dataframes."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3"
+ ]
+ },
+ "execution_count": 45,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "length(groupby(df6, :Group))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Using indexing, we can select any of the three sub-dataframes."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | Group | Variable1 | Variable2 |
---|
| String | Float64 | Float64 |
---|
4 rows × 3 columns
1 | A | 0.182588 | 0.0940369 |
---|
2 | A | -1.58492 | 0.337865 |
---|
3 | A | 0.799335 | 0.838042 |
---|
4 | A | -0.734161 | 0.0878598 |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|ccc}\n",
+ "\t& Group & Variable1 & Variable2\\\\\n",
+ "\t\\hline\n",
+ "\t& String & Float64 & Float64\\\\\n",
+ "\t\\hline\n",
+ "\t1 & A & 0.182588 & 0.0940369 \\\\\n",
+ "\t2 & A & -1.58492 & 0.337865 \\\\\n",
+ "\t3 & A & 0.799335 & 0.838042 \\\\\n",
+ "\t4 & A & -0.734161 & 0.0878598 \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "4×3 SubDataFrame\n",
+ "│ Row │ Group │ Variable1 │ Variable2 │\n",
+ "│ │ \u001b[90mString\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │\n",
+ "├─────┼────────┼───────────┼───────────┤\n",
+ "│ 1 │ A │ 0.182588 │ 0.0940369 │\n",
+ "│ 2 │ A │ -1.58492 │ 0.337865 │\n",
+ "│ 3 │ A │ 0.799335 │ 0.838042 │\n",
+ "│ 4 │ A │ -0.734161 │ 0.0878598 │"
+ ]
+ },
+ "execution_count": 46,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "groupby(df6, :Group)[2]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[Back to the top](#In-this-lecture)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Sorting"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Sorting using the `sort!()` function (permanent bang version used here) does what it says on the box. A list can be provided to sort by more than one variable."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | Group | Variable1 | Variable2 |
---|
| String | Float64 | Float64 |
---|
7 rows × 3 columns
1 | A | -1.58492 | 0.337865 |
---|
2 | A | -0.734161 | 0.0878598 |
---|
3 | A | 0.182588 | 0.0940369 |
---|
4 | A | 0.799335 | 0.838042 |
---|
5 | B | -1.41453 | 0.265511 |
---|
6 | B | -0.750421 | 0.834362 |
---|
7 | B | -0.396211 | 0.60808 |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|ccc}\n",
+ "\t& Group & Variable1 & Variable2\\\\\n",
+ "\t\\hline\n",
+ "\t& String & Float64 & Float64\\\\\n",
+ "\t\\hline\n",
+ "\t1 & A & -1.58492 & 0.337865 \\\\\n",
+ "\t2 & A & -0.734161 & 0.0878598 \\\\\n",
+ "\t3 & A & 0.182588 & 0.0940369 \\\\\n",
+ "\t4 & A & 0.799335 & 0.838042 \\\\\n",
+ "\t5 & B & -1.41453 & 0.265511 \\\\\n",
+ "\t6 & B & -0.750421 & 0.834362 \\\\\n",
+ "\t7 & B & -0.396211 & 0.60808 \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "7×3 DataFrame\n",
+ "│ Row │ Group │ Variable1 │ Variable2 │\n",
+ "│ │ \u001b[90mString\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │\n",
+ "├─────┼────────┼───────────┼───────────┤\n",
+ "│ 1 │ A │ -1.58492 │ 0.337865 │\n",
+ "│ 2 │ A │ -0.734161 │ 0.0878598 │\n",
+ "│ 3 │ A │ 0.182588 │ 0.0940369 │\n",
+ "│ 4 │ A │ 0.799335 │ 0.838042 │\n",
+ "│ 5 │ B │ -1.41453 │ 0.265511 │\n",
+ "│ 6 │ B │ -0.750421 │ 0.834362 │\n",
+ "│ 7 │ B │ -0.396211 │ 0.60808 │"
+ ]
+ },
+ "execution_count": 47,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df6S = sort!(df6, [:Group, :Variable1]);\n",
+ "first(df6S, 7)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[Back to the top](#In-this-lecture)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Unique rows only"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Below we create a dataframe with two identical rows."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | A | B | C |
---|
| Int64 | Int64 | String |
---|
6 rows × 3 columns
1 | 1 | 11 | A |
---|
2 | 2 | 12 | B |
---|
3 | 2 | 12 | B |
---|
4 | 3 | 13 | C |
---|
5 | 4 | 14 | D |
---|
6 | 5 | 15 | E |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|ccc}\n",
+ "\t& A & B & C\\\\\n",
+ "\t\\hline\n",
+ "\t& Int64 & Int64 & String\\\\\n",
+ "\t\\hline\n",
+ "\t1 & 1 & 11 & A \\\\\n",
+ "\t2 & 2 & 12 & B \\\\\n",
+ "\t3 & 2 & 12 & B \\\\\n",
+ "\t4 & 3 & 13 & C \\\\\n",
+ "\t5 & 4 & 14 & D \\\\\n",
+ "\t6 & 5 & 15 & E \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "6×3 DataFrame\n",
+ "│ Row │ A │ B │ C │\n",
+ "│ │ \u001b[90mInt64\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mString\u001b[39m │\n",
+ "├─────┼───────┼───────┼────────┤\n",
+ "│ 1 │ 1 │ 11 │ A │\n",
+ "│ 2 │ 2 │ 12 │ B │\n",
+ "│ 3 │ 2 │ 12 │ B │\n",
+ "│ 4 │ 3 │ 13 │ C │\n",
+ "│ 5 │ 4 │ 14 │ D │\n",
+ "│ 6 │ 5 │ 15 │ E │"
+ ]
+ },
+ "execution_count": 48,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Creating a DataFrame with an obvious duplicate row\n",
+ "df7 = DataFrame(A = [1, 2, 2, 3, 4, 5], B = [11, 12, 12, 13, 14, 15], C = [\"A\", \"B\", \"B\", \"C\", \"D\", \"E\"]);\n",
+ "df7"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `unique()` function will, as the name implies, return a dataframe without the duplicate row. The original `df7` is left unchanged, as the second cell below shows."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | A | B | C |
---|
| Int64 | Int64 | String |
---|
5 rows × 3 columns
1 | 1 | 11 | A |
---|
2 | 2 | 12 | B |
---|
3 | 3 | 13 | C |
---|
4 | 4 | 14 | D |
---|
5 | 5 | 15 | E |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|ccc}\n",
+ "\t& A & B & C\\\\\n",
+ "\t\\hline\n",
+ "\t& Int64 & Int64 & String\\\\\n",
+ "\t\\hline\n",
+ "\t1 & 1 & 11 & A \\\\\n",
+ "\t2 & 2 & 12 & B \\\\\n",
+ "\t3 & 3 & 13 & C \\\\\n",
+ "\t4 & 4 & 14 & D \\\\\n",
+ "\t5 & 5 & 15 & E \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "5×3 DataFrame\n",
+ "│ Row │ A │ B │ C │\n",
+ "│ │ \u001b[90mInt64\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mString\u001b[39m │\n",
+ "├─────┼───────┼───────┼────────┤\n",
+ "│ 1 │ 1 │ 11 │ A │\n",
+ "│ 2 │ 2 │ 12 │ B │\n",
+ "│ 3 │ 3 │ 13 │ C │\n",
+ "│ 4 │ 4 │ 14 │ D │\n",
+ "│ 5 │ 5 │ 15 │ E │"
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Only unique rows\n",
+ "unique(df7)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | A | B | C |
---|
| Int64 | Int64 | String |
---|
6 rows × 3 columns
1 | 1 | 11 | A |
---|
2 | 2 | 12 | B |
---|
3 | 2 | 12 | B |
---|
4 | 3 | 13 | C |
---|
5 | 4 | 14 | D |
---|
6 | 5 | 15 | E |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|ccc}\n",
+ "\t& A & B & C\\\\\n",
+ "\t\\hline\n",
+ "\t& Int64 & Int64 & String\\\\\n",
+ "\t\\hline\n",
+ "\t1 & 1 & 11 & A \\\\\n",
+ "\t2 & 2 & 12 & B \\\\\n",
+ "\t3 & 2 & 12 & B \\\\\n",
+ "\t4 & 3 & 13 & C \\\\\n",
+ "\t5 & 4 & 14 & D \\\\\n",
+ "\t6 & 5 & 15 & E \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "6×3 DataFrame\n",
+ "│ Row │ A │ B │ C │\n",
+ "│ │ \u001b[90mInt64\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mString\u001b[39m │\n",
+ "├─────┼───────┼───────┼────────┤\n",
+ "│ 1 │ 1 │ 11 │ A │\n",
+ "│ 2 │ 2 │ 12 │ B │\n",
+ "│ 3 │ 2 │ 12 │ B │\n",
+ "│ 4 │ 3 │ 13 │ C │\n",
+ "│ 5 │ 4 │ 14 │ D │\n",
+ "│ 6 │ 5 │ 15 │ E │"
+ ]
+ },
+ "execution_count": 50,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df7"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As always, the bang will make the change permanent."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | A | B | C |
---|
| Int64 | Int64 | String |
---|
5 rows × 3 columns
1 | 1 | 11 | A |
---|
2 | 2 | 12 | B |
---|
3 | 3 | 13 | C |
---|
4 | 4 | 14 | D |
---|
5 | 5 | 15 | E |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|ccc}\n",
+ "\t& A & B & C\\\\\n",
+ "\t\\hline\n",
+ "\t& Int64 & Int64 & String\\\\\n",
+ "\t\\hline\n",
+ "\t1 & 1 & 11 & A \\\\\n",
+ "\t2 & 2 & 12 & B \\\\\n",
+ "\t3 & 3 & 13 & C \\\\\n",
+ "\t4 & 4 & 14 & D \\\\\n",
+ "\t5 & 5 & 15 & E \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "5×3 DataFrame\n",
+ "│ Row │ A │ B │ C │\n",
+ "│ │ \u001b[90mInt64\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mString\u001b[39m │\n",
+ "├─────┼───────┼───────┼────────┤\n",
+ "│ 1 │ 1 │ 11 │ A │\n",
+ "│ 2 │ 2 │ 12 │ B │\n",
+ "│ 3 │ 3 │ 13 │ C │\n",
+ "│ 4 │ 4 │ 14 │ D │\n",
+ "│ 5 │ 5 │ 15 │ E │"
+ ]
+ },
+ "execution_count": 51,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Permanent change\n",
+ "unique!(df7)\n",
+ "df7"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | A | B | C |
---|
| Int64 | Int64 | String |
---|
5 rows × 3 columns
1 | 1 | 11 | A |
---|
2 | 2 | 12 | B |
---|
3 | 3 | 13 | C |
---|
4 | 4 | 14 | D |
---|
5 | 5 | 15 | E |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|ccc}\n",
+ "\t& A & B & C\\\\\n",
+ "\t\\hline\n",
+ "\t& Int64 & Int64 & String\\\\\n",
+ "\t\\hline\n",
+ "\t1 & 1 & 11 & A \\\\\n",
+ "\t2 & 2 & 12 & B \\\\\n",
+ "\t3 & 3 & 13 & C \\\\\n",
+ "\t4 & 4 & 14 & D \\\\\n",
+ "\t5 & 5 & 15 & E \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "5×3 DataFrame\n",
+ "│ Row │ A │ B │ C │\n",
+ "│ │ \u001b[90mInt64\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mString\u001b[39m │\n",
+ "├─────┼───────┼───────┼────────┤\n",
+ "│ 1 │ 1 │ 11 │ A │\n",
+ "│ 2 │ 2 │ 12 │ B │\n",
+ "│ 3 │ 3 │ 13 │ C │\n",
+ "│ 4 │ 4 │ 14 │ D │\n",
+ "│ 5 │ 5 │ 15 │ E │"
+ ]
+ },
+ "execution_count": 52,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Permanent change\n",
+ "unique!(df7)\n",
+ "df7"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[Back to the top](#In-this-lecture)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Deleting rows"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `deleterows!()` function (permanent bang version used here), deletes specified rows."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | A | B | C |
---|
| Int64 | Int64 | String |
---|
3 rows × 3 columns
1 | 2 | 12 | B |
---|
2 | 3 | 13 | C |
---|
3 | 4 | 14 | D |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|ccc}\n",
+ "\t& A & B & C\\\\\n",
+ "\t\\hline\n",
+ "\t& Int64 & Int64 & String\\\\\n",
+ "\t\\hline\n",
+ "\t1 & 2 & 12 & B \\\\\n",
+ "\t2 & 3 & 13 & C \\\\\n",
+ "\t3 & 4 & 14 & D \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "3×3 DataFrame\n",
+ "│ Row │ A │ B │ C │\n",
+ "│ │ \u001b[90mInt64\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mString\u001b[39m │\n",
+ "├─────┼───────┼───────┼────────┤\n",
+ "│ 1 │ 2 │ 12 │ B │\n",
+ "│ 2 │ 3 │ 13 │ C │\n",
+ "│ 3 │ 4 │ 14 │ D │"
+ ]
+ },
+ "execution_count": 53,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Permanently\n",
+ "deleterows!(df7, [1, 5])\n",
+ "df7"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[Back to the top](#In-this-lecture)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Julia 1.0.3",
+ "language": "julia",
+ "name": "julia-1.0"
+ },
+ "language_info": {
+ "file_extension": ".jl",
+ "mimetype": "application/julia",
+ "name": "julia",
+ "version": "1.0.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Week4_Working_with_data.jl b/Week4_Working_with_data.jl
new file mode 100644
index 0000000..ab8fe6a
--- /dev/null
+++ b/Week4_Working_with_data.jl
@@ -0,0 +1,201 @@
+# WORKING WITH DATA
+# -----------------
+
+# I Distributions
+
+# * Data point values for a variable usually follow a pattern
+# * Such patterns are called distributions
+# * Distributions are either discrete or continuous
+# * The Distributions.jl package contains most of the common
+# data distributions
+
+# 1 Importing Distributions.jl
+using Distributions
+using Random
+
+
+# 2 The standard normal distribution
+# Seed the pseudo-random number generator so the random draws below are reproducible
+Random.seed!(1234)
+# Save the standard normal distribution (Normal() with no arguments) as an object
+n = Normal()
+# Parameter values of the distribution stored in n
+params(n)
+# Draw 10 values at random from n
+var1 = rand(n, 10)
+# Sample mean and standard deviation of var1
+mean(var1)
+std(var1)
+# Probability density function value at x = 0.3
+pdf(Normal(), 0.3)
+# Cumulative distribution function value at x = 0.25
+cdf(Normal(), 0.25)
+# Quantiles at probabilities 0.025 and 0.975
+quantile(Normal(), 0.025)
+quantile(Normal(), 0.975)
+
+# 3 The normal distribution
+# Field (parameter) names of the Normal type
+fieldnames(Normal)
+# Create 100 data point values from a normal distribution
+# with a mean of 100 and a standard deviation of 10
+var2 = rand(Normal(100, 10), 100)
+# Sample mean and standard deviation of var2
+mean(var2)
+std(var2)
+# Use fit() to estimate the parameters of a normal distribution from var2
+fit(Normal, var2)
+
+# 4 Skewness and kurtosis of the sample var2
+skewness(var2)
+kurtosis(var2)
+
+# 5 Beta distribution
+b = Beta(1, 1)  # both parameters set to 1
+params(b)
+var3 = rand(b, 100)  # 100 random draws from b
+fit(Beta, var3)  # estimate the Beta parameters back from the sample
+
+# 6 χ2 distribution
+c = Chisq(1)
+var4 = rand(c, 100)  # 100 random draws from c
+fieldnames(Chisq) # Degrees of freedom
+
+# 7 Distribution types are hierarchical (subtypes() comes from InteractiveUtils, which only interactive sessions load automatically)
+supertype(Normal)
+subtypes(Distribution{Univariate,Continuous})
+subtypes(Distribution{Univariate,Discrete})
+# * Search for help in the REPL
+
+# II DataFrames
+
+using DataFrames
+
+# * Allows for creation of a flat data structure (rows and columns)
+# * Columns are variables
+# * Rows are subjects (examples)
+
+# 1 Create a DataFrame
+typeof(var2)  # the vector that becomes the first column below
+# Create an empty DataFrame
+df = DataFrame()
+
+# 2 Add a column with data point values (rows)
+df[:Var2] = var2
+# View first five rows (first() replaces the deprecated head())
+first(df, 5)
+
+# 3 Add another column
+df[:Var3] = var3
+# View last three rows (last() replaces the deprecated tail())
+last(df, 3)
+
+# 4 Dimensions of a DataFrame
+size(df)  # tuple: (number of rows, number of columns)
+rows = size(df, 1)  # first dimension: rows
+columns = size(df, 2)  # second dimension: columns
+
+# 5 Inspect content
+names(df)  # column names (replaces the deprecated showcols(); types and statistics follow below)
+# Data type only
+eltypes(df)
+# Descriptive statistics
+describe(df)
+# Print in console
+print(describe(df))
+
+# 6 Create a bigger DataFrame, one column at a time (10 rows each)
+df2 = DataFrame()
+df2[:A] = 1:10
+df2[:B] = ["I", "II", "II", "I", "II","I", "II", "II", "I", "II"]
+df2[:C] = rand(Normal(), 10)  # 10 draws from the standard normal distribution
+df2[:D] = rand(Chisq(1), 10)  # 10 draws from Chisq(1)
+
+# 7 Slicing with df2[rows, columns]
+# First three rows, all columns
+df2[1:3, :]
+# All rows, columns 1 and 3 selected by position
+df2[:, [1, 3]]
+# The same columns selected by name
+df2[:, [:A, :C]]
+
+# III Importing data files
+
+# 1 Set working directory in Atom settings under Julia tab
+pwd()  # show the current working directory
+
+# 2 Import the CSV package
+using CSV
+
+# 3 Import the file CCS.csv (relative path, so it must be in the working directory)
+data1 = CSV.read("CCS.csv")
+
+# 4 Explore the data
+typeof(data1)
+first(data1, 6)  # first six rows (first() replaces the deprecated head(), whose default was six rows)
+names(data1)  # column names (replaces the deprecated showcols())
+eltypes(data1)  # element type of each column
+describe(data1)  # descriptive statistics per column
+
+# 5 Combining DataFrames
+# Create two DataFrames that share the key column :Number
+subjects = DataFrame(Number = [100, 101, 102, 103], Stage = ["I", "III", "II", "I"])
+treatment = DataFrame(Number = [103, 102, 101, 100], Treatment = ["A", "B", "A", "B"])
+subjects
+treatment
+# Join the two on the shared :Number column
+df3 = join(subjects, treatment, on = :Number);
+df3
+# Replace subjects with a longer list; numbers 104 and 105 have no row in treatment
+subjects = DataFrame(Number = [100, 101, 102, 103, 104, 105], Stage = ["I", "III", "II", "I", "II", "II"])
+# Inner join: keeps only rows whose :Number occurs in both DataFrames
+ df4 = join(subjects, treatment, on = :Number, kind = :inner);
+ df4
+# Outer join: empty fields filled with missing
+df5 = join(subjects, treatment, on = :Number, kind = :outer);
+df5
+
+# 6 Grouping
+# Create a new DataFrame: 15 rows with a random group label and two random numeric columns
+df6 = DataFrame(Group = rand(["A", "B", "C"], 15), Variable1 = randn(15), Variable2 = rand(15));
+df6
+# Group by :Group with by() and apply size() to each group
+by(df6, :Group, size)
+# Count the rows for each unique value in the :Group column
+by(df6, :Group, dfc -> DataFrame(Count = size(dfc, 1)))
+# Aggregate for descriptive statistics: mean and std of the other columns per group
+print(aggregate(df6, :Group, [mean, std]))
+# Split df6 into one sub-dataframe per unique :Group value
+groupby(df6, :Group)
+length(groupby(df6, :Group))  # number of groups
+groupby(df6, :Group)[2]  # select the second group by index
+
+# 7 Selection
+df6[:Variable1] .> 0  # Boolean mask: true where Variable1 is positive
+df6[df6[:Variable1] .> 0, :]  # rows where the mask is true, all columns (sub() no longer exists in Julia 1.0)
+
+# 8 New DataFrame by selection
+df6A = df6[df6[:Group] .== "A", :];  # rows of group "A", all columns
+df6A
+
+# 9 Sorting (sort! reorders df6 in place and returns it, so df6S and df6S2 are both the same object as df6)
+df6S = sort!(df6, [:Group, :Variable1], rev = true);  # columns passed positionally; both sorted in descending order
+df6S
+df6S2 = sort!(df6, [:Group, :Variable1, :Variable2], rev = [false, false, true]);  # one direction flag per column
+df6S2
+
+# 10 Unique rows
+# Create a DataFrame in which rows 2 and 3 are identical
+df7 = DataFrame(A = [1, 2, 2, 3, 4, 5], B = [11, 12, 12, 13, 14, 15], C = ["A", "B", "B", "C", "D", "E"]);
+df7
+# Only unique rows (the result is displayed, not stored)
+unique(df7)
+df7  # df7 itself still contains the duplicate row
+# Permanent change: unique!() removes the duplicate from df7 itself
+unique!(df7)
+df7
+
+# 11 Delete rows
+# Permanently remove rows 1 and 5 of the de-duplicated df7
+deleterows!(df7, [1, 5])
+df7