{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"using Random\n",
"using Distributions\n",
"using StatsBase\n",
"using CSV\n",
"using DataFrames\n",
"using HypothesisTests\n",
"using Plots\n",
"using GLM"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Load the project dataset into a DataFrame named test1.\n",
"# `CSV.File(...) |> DataFrame` replaces the one-argument `CSV.read(path)` call:\n",
"# newer CSV.jl releases require an explicit sink argument for `CSV.read`,\n",
"# whereas this form is accepted by both older and newer releases.\n",
"test1 = CSV.File(\"ProjectData_1_point_0.csv\") |> DataFrame;"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
| Age | WCC | CRP | Treatment | Result |
---|
| Int64 | Float64 | Int64 | String | String |
---|
100 rows × 5 columns
1 | 39 | 9.3 | 10 | A | Worse |
---|
2 | 51 | 14.6 | 50 | A | Worse |
---|
3 | 75 | 7.1 | 50 | A | Improved |
---|
4 | 59 | 12.0 | 10 | A | Improved |
---|
5 | 62 | 13.6 | 60 | A | Static |
---|
6 | 59 | 11.9 | 30 | B | Static |
---|
7 | 31 | 10.6 | 10 | B | Static |
---|
8 | 63 | 11.8 | 70 | A | Worse |
---|
9 | 41 | 13.2 | 70 | A | Worse |
---|
10 | 45 | 13.6 | 90 | B | Worse |
---|
11 | 78 | 11.8 | 70 | B | Improved |
---|
12 | 36 | 13.7 | 50 | A | Improved |
---|
13 | 62 | 9.5 | 40 | A | Static |
---|
14 | 34 | 13.6 | 30 | A | Static |
---|
15 | 70 | 14.2 | 90 | A | Static |
---|
16 | 54 | 12.9 | 30 | A | Improved |
---|
17 | 18 | 12.3 | 20 | B | Worse |
---|
18 | 78 | 11.0 | 30 | A | Improved |
---|
19 | 53 | 12.7 | 60 | B | Worse |
---|
20 | 53 | 15.5 | 100 | A | Static |
---|
21 | 66 | 11.4 | 70 | A | Worse |
---|
22 | 80 | 12.1 | 20 | B | Static |
---|
23 | 46 | 10.3 | 70 | B | Improved |
---|
24 | 58 | 10.9 | 20 | B | Improved |
---|
25 | 68 | 12.2 | 20 | B | Improved |
---|
26 | 58 | 8.3 | 20 | B | Worse |
---|
27 | 20 | 11.7 | 40 | A | Worse |
---|
28 | 38 | 14.8 | 60 | A | Improved |
---|
29 | 76 | 12.4 | 30 | B | Improved |
---|
30 | 23 | 4.9 | 40 | B | Static |
---|
⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
---|
"
],
"text/latex": [
"\\begin{tabular}{r|ccccc}\n",
"\t& Age & WCC & CRP & Treatment & Result\\\\\n",
"\t\\hline\n",
"\t& Int64 & Float64 & Int64 & String & String\\\\\n",
"\t\\hline\n",
"\t1 & 39 & 9.3 & 10 & A & Worse \\\\\n",
"\t2 & 51 & 14.6 & 50 & A & Worse \\\\\n",
"\t3 & 75 & 7.1 & 50 & A & Improved \\\\\n",
"\t4 & 59 & 12.0 & 10 & A & Improved \\\\\n",
"\t5 & 62 & 13.6 & 60 & A & Static \\\\\n",
"\t6 & 59 & 11.9 & 30 & B & Static \\\\\n",
"\t7 & 31 & 10.6 & 10 & B & Static \\\\\n",
"\t8 & 63 & 11.8 & 70 & A & Worse \\\\\n",
"\t9 & 41 & 13.2 & 70 & A & Worse \\\\\n",
"\t10 & 45 & 13.6 & 90 & B & Worse \\\\\n",
"\t11 & 78 & 11.8 & 70 & B & Improved \\\\\n",
"\t12 & 36 & 13.7 & 50 & A & Improved \\\\\n",
"\t13 & 62 & 9.5 & 40 & A & Static \\\\\n",
"\t14 & 34 & 13.6 & 30 & A & Static \\\\\n",
"\t15 & 70 & 14.2 & 90 & A & Static \\\\\n",
"\t16 & 54 & 12.9 & 30 & A & Improved \\\\\n",
"\t17 & 18 & 12.3 & 20 & B & Worse \\\\\n",
"\t18 & 78 & 11.0 & 30 & A & Improved \\\\\n",
"\t19 & 53 & 12.7 & 60 & B & Worse \\\\\n",
"\t20 & 53 & 15.5 & 100 & A & Static \\\\\n",
"\t21 & 66 & 11.4 & 70 & A & Worse \\\\\n",
"\t22 & 80 & 12.1 & 20 & B & Static \\\\\n",
"\t23 & 46 & 10.3 & 70 & B & Improved \\\\\n",
"\t24 & 58 & 10.9 & 20 & B & Improved \\\\\n",
"\t25 & 68 & 12.2 & 20 & B & Improved \\\\\n",
"\t26 & 58 & 8.3 & 20 & B & Worse \\\\\n",
"\t27 & 20 & 11.7 & 40 & A & Worse \\\\\n",
"\t28 & 38 & 14.8 & 60 & A & Improved \\\\\n",
"\t29 & 76 & 12.4 & 30 & B & Improved \\\\\n",
"\t30 & 23 & 4.9 & 40 & B & Static \\\\\n",
"\t$\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ \\\\\n",
"\\end{tabular}\n"
],
"text/plain": [
"100×5 DataFrame\n",
"│ Row │ Age │ WCC │ CRP │ Treatment │ Result │\n",
"│ │ \u001b[90mInt64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mString\u001b[39m │\n",
"├─────┼───────┼─────────┼───────┼───────────┼──────────┤\n",
"│ 1 │ 39 │ 9.3 │ 10 │ A │ Worse │\n",
"│ 2 │ 51 │ 14.6 │ 50 │ A │ Worse │\n",
"│ 3 │ 75 │ 7.1 │ 50 │ A │ Improved │\n",
"│ 4 │ 59 │ 12.0 │ 10 │ A │ Improved │\n",
"│ 5 │ 62 │ 13.6 │ 60 │ A │ Static │\n",
"│ 6 │ 59 │ 11.9 │ 30 │ B │ Static │\n",
"│ 7 │ 31 │ 10.6 │ 10 │ B │ Static │\n",
"│ 8 │ 63 │ 11.8 │ 70 │ A │ Worse │\n",
"│ 9 │ 41 │ 13.2 │ 70 │ A │ Worse │\n",
"│ 10 │ 45 │ 13.6 │ 90 │ B │ Worse │\n",
"⋮\n",
"│ 90 │ 43 │ 12.5 │ 10 │ A │ Improved │\n",
"│ 91 │ 51 │ 14.1 │ 50 │ A │ Worse │\n",
"│ 92 │ 35 │ 10.2 │ 40 │ B │ Static │\n",
"│ 93 │ 52 │ 13.7 │ 70 │ A │ Worse │\n",
"│ 94 │ 33 │ 11.2 │ 20 │ A │ Improved │\n",
"│ 95 │ 30 │ 12.5 │ 110 │ A │ Worse │\n",
"│ 96 │ 60 │ 12.6 │ 30 │ A │ Static │\n",
"│ 97 │ 25 │ 13.0 │ 20 │ B │ Static │\n",
"│ 98 │ 76 │ 7.1 │ 10 │ A │ Static │\n",
"│ 99 │ 54 │ 16.1 │ 50 │ B │ Improved │\n",
"│ 100 │ 36 │ 11.8 │ 10 │ A │ Worse │"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test1"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" | Group | Variable1 | Variable2 |
---|
| String | Float64 | Float64 |
---|
20 rows × 3 columns
1 | B | -0.395164 | 0.785108 |
---|
2 | A | -0.477911 | 0.94664 |
---|
3 | B | 1.3248 | 0.683507 |
---|
4 | A | 2.18945 | 0.655638 |
---|
5 | B | -1.8022 | 0.40504 |
---|
6 | B | -0.0415339 | 0.0649169 |
---|
7 | A | -0.988776 | 0.579329 |
---|
8 | B | 0.452246 | 0.0049928 |
---|
9 | B | -1.23054 | 0.529847 |
---|
10 | A | -0.923378 | 0.258118 |
---|
11 | B | 0.874476 | 0.642472 |
---|
12 | A | -1.02009 | 0.0728385 |
---|
13 | B | -0.422106 | 0.319547 |
---|
14 | B | 0.953726 | 0.0257164 |
---|
15 | A | -2.52417 | 0.0487641 |
---|
16 | B | 0.494937 | 0.869764 |
---|
17 | A | 1.47704 | 0.824756 |
---|
18 | A | -0.133367 | 0.875263 |
---|
19 | B | -0.906293 | 0.0709776 |
---|
20 | B | -0.36659 | 0.452148 |
---|
"
],
"text/latex": [
"\\begin{tabular}{r|ccc}\n",
"\t& Group & Variable1 & Variable2\\\\\n",
"\t\\hline\n",
"\t& String & Float64 & Float64\\\\\n",
"\t\\hline\n",
"\t1 & B & -0.395164 & 0.785108 \\\\\n",
"\t2 & A & -0.477911 & 0.94664 \\\\\n",
"\t3 & B & 1.3248 & 0.683507 \\\\\n",
"\t4 & A & 2.18945 & 0.655638 \\\\\n",
"\t5 & B & -1.8022 & 0.40504 \\\\\n",
"\t6 & B & -0.0415339 & 0.0649169 \\\\\n",
"\t7 & A & -0.988776 & 0.579329 \\\\\n",
"\t8 & B & 0.452246 & 0.0049928 \\\\\n",
"\t9 & B & -1.23054 & 0.529847 \\\\\n",
"\t10 & A & -0.923378 & 0.258118 \\\\\n",
"\t11 & B & 0.874476 & 0.642472 \\\\\n",
"\t12 & A & -1.02009 & 0.0728385 \\\\\n",
"\t13 & B & -0.422106 & 0.319547 \\\\\n",
"\t14 & B & 0.953726 & 0.0257164 \\\\\n",
"\t15 & A & -2.52417 & 0.0487641 \\\\\n",
"\t16 & B & 0.494937 & 0.869764 \\\\\n",
"\t17 & A & 1.47704 & 0.824756 \\\\\n",
"\t18 & A & -0.133367 & 0.875263 \\\\\n",
"\t19 & B & -0.906293 & 0.0709776 \\\\\n",
"\t20 & B & -0.36659 & 0.452148 \\\\\n",
"\\end{tabular}\n"
],
"text/plain": [
"20×3 DataFrame\n",
"│ Row │ Group │ Variable1 │ Variable2 │\n",
"│ │ \u001b[90mString\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │\n",
"├─────┼────────┼────────────┼───────────┤\n",
"│ 1 │ B │ -0.395164 │ 0.785108 │\n",
"│ 2 │ A │ -0.477911 │ 0.94664 │\n",
"│ 3 │ B │ 1.3248 │ 0.683507 │\n",
"│ 4 │ A │ 2.18945 │ 0.655638 │\n",
"│ 5 │ B │ -1.8022 │ 0.40504 │\n",
"│ 6 │ B │ -0.0415339 │ 0.0649169 │\n",
"│ 7 │ A │ -0.988776 │ 0.579329 │\n",
"│ 8 │ B │ 0.452246 │ 0.0049928 │\n",
"│ 9 │ B │ -1.23054 │ 0.529847 │\n",
"│ 10 │ A │ -0.923378 │ 0.258118 │\n",
"│ 11 │ B │ 0.874476 │ 0.642472 │\n",
"│ 12 │ A │ -1.02009 │ 0.0728385 │\n",
"│ 13 │ B │ -0.422106 │ 0.319547 │\n",
"│ 14 │ B │ 0.953726 │ 0.0257164 │\n",
"│ 15 │ A │ -2.52417 │ 0.0487641 │\n",
"│ 16 │ B │ 0.494937 │ 0.869764 │\n",
"│ 17 │ A │ 1.47704 │ 0.824756 │\n",
"│ 18 │ A │ -0.133367 │ 0.875263 │\n",
"│ 19 │ B │ -0.906293 │ 0.0709776 │\n",
"│ 20 │ B │ -0.36659 │ 0.452148 │"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Build a 20-row synthetic DataFrame: a random \"A\"/\"B\" group label, a column of\n",
"# standard-normal draws and a column of uniform draws on [0, 1).\n",
"# The global RNG is seeded first so that re-running the cell on a fresh kernel\n",
"# reproduces exactly the same table.\n",
"Random.seed!(1234)\n",
"DataFrame(Group=rand([\"A\", \"B\"], 20), Variable1=randn(20), Variable2=rand(20))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"DataFrameRow (5 columns)
| Age | WCC | CRP | Treatment | Result |
---|
| Int64 | Float64 | Int64 | String | String |
---|
3 | 75 | 7.1 | 50 | A | Improved |
---|
"
],
"text/latex": [
"\\begin{tabular}{r|ccccc}\n",
"\t& Age & WCC & CRP & Treatment & Result\\\\\n",
"\t\\hline\n",
"\t& Int64 & Float64 & Int64 & String & String\\\\\n",
"\t\\hline\n",
"\t3 & 75 & 7.1 & 50 & A & Improved \\\\\n",
"\\end{tabular}\n"
],
"text/plain": [
"DataFrameRow\n",
"│ Row │ Age │ WCC │ CRP │ Treatment │ Result │\n",
"│ │ \u001b[90mInt64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mString\u001b[39m │\n",
"├─────┼───────┼─────────┼───────┼───────────┼──────────┤\n",
"│ 3 │ 75 │ 7.1 │ 50 │ A │ Improved │"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# View of the third row of test1 across all columns (no copy of the data is made).\n",
"@view test1[3, :]"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" | Age | WCC | CRP | Treatment | Result |
---|
| Int64 | Float64 | Int64 | String | String |
---|
97 rows × 5 columns
1 | 39 | 9.3 | 10 | A | Worse |
---|
2 | 51 | 14.6 | 50 | A | Worse |
---|
3 | 59 | 12.0 | 10 | A | Improved |
---|
4 | 59 | 11.9 | 30 | B | Static |
---|
5 | 31 | 10.6 | 10 | B | Static |
---|
6 | 63 | 11.8 | 70 | A | Worse |
---|
7 | 45 | 13.6 | 90 | B | Worse |
---|
8 | 78 | 11.8 | 70 | B | Improved |
---|
9 | 36 | 13.7 | 50 | A | Improved |
---|
10 | 62 | 9.5 | 40 | A | Static |
---|
11 | 34 | 13.6 | 30 | A | Static |
---|
12 | 70 | 14.2 | 90 | A | Static |
---|
13 | 54 | 12.9 | 30 | A | Improved |
---|
14 | 18 | 12.3 | 20 | B | Worse |
---|
15 | 78 | 11.0 | 30 | A | Improved |
---|
16 | 53 | 12.7 | 60 | B | Worse |
---|
17 | 53 | 15.5 | 100 | A | Static |
---|
18 | 66 | 11.4 | 70 | A | Worse |
---|
19 | 80 | 12.1 | 20 | B | Static |
---|
20 | 46 | 10.3 | 70 | B | Improved |
---|
21 | 58 | 10.9 | 20 | B | Improved |
---|
22 | 68 | 12.2 | 20 | B | Improved |
---|
23 | 58 | 8.3 | 20 | B | Worse |
---|
24 | 20 | 11.7 | 40 | A | Worse |
---|
25 | 38 | 14.8 | 60 | A | Improved |
---|
26 | 76 | 12.4 | 30 | B | Improved |
---|
27 | 23 | 4.9 | 40 | B | Static |
---|
28 | 80 | 13.7 | 80 | B | Static |
---|
29 | 65 | 12.4 | 10 | B | Improved |
---|
30 | 22 | 14.7 | 10 | B | Improved |
---|
⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
---|
"
],
"text/latex": [
"\\begin{tabular}{r|ccccc}\n",
"\t& Age & WCC & CRP & Treatment & Result\\\\\n",
"\t\\hline\n",
"\t& Int64 & Float64 & Int64 & String & String\\\\\n",
"\t\\hline\n",
"\t1 & 39 & 9.3 & 10 & A & Worse \\\\\n",
"\t2 & 51 & 14.6 & 50 & A & Worse \\\\\n",
"\t3 & 59 & 12.0 & 10 & A & Improved \\\\\n",
"\t4 & 59 & 11.9 & 30 & B & Static \\\\\n",
"\t5 & 31 & 10.6 & 10 & B & Static \\\\\n",
"\t6 & 63 & 11.8 & 70 & A & Worse \\\\\n",
"\t7 & 45 & 13.6 & 90 & B & Worse \\\\\n",
"\t8 & 78 & 11.8 & 70 & B & Improved \\\\\n",
"\t9 & 36 & 13.7 & 50 & A & Improved \\\\\n",
"\t10 & 62 & 9.5 & 40 & A & Static \\\\\n",
"\t11 & 34 & 13.6 & 30 & A & Static \\\\\n",
"\t12 & 70 & 14.2 & 90 & A & Static \\\\\n",
"\t13 & 54 & 12.9 & 30 & A & Improved \\\\\n",
"\t14 & 18 & 12.3 & 20 & B & Worse \\\\\n",
"\t15 & 78 & 11.0 & 30 & A & Improved \\\\\n",
"\t16 & 53 & 12.7 & 60 & B & Worse \\\\\n",
"\t17 & 53 & 15.5 & 100 & A & Static \\\\\n",
"\t18 & 66 & 11.4 & 70 & A & Worse \\\\\n",
"\t19 & 80 & 12.1 & 20 & B & Static \\\\\n",
"\t20 & 46 & 10.3 & 70 & B & Improved \\\\\n",
"\t21 & 58 & 10.9 & 20 & B & Improved \\\\\n",
"\t22 & 68 & 12.2 & 20 & B & Improved \\\\\n",
"\t23 & 58 & 8.3 & 20 & B & Worse \\\\\n",
"\t24 & 20 & 11.7 & 40 & A & Worse \\\\\n",
"\t25 & 38 & 14.8 & 60 & A & Improved \\\\\n",
"\t26 & 76 & 12.4 & 30 & B & Improved \\\\\n",
"\t27 & 23 & 4.9 & 40 & B & Static \\\\\n",
"\t28 & 80 & 13.7 & 80 & B & Static \\\\\n",
"\t29 & 65 & 12.4 & 10 & B & Improved \\\\\n",
"\t30 & 22 & 14.7 & 10 & B & Improved \\\\\n",
"\t$\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ \\\\\n",
"\\end{tabular}\n"
],
"text/plain": [
"97×5 DataFrame\n",
"│ Row │ Age │ WCC │ CRP │ Treatment │ Result │\n",
"│ │ \u001b[90mInt64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mString\u001b[39m │\n",
"├─────┼───────┼─────────┼───────┼───────────┼──────────┤\n",
"│ 1 │ 39 │ 9.3 │ 10 │ A │ Worse │\n",
"│ 2 │ 51 │ 14.6 │ 50 │ A │ Worse │\n",
"│ 3 │ 59 │ 12.0 │ 10 │ A │ Improved │\n",
"│ 4 │ 59 │ 11.9 │ 30 │ B │ Static │\n",
"│ 5 │ 31 │ 10.6 │ 10 │ B │ Static │\n",
"│ 6 │ 63 │ 11.8 │ 70 │ A │ Worse │\n",
"│ 7 │ 45 │ 13.6 │ 90 │ B │ Worse │\n",
"│ 8 │ 78 │ 11.8 │ 70 │ B │ Improved │\n",
"│ 9 │ 36 │ 13.7 │ 50 │ A │ Improved │\n",
"│ 10 │ 62 │ 9.5 │ 40 │ A │ Static │\n",
"⋮\n",
"│ 87 │ 43 │ 12.5 │ 10 │ A │ Improved │\n",
"│ 88 │ 51 │ 14.1 │ 50 │ A │ Worse │\n",
"│ 89 │ 35 │ 10.2 │ 40 │ B │ Static │\n",
"│ 90 │ 52 │ 13.7 │ 70 │ A │ Worse │\n",
"│ 91 │ 33 │ 11.2 │ 20 │ A │ Improved │\n",
"│ 92 │ 30 │ 12.5 │ 110 │ A │ Worse │\n",
"│ 93 │ 60 │ 12.6 │ 30 │ A │ Static │\n",
"│ 94 │ 25 │ 13.0 │ 20 │ B │ Static │\n",
"│ 95 │ 76 │ 7.1 │ 10 │ A │ Static │\n",
"│ 96 │ 54 │ 16.1 │ 50 │ B │ Improved │\n",
"│ 97 │ 36 │ 11.8 │ 10 │ A │ Worse │"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep every row of test1 except rows 3, 5 and 9, with all columns.\n",
"test1[vcat(1:2, 4, 6:8, 10:nrow(test1)), :]"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"101.20293783633616"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sample variance of 200 draws from a normal distribution with mean 80 and\n",
"# standard deviation 10; the value should land near 10^2 = 100.\n",
"# Seeding the global RNG makes the result reproducible across re-runs.\n",
"Random.seed!(1234)\n",
"var(rand(Normal(80, 10), 200))"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-0.0520584551299503"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sample mean of 100 standard-normal draws; the value should be close to 0.\n",
"# Seeding the global RNG makes the result reproducible across re-runs.\n",
"Random.seed!(1234)\n",
"mean(randn(100))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Student's t distribution. `TDist` requires the degrees of freedom as its\n",
"# argument; the bare `TDist()` call has no matching method and raises a MethodError.\n",
"# NOTE(review): nrow(test1) - 2 is the degrees of freedom of a pooled two-sample\n",
"# t-test over all rows of test1 -- confirm this is the intended value.\n",
"TDist(nrow(test1) - 2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Two-sample t-test assuming equal variances. `EqualVarianceTTest` needs the two\n",
"# samples to compare; calling it with no arguments raises a MethodError.\n",
"# NOTE(review): comparing WCC between treatment groups \"A\" and \"B\" is an assumed\n",
"# choice of samples -- confirm against the intended analysis.\n",
"wcc_a = test1.WCC[test1.Treatment .== \"A\"]\n",
"wcc_b = test1.WCC[test1.Treatment .== \"B\"]\n",
"EqualVarianceTTest(wcc_a, wcc_b)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Julia 1.2.0",
"language": "julia",
"name": "julia-1.2"
},
"language_info": {
"file_extension": ".jl",
"mimetype": "application/julia",
"name": "julia",
"version": "1.2.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}