2020-04-23 19:26:18 +00:00
|
|
|
{
|
|
|
|
"cells": [
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2020-04-23 23:36:20 +00:00
|
|
|
"execution_count": 24,
|
2020-04-23 19:26:18 +00:00
|
|
|
"metadata": {},
|
2020-04-23 23:36:20 +00:00
|
|
|
"outputs": [],
|
2020-04-23 19:26:18 +00:00
|
|
|
"source": [
|
|
|
|
"import numpy as np\n",
|
|
|
|
"import torch\n",
|
|
|
|
"import torch.nn as nn\n",
|
|
|
|
"import pandas as pd\n",
|
|
|
|
"from sklearn.preprocessing import StandardScaler\n",
|
2020-04-23 23:36:20 +00:00
|
|
|
"from torch.utils.data import Dataset\n",
|
|
|
|
"from torch.utils.data import DataLoader\n",
|
|
|
|
"from torch.nn import BCELoss\n",
|
|
|
|
"from torch.optim import SGD"
|
2020-04-23 19:26:18 +00:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 2,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"# Load the data set using pandas\n",
|
|
|
|
"data = pd.read_csv('diabetes.csv')\n"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 3,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/html": [
|
|
|
|
"<div>\n",
|
|
|
|
"<style scoped>\n",
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
" text-align: right;\n",
|
|
|
|
" }\n",
|
|
|
|
"</style>\n",
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
" <thead>\n",
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
" <th></th>\n",
|
|
|
|
" <th>Number of times pregnant</th>\n",
|
|
|
|
" <th>Plasma glucose concentration</th>\n",
|
|
|
|
" <th>Diastolic blood pressure</th>\n",
|
|
|
|
" <th>Triceps skin fold thickness</th>\n",
|
|
|
|
" <th>2-Hour serum insulin</th>\n",
|
|
|
|
" <th>Body mass index</th>\n",
|
|
|
|
" <th>Age</th>\n",
|
|
|
|
" <th>Class</th>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" </thead>\n",
|
|
|
|
" <tbody>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>0</th>\n",
|
|
|
|
" <td>6</td>\n",
|
|
|
|
" <td>148</td>\n",
|
|
|
|
" <td>72</td>\n",
|
|
|
|
" <td>35</td>\n",
|
|
|
|
" <td>0</td>\n",
|
|
|
|
" <td>33.6</td>\n",
|
|
|
|
" <td>50</td>\n",
|
|
|
|
" <td>positive</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>1</th>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>85</td>\n",
|
|
|
|
" <td>66</td>\n",
|
|
|
|
" <td>29</td>\n",
|
|
|
|
" <td>0</td>\n",
|
|
|
|
" <td>26.6</td>\n",
|
|
|
|
" <td>31</td>\n",
|
|
|
|
" <td>negative</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>2</th>\n",
|
|
|
|
" <td>8</td>\n",
|
|
|
|
" <td>183</td>\n",
|
|
|
|
" <td>64</td>\n",
|
|
|
|
" <td>0</td>\n",
|
|
|
|
" <td>0</td>\n",
|
|
|
|
" <td>23.3</td>\n",
|
|
|
|
" <td>32</td>\n",
|
|
|
|
" <td>positive</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>3</th>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>89</td>\n",
|
|
|
|
" <td>66</td>\n",
|
|
|
|
" <td>23</td>\n",
|
|
|
|
" <td>94</td>\n",
|
|
|
|
" <td>28.1</td>\n",
|
|
|
|
" <td>21</td>\n",
|
|
|
|
" <td>negative</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>4</th>\n",
|
|
|
|
" <td>0</td>\n",
|
|
|
|
" <td>137</td>\n",
|
|
|
|
" <td>40</td>\n",
|
|
|
|
" <td>35</td>\n",
|
|
|
|
" <td>168</td>\n",
|
|
|
|
" <td>43.1</td>\n",
|
|
|
|
" <td>33</td>\n",
|
|
|
|
" <td>positive</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" </tbody>\n",
|
|
|
|
"</table>\n",
|
|
|
|
"</div>"
|
|
|
|
],
|
|
|
|
"text/plain": [
|
|
|
|
" Number of times pregnant Plasma glucose concentration \\\n",
|
|
|
|
"0 6 148 \n",
|
|
|
|
"1 1 85 \n",
|
|
|
|
"2 8 183 \n",
|
|
|
|
"3 1 89 \n",
|
|
|
|
"4 0 137 \n",
|
|
|
|
"\n",
|
|
|
|
" Diastolic blood pressure Triceps skin fold thickness \\\n",
|
|
|
|
"0 72 35 \n",
|
|
|
|
"1 66 29 \n",
|
|
|
|
"2 64 0 \n",
|
|
|
|
"3 66 23 \n",
|
|
|
|
"4 40 35 \n",
|
|
|
|
"\n",
|
|
|
|
" 2-Hour serum insulin Body mass index Age Class \n",
|
|
|
|
"0 0 33.6 50 positive \n",
|
|
|
|
"1 0 26.6 31 negative \n",
|
|
|
|
"2 0 23.3 32 positive \n",
|
|
|
|
"3 94 28.1 21 negative \n",
|
|
|
|
"4 168 43.1 33 positive "
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 3,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"data.head() "
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 4,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"x = data.iloc[:,0:-1].values"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 5,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"(768, 7)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 5,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"x.shape"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 6,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"y_string = list(data.iloc[:,-1])"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 7,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"768"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 7,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"len(y_string)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 8,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"y_int = []\n",
|
|
|
|
"for i in y_string:\n",
|
|
|
|
" if i == 'positive':\n",
|
|
|
|
" y_int.append(1)\n",
|
|
|
|
" else:\n",
|
|
|
|
" y_int.append(0)\n",
|
|
|
|
" "
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 9,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"y = np.array(y_int, dtype='float64') "
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 10,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"# data normaalization\n",
|
|
|
|
"sc = StandardScaler()\n",
|
|
|
|
"x = sc.fit_transform(x)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 11,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"x = torch.tensor(x)\n",
|
|
|
|
"y = torch.tensor(y)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 12,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"torch.Size([768, 7])"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 12,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"x.shape"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 13,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"y = y.unsqueeze(1)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 14,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"torch.Size([768, 1])"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 14,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"y.shape"
|
|
|
|
]
|
|
|
|
},
|
2020-04-23 23:36:20 +00:00
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 15,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"class Dataset(Dataset):\n",
|
|
|
|
" def __init__(self,x,y):\n",
|
|
|
|
" self.x = x\n",
|
|
|
|
" self.y = y\n",
|
|
|
|
" \n",
|
|
|
|
" def __getitem__(self, index):\n",
|
|
|
|
" return self.x[index], self.y[index]\n",
|
|
|
|
" \n",
|
|
|
|
" def __len__(self):\n",
|
|
|
|
" return len(self.x)\n",
|
|
|
|
" "
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 16,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"dataset = Dataset(x,y)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 17,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"768"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 17,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"len(dataset)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 18,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"train_loader = DataLoader(dataset=dataset,\n",
|
|
|
|
" batch_size=32,\n",
|
|
|
|
" shuffle=True)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 19,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"<torch.utils.data.dataloader.DataLoader at 0x12ced9810>"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 19,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"train_loader"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 20,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
"There are 24 batches in in the dataset\n",
|
|
|
|
"For one iteration (baatch) there are:\n",
|
|
|
|
"Data: torch.Size([32, 7])\n",
|
|
|
|
"lables: torch.Size([32, 1])\n"
|
|
|
|
]
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"# visualization of the train loader\n",
|
|
|
|
"print(\"There are {} batches in in the dataset\".format(len(train_loader)))\n",
|
|
|
|
"for (x,y) in train_loader:\n",
|
|
|
|
" print(\"For one iteration (baatch) there are:\")\n",
|
|
|
|
" print(\"Data: {}\".format(x.shape))\n",
|
|
|
|
" print(\"lables: {}\".format(y.shape))\n",
|
|
|
|
" break\n",
|
|
|
|
" "
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 21,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"24.0"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 21,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"768/32"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 22,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"class Model(nn.Module):\n",
|
|
|
|
" def __init__(self, input_features,output_features): #,hidden_layer_1,hidden_layer_2,): caan be done this way or hard coded\n",
|
|
|
|
" super(Model, self).__init__()\n",
|
|
|
|
" self.fc1 = nn.Linear(input_features, 5)\n",
|
|
|
|
" self.fc2 = nn.Linear(5,4)\n",
|
|
|
|
" self.fc3 = nn.Linear(4,3)\n",
|
|
|
|
" self.fc4 = nn.Linear(3,output_features)\n",
|
|
|
|
" self.sigmoid = nn.Sigmoid()\n",
|
|
|
|
" self.tanh = nn.Tanh()\n",
|
|
|
|
" \n",
|
|
|
|
" def forward(self,x):\n",
|
|
|
|
" out = self.fc1(x)\n",
|
|
|
|
" out = self.tanh(out)\n",
|
|
|
|
" out = self.fc2(out)\n",
|
|
|
|
" out = self.tanh(out)\n",
|
|
|
|
" out = self.fc3(out)\n",
|
|
|
|
" out = self.tanh(out)\n",
|
|
|
|
" out = self.fc4(out)\n",
|
|
|
|
" out = self.sigmoid(out)\n",
|
|
|
|
" \n",
|
|
|
|
" return out\n",
|
|
|
|
" "
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 26,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"# The definition of the class Model object nets\n",
|
|
|
|
"net = Model(7,1)\n",
|
|
|
|
"\n",
|
|
|
|
"# Binary cross entropy was chosen as a the loss function\n",
|
|
|
|
"criterion = BCELoss(reduction='mean')\n",
|
|
|
|
"\n",
|
|
|
|
"# Define the optimizer\n",
|
|
|
|
"optimizer = SGD(net.parameters(), lr=0.1, momentum=0.9)\n",
|
|
|
|
"\n"
|
|
|
|
]
|
|
|
|
},
|
2020-04-23 19:26:18 +00:00
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
2020-04-23 23:36:20 +00:00
|
|
|
"source": [
|
|
|
|
" "
|
|
|
|
]
|
2020-04-23 19:26:18 +00:00
|
|
|
}
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"kernelspec": {
|
|
|
|
"display_name": "Python 3",
|
|
|
|
"language": "python",
|
|
|
|
"name": "python3"
|
|
|
|
},
|
|
|
|
"language_info": {
|
|
|
|
"codemirror_mode": {
|
|
|
|
"name": "ipython",
|
|
|
|
"version": 3
|
|
|
|
},
|
|
|
|
"file_extension": ".py",
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
"name": "python",
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
"version": "3.7.6"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"nbformat": 4,
|
|
|
|
"nbformat_minor": 4
|
|
|
|
}
|