pytorch-stuff/Diabetes_NN.ipynb

515 lines
12 KiB
Plaintext
Raw Normal View History

{
"cells": [
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import torch\n",
"import torch.nn as nn\n",
"import pandas as pd\n",
"from sklearn.preprocessing import StandardScaler\n",
"from torch.utils.data import Dataset\n",
"from torch.utils.data import DataLoader\n",
"from torch.nn import BCELoss\n",
"from torch.optim import SGD"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Load the data set from 'diabetes.csv' (path relative to the working directory) using pandas.\n",
"data = pd.read_csv('diabetes.csv')\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Number of times pregnant</th>\n",
" <th>Plasma glucose concentration</th>\n",
" <th>Diastolic blood pressure</th>\n",
" <th>Triceps skin fold thickness</th>\n",
" <th>2-Hour serum insulin</th>\n",
" <th>Body mass index</th>\n",
" <th>Age</th>\n",
" <th>Class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>6</td>\n",
" <td>148</td>\n",
" <td>72</td>\n",
" <td>35</td>\n",
" <td>0</td>\n",
" <td>33.6</td>\n",
" <td>50</td>\n",
" <td>positive</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>85</td>\n",
" <td>66</td>\n",
" <td>29</td>\n",
" <td>0</td>\n",
" <td>26.6</td>\n",
" <td>31</td>\n",
" <td>negative</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>8</td>\n",
" <td>183</td>\n",
" <td>64</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>23.3</td>\n",
" <td>32</td>\n",
" <td>positive</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>89</td>\n",
" <td>66</td>\n",
" <td>23</td>\n",
" <td>94</td>\n",
" <td>28.1</td>\n",
" <td>21</td>\n",
" <td>negative</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>137</td>\n",
" <td>40</td>\n",
" <td>35</td>\n",
" <td>168</td>\n",
" <td>43.1</td>\n",
" <td>33</td>\n",
" <td>positive</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Number of times pregnant Plasma glucose concentration \\\n",
"0 6 148 \n",
"1 1 85 \n",
"2 8 183 \n",
"3 1 89 \n",
"4 0 137 \n",
"\n",
" Diastolic blood pressure Triceps skin fold thickness \\\n",
"0 72 35 \n",
"1 66 29 \n",
"2 64 0 \n",
"3 66 23 \n",
"4 40 35 \n",
"\n",
" 2-Hour serum insulin Body mass index Age Class \n",
"0 0 33.6 50 positive \n",
"1 0 26.6 31 negative \n",
"2 0 23.3 32 positive \n",
"3 94 28.1 21 negative \n",
"4 168 43.1 33 positive "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head() "
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Feature matrix: every column except the last one, taken as a NumPy array.\n",
"x = data.iloc[:,0:-1].values"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(768, 7)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x.shape"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Labels: the last column of the frame, as a plain Python list.\n",
"y_string = list(data.iloc[:,-1])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"768"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(y_string)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Encode the string labels as integers: 'positive' maps to 1, anything else to 0.\n",
"# A comprehension keeps the cell a single statement and leaves no loop variable\n",
"# behind in the notebook namespace.\n",
"y_int = [1 if label == 'positive' else 0 for label in y_string]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Convert the integer labels into a floating-point NumPy array.\n",
"y = np.array(y_int, dtype='float64') "
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Data normalization: fit a scikit-learn StandardScaler on the features and\n",
"# replace `x` with the scaled values.\n",
"sc = StandardScaler()\n",
"x = sc.fit_transform(x)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Convert to float32 tensors. torch.tensor keeps the dtype of a NumPy input, and\n",
"# the label array is float64; nn.Linear parameters are float32 by default, and\n",
"# BCELoss compares the float32 predictions against the targets, so double\n",
"# tensors would fail with a dtype mismatch once the model is applied.\n",
"x = torch.tensor(x, dtype=torch.float32)\n",
"y = torch.tensor(y, dtype=torch.float32)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"torch.Size([768, 7])"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x.shape"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Turn the labels into a column of shape (n_samples, 1).\n",
"# reshape(-1, 1) is used instead of unsqueeze(1) so that re-running this cell\n",
"# leaves an already two-dimensional `y` unchanged rather than adding another axis.\n",
"y = y.reshape(-1, 1)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"torch.Size([768, 1])"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y.shape"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"class Dataset(torch.utils.data.Dataset):\n",
"    \"\"\"Map-style dataset pairing each row of ``x`` with the matching entry of ``y``.\n",
"\n",
"    The class keeps the name ``Dataset`` used by the rest of the notebook, which\n",
"    shadows the imported base class. The base is therefore referenced by its\n",
"    fully qualified name so that re-running this cell does not make the class\n",
"    inherit from its own previous definition.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, x, y):\n",
"        \"\"\"Store the features ``x`` and the labels ``y``.\n",
"\n",
"        Raises:\n",
"            ValueError: If ``x`` and ``y`` differ in length.\n",
"        \"\"\"\n",
"        if len(x) != len(y):\n",
"            raise ValueError(\n",
"                \"x and y must have the same length, got {} and {}\".format(len(x), len(y))\n",
"            )\n",
"        self.x = x\n",
"        self.y = y\n",
"\n",
"    def __getitem__(self, index):\n",
"        \"\"\"Return the ``(features, label)`` pair stored at ``index``.\"\"\"\n",
"        return self.x[index], self.y[index]\n",
"\n",
"    def __len__(self):\n",
"        \"\"\"Return the number of samples, taken from ``x``.\"\"\"\n",
"        return len(self.x)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Wrap the feature and label tensors in the Dataset class defined above.\n",
"dataset = Dataset(x,y)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"768"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(dataset)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# Mini-batch loader over the whole dataset: 32 samples per batch, shuffled.\n",
"train_loader = DataLoader(\n",
"    dataset=dataset,\n",
"    batch_size=32,\n",
"    shuffle=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<torch.utils.data.dataloader.DataLoader at 0x12ced9810>"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_loader"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"There are 24 batches in in the dataset\n",
"For one iteration (baatch) there are:\n",
"Data: torch.Size([32, 7])\n",
"lables: torch.Size([32, 1])\n"
]
}
],
"source": [
"# Visualization of the train loader: print the number of batches and the\n",
"# shapes of the first batch.\n",
"print(\"There are {} batches in in the dataset\".format(len(train_loader)))\n",
"# The loop targets are deliberately not called `x` and `y`: reusing those names\n",
"# would overwrite the full-dataset tensors with a single mini-batch.\n",
"for batch_x, batch_y in train_loader:\n",
"    print(\"For one iteration (baatch) there are:\")\n",
"    print(\"Data: {}\".format(batch_x.shape))\n",
"    print(\"lables: {}\".format(batch_y.shape))\n",
"    break  # one batch is enough to show the shapes"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"24.0"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"768/32"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"class Model(nn.Module):\n",
"    \"\"\"Fully connected network: three tanh hidden layers (5, 4 and 3 units) and a sigmoid output.\"\"\"\n",
"\n",
"    def __init__(self, input_features, output_features):\n",
"        \"\"\"Create the layers.\n",
"\n",
"        Args:\n",
"            input_features: Size of each input sample.\n",
"            output_features: Size of each output sample.\n",
"\n",
"        The hidden layer widths are hard coded here; they could instead be\n",
"        passed in as additional constructor arguments.\n",
"        \"\"\"\n",
"        super().__init__()\n",
"        self.fc1 = nn.Linear(input_features, 5)\n",
"        self.fc2 = nn.Linear(5, 4)\n",
"        self.fc3 = nn.Linear(4, 3)\n",
"        self.fc4 = nn.Linear(3, output_features)\n",
"        self.sigmoid = nn.Sigmoid()\n",
"        self.tanh = nn.Tanh()\n",
"\n",
"    def forward(self, x):\n",
"        \"\"\"Run ``x`` through the four linear layers and return the sigmoid of the last one.\"\"\"\n",
"        out = self.tanh(self.fc1(x))\n",
"        out = self.tanh(self.fc2(out))\n",
"        out = self.tanh(self.fc3(out))\n",
"        # The sigmoid keeps the output in (0, 1), as BCELoss expects.\n",
"        return self.sigmoid(self.fc4(out))"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"# Instantiate the network with 7 input features and 1 output.\n",
"net = Model(7,1)\n",
"\n",
"# Binary cross entropy (mean-reduced) is used as the loss function.\n",
"criterion = BCELoss(reduction='mean')\n",
"\n",
"# Define the optimizer: SGD with momentum over all parameters of the network.\n",
"optimizer = SGD(net.parameters(), lr=0.1, momentum=0.9)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
" "
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}