{ "cells": [ { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import torch\n", "import torch.nn as nn\n", "import pandas as pd\n", "from sklearn.preprocessing import StandardScaler\n", "from torch.utils.data import Dataset\n", "from torch.utils.data import DataLoader\n", "from torch.nn import BCELoss\n", "from torch.optim import SGD" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Load the data set using pandas\n", "data = pd.read_csv('diabetes.csv')\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Number of times pregnantPlasma glucose concentrationDiastolic blood pressureTriceps skin fold thickness2-Hour serum insulinBody mass indexAgeClass
061487235033.650positive
11856629026.631negative
28183640023.332positive
318966239428.121negative
40137403516843.133positive
\n", "
" ], "text/plain": [ " Number of times pregnant Plasma glucose concentration \\\n", "0 6 148 \n", "1 1 85 \n", "2 8 183 \n", "3 1 89 \n", "4 0 137 \n", "\n", " Diastolic blood pressure Triceps skin fold thickness \\\n", "0 72 35 \n", "1 66 29 \n", "2 64 0 \n", "3 66 23 \n", "4 40 35 \n", "\n", " 2-Hour serum insulin Body mass index Age Class \n", "0 0 33.6 50 positive \n", "1 0 26.6 31 negative \n", "2 0 23.3 32 positive \n", "3 94 28.1 21 negative \n", "4 168 43.1 33 positive " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.head() " ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "x = data.iloc[:,0:-1].values" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(768, 7)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x.shape" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "y_string = list(data.iloc[:,-1])" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "768" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(y_string)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "y_int = []\n", "for i in y_string:\n", " if i == 'positive':\n", " y_int.append(1)\n", " else:\n", " y_int.append(0)\n", " " ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "y = np.array(y_int, dtype='float64') " ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# data normaalization\n", "sc = StandardScaler()\n", "x = sc.fit_transform(x)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "x = torch.tensor(x)\n", "y = torch.tensor(y)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([768, 7])" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x.shape" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "y = y.unsqueeze(1)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([768, 1])" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y.shape" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "class Dataset(Dataset):\n", " def __init__(self,x,y):\n", " self.x = x\n", " self.y = y\n", " \n", " def __getitem__(self, index):\n", " return self.x[index], self.y[index]\n", " \n", " def __len__(self):\n", " return len(self.x)\n", " " ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "dataset = Dataset(x,y)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "768" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(dataset)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "train_loader = DataLoader(dataset=dataset,\n", " batch_size=32,\n", " shuffle=True)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_loader" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "There are 24 batches in in the dataset\n", "For one iteration (baatch) there are:\n", "Data: torch.Size([32, 7])\n", "lables: torch.Size([32, 1])\n" ] } ], "source": [ "# visualization of the train loader\n", "print(\"There are {} batches in in the dataset\".format(len(train_loader)))\n", "for (x,y) in train_loader:\n", " print(\"For one iteration (baatch) there are:\")\n", " print(\"Data: {}\".format(x.shape))\n", " print(\"lables: {}\".format(y.shape))\n", " break\n", " " ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "24.0" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "768/32" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "class Model(nn.Module):\n", " def __init__(self, input_features,output_features): #,hidden_layer_1,hidden_layer_2,): caan be done this way or hard coded\n", " super(Model, self).__init__()\n", " self.fc1 = nn.Linear(input_features, 5)\n", " self.fc2 = nn.Linear(5,4)\n", " self.fc3 = nn.Linear(4,3)\n", " self.fc4 = nn.Linear(3,output_features)\n", " self.sigmoid = nn.Sigmoid()\n", " self.tanh = nn.Tanh()\n", " \n", " def forward(self,x):\n", " out = self.fc1(x)\n", " out = self.tanh(out)\n", " out = self.fc2(out)\n", " out = self.tanh(out)\n", " out = self.fc3(out)\n", " out = self.tanh(out)\n", " out = self.fc4(out)\n", " out = self.sigmoid(out)\n", " \n", " return out\n", " " ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "# The definition of the class Model object nets\n", "net = Model(7,1)\n", "\n", "# Binary cross entropy was chosen as a the loss function\n", "criterion = BCELoss(reduction='mean')\n", "\n", "# Define the optimizer\n", "optimizer = SGD(net.parameters(), lr=0.1, momentum=0.9)\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ " " ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 4 }