{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import pandas as pd\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from torch.utils.data import Dataset\n",
    "from torch.utils.data import DataLoader\n",
    "from torch.nn import BCELoss\n",
    "from torch.optim import SGD"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the data set using pandas\n",
    "data = pd.read_csv('diabetes.csv')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Number of times pregnant</th>\n",
       "      <th>Plasma glucose concentration</th>\n",
       "      <th>Diastolic blood pressure</th>\n",
       "      <th>Triceps skin fold thickness</th>\n",
       "      <th>2-Hour serum insulin</th>\n",
       "      <th>Body mass index</th>\n",
       "      <th>Age</th>\n",
       "      <th>Class</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>6</td>\n",
       "      <td>148</td>\n",
       "      <td>72</td>\n",
       "      <td>35</td>\n",
       "      <td>0</td>\n",
       "      <td>33.6</td>\n",
       "      <td>50</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>85</td>\n",
       "      <td>66</td>\n",
       "      <td>29</td>\n",
       "      <td>0</td>\n",
       "      <td>26.6</td>\n",
       "      <td>31</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8</td>\n",
       "      <td>183</td>\n",
       "      <td>64</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>23.3</td>\n",
       "      <td>32</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>89</td>\n",
       "      <td>66</td>\n",
       "      <td>23</td>\n",
       "      <td>94</td>\n",
       "      <td>28.1</td>\n",
       "      <td>21</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>137</td>\n",
       "      <td>40</td>\n",
       "      <td>35</td>\n",
       "      <td>168</td>\n",
       "      <td>43.1</td>\n",
       "      <td>33</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Number of times pregnant  Plasma glucose concentration  \\\n",
       "0                         6                           148   \n",
       "1                         1                            85   \n",
       "2                         8                           183   \n",
       "3                         1                            89   \n",
       "4                         0                           137   \n",
       "\n",
       "   Diastolic blood pressure  Triceps skin fold thickness  \\\n",
       "0                        72                           35   \n",
       "1                        66                           29   \n",
       "2                        64                            0   \n",
       "3                        66                           23   \n",
       "4                        40                           35   \n",
       "\n",
       "   2-Hour serum insulin  Body mass index  Age     Class  \n",
       "0                     0             33.6   50  positive  \n",
       "1                     0             26.6   31  negative  \n",
       "2                     0             23.3   32  positive  \n",
       "3                    94             28.1   21  negative  \n",
       "4                   168             43.1   33  positive  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head() "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = data.iloc[:,0:-1].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(768, 7)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_string = list(data.iloc[:,-1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "768"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(y_string)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_int = []\n",
    "for i in y_string:\n",
    "    if i == 'positive':\n",
    "        y_int.append(1)\n",
    "    else:\n",
    "        y_int.append(0)\n",
    "        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "y = np.array(y_int, dtype='float64') "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# data normaalization\n",
    "sc = StandardScaler()\n",
    "x = sc.fit_transform(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = torch.tensor(x)\n",
    "y = torch.tensor(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([768, 7])"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "y = y.unsqueeze(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([768, 1])"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Dataset(Dataset):\n",
    "    def __init__(self,x,y):\n",
    "        self.x = x\n",
    "        self.y = y\n",
    "        \n",
    "    def __getitem__(self, index):\n",
    "        return self.x[index], self.y[index]\n",
    "    \n",
    "    def __len__(self):\n",
    "        return len(self.x)\n",
    "        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = Dataset(x,y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "768"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(dataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_loader = DataLoader(dataset=dataset,\n",
    "           batch_size=32,\n",
    "          shuffle=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<torch.utils.data.dataloader.DataLoader at 0x12ced9810>"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_loader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "There are 24 batches in in the dataset\n",
      "For one iteration (baatch) there are:\n",
      "Data:     torch.Size([32, 7])\n",
      "lables:   torch.Size([32, 1])\n"
     ]
    }
   ],
   "source": [
    "# visualization of the train loader\n",
    "print(\"There are {} batches in in the dataset\".format(len(train_loader)))\n",
    "for (x,y) in train_loader:\n",
    "    print(\"For one iteration (baatch) there are:\")\n",
    "    print(\"Data:     {}\".format(x.shape))\n",
    "    print(\"lables:   {}\".format(y.shape))\n",
    "    break\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "24.0"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "768/32"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Model(nn.Module):\n",
    "    def __init__(self, input_features,output_features): #,hidden_layer_1,hidden_layer_2,): caan be done this way or hard coded\n",
    "        super(Model, self).__init__()\n",
    "        self.fc1 = nn.Linear(input_features, 5)\n",
    "        self.fc2 = nn.Linear(5,4)\n",
    "        self.fc3 = nn.Linear(4,3)\n",
    "        self.fc4 = nn.Linear(3,output_features)\n",
    "        self.sigmoid = nn.Sigmoid()\n",
    "        self.tanh = nn.Tanh()\n",
    "        \n",
    "    def forward(self,x):\n",
    "        out = self.fc1(x)\n",
    "        out = self.tanh(out)\n",
    "        out = self.fc2(out)\n",
    "        out = self.tanh(out)\n",
    "        out = self.fc3(out)\n",
    "        out = self.tanh(out)\n",
    "        out = self.fc4(out)\n",
    "        out = self.sigmoid(out)\n",
    "        \n",
    "        return out\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The definition of the class Model object nets\n",
    "net = Model(7,1)\n",
    "\n",
    "# Binary cross entropy was chosen as a the loss function\n",
    "criterion = BCELoss(reduction='mean')\n",
    "\n",
    "# Define the optimizer\n",
    "optimizer = SGD(net.parameters(), lr=0.1, momentum=0.9)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "  "
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}