In [1]:
import numpy as np
import torch
import torch.nn as nn
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.nn import BCELoss
from torch.optim import SGD



In [2]:
# Load the data set using pandas
data = pd.read_csv('diabetes.csv')


In [3]:
data.head() 

Unnamed: 0,Number of times pregnant,Plasma glucose concentration,Diastolic blood pressure,Triceps skin fold thickness,2-Hour serum insulin,Body mass index,Age,Class
0,6,148,72,35,0,33.6,50,positive
1,1,85,66,29,0,26.6,31,negative
2,8,183,64,0,0,23.3,32,positive
3,1,89,66,23,94,28.1,21,negative
4,0,137,40,35,168,43.1,33,positive


In [4]:
x = data.iloc[:,0:-1].values

In [5]:
x.shape

(768, 7)

In [6]:
y_string = list(data.iloc[:,-1])

In [7]:
len(y_string)

768

In [8]:
y_int = []
for i in y_string:
    if i == 'positive':
        y_int.append(1)
    else:
        y_int.append(0)
        

In [9]:
y = np.array(y_int, dtype='float64') 

In [10]:
# data normaalization
sc = StandardScaler()
x = sc.fit_transform(x)

In [11]:
x = torch.tensor(x)
y = torch.tensor(y)

In [12]:
x.shape

torch.Size([768, 7])

In [13]:
y = y.unsqueeze(1)

In [14]:
y.shape

torch.Size([768, 1])

In [15]:
class Dataset(Dataset):
    def __init__(self,x,y):
        self.x = x
        self.y = y
        
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    
    def __len__(self):
        return len(self.x)
        

In [16]:
dataset = Dataset(x,y)

In [17]:
len(dataset)

768

In [18]:
train_loader = DataLoader(dataset=dataset,
           batch_size=32,
          shuffle=True)

In [19]:
train_loader

<torch.utils.data.dataloader.DataLoader at 0x124dd84d0>

In [20]:
# visualization of the train loader
print("There are {} batches in in the dataset".format(len(train_loader)))
for (x,y) in train_loader:
    print("For one iteration (baatch) there are:")
    print("Data:     {}".format(x.shape))
    print("labels:   {}".format(y.shape))
    break
    

There are 24 batches in in the dataset
For one iteration (baatch) there are:
Data:     torch.Size([32, 7])
lables:   torch.Size([32, 1])


In [21]:
768/32

24.0

In [22]:
class Model(nn.Module):
    def __init__(self, input_features,output_features): #,hidden_layer_1,hidden_layer_2,): caan be done this way or hard coded
        super(Model, self).__init__()
        self.fc1 = nn.Linear(input_features, 5)
        self.fc2 = nn.Linear(5,4)
        self.fc3 = nn.Linear(4,3)
        self.fc4 = nn.Linear(3,output_features)
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()
        
    def forward(self,x):
        out = self.fc1(x)
        out = self.tanh(out)
        out = self.fc2(out)
        out = self.tanh(out)
        out = self.fc3(out)
        out = self.tanh(out)
        out = self.fc4(out)
        out = self.sigmoid(out)
        
        return out
    

In [23]:
# The definition of the class Model object nets
net = Model(7,1)

# Binary cross entropy was chosen as a the loss function
criterion = BCELoss(reduction='mean')

# Define the optimizer
optimizer = SGD(net.parameters(), lr=0.1, momentum=0.9)



In [25]:
# Training the network
epochs = 200
for epoch in range(epochs):
    for inputs, labels in train_loader:
        inputs = inputs.float()
        labels = labels.float()
        # Forward propagation
        outputs = net(inputs)
        # Loss calculation
        loss = criterion(outputs,labels)
        # Clear gradient buffer (w <- w - lr*gradient)
        optimizer.zero_grad()
        # Back propagation
        loss.backward()
        # Update weights
        optimizer.step()
        
    # Accuracy caalculation
    output =  (outputs > 0.5).float()
    accuracy = (output == labels).float().mean()
    
    # Print statistics
    print("Epoch: {}/{}, Loss: {:.3f}, Accuracy: {:.3f}".format(epoch+1, epochs, loss, accuracy))
        

Epoch: 1/200, Loss: 0.551, Accuracy: 0.719
Epoch: 2/200, Loss: 0.473, Accuracy: 0.781
Epoch: 3/200, Loss: 0.536, Accuracy: 0.750
Epoch: 4/200, Loss: 0.519, Accuracy: 0.656
Epoch: 5/200, Loss: 0.453, Accuracy: 0.781
Epoch: 6/200, Loss: 0.666, Accuracy: 0.594
Epoch: 7/200, Loss: 0.455, Accuracy: 0.719
Epoch: 8/200, Loss: 0.617, Accuracy: 0.656
Epoch: 9/200, Loss: 0.367, Accuracy: 0.875
Epoch: 10/200, Loss: 0.496, Accuracy: 0.719
Epoch: 11/200, Loss: 0.483, Accuracy: 0.812
Epoch: 12/200, Loss: 0.556, Accuracy: 0.656
Epoch: 13/200, Loss: 0.455, Accuracy: 0.719
Epoch: 14/200, Loss: 0.504, Accuracy: 0.781
Epoch: 15/200, Loss: 0.492, Accuracy: 0.750
Epoch: 16/200, Loss: 0.603, Accuracy: 0.781
Epoch: 17/200, Loss: 0.417, Accuracy: 0.906
Epoch: 18/200, Loss: 0.450, Accuracy: 0.844
Epoch: 19/200, Loss: 0.313, Accuracy: 0.844
Epoch: 20/200, Loss: 0.642, Accuracy: 0.656
Epoch: 21/200, Loss: 0.369, Accuracy: 0.844
Epoch: 22/200, Loss: 0.499, Accuracy: 0.781
Epoch: 23/200, Loss: 0.477, Accuracy: 0.7