295 lines
42 KiB
Plaintext
Executable File
295 lines
42 KiB
Plaintext
Executable File
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import numpy as np\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"import sklearn.datasets\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
"# Fix the seed so the dataset, and everything derived from it, is reproducible\n",
"X, y = sklearn.datasets.make_moons(200, noise=0.15, random_state=0)\n"
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"<matplotlib.collections.PathCollection at 0x7f0d41ee6e10>"
|
|
]
|
|
},
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "\n",
|
|
"text/plain": [
|
|
"<Figure size 432x288 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {
|
|
"needs_background": "light"
|
|
},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
"# Plot the two input features against each other, coloured by class label\n",
"fig, ax = plt.subplots()\n",
"ax.scatter(X[:, 0], X[:, 1], c=y)\n",
"# Trailing semicolon keeps the list returned by ax.set() out of the cell output\n",
"ax.set(title='make_moons samples', xlabel='feature 0', ylabel='feature 1');"
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"(200, 2) (200,)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
"# Sanity check: feature-matrix shape followed by label-vector shape\n",
"print('{} {}'.format(X.shape, y.shape))"
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
"# Hyperparameters\n",
"input_neurons = output_neurons = 2  # width of the input and output layers\n",
"samples = len(X)                    # number of rows in the feature matrix\n",
"learning_rate = 1e-3                # step size applied to every gradient update\n",
"lambda_reg = 1e-2                   # weight of the L2 penalty on W1 and W2\n"
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
"def retreive(model_dict):\n",
"    \"\"\"Unpack the network parameters stored in ``model_dict``.\n",
"\n",
"    Reads the keys 'W1', 'b1', 'W2' and 'b2' (in that order) and returns\n",
"    their values as a 4-tuple. A missing key raises ``KeyError``.\n",
"    \"\"\"\n",
"    return tuple(model_dict[key] for key in ('W1', 'b1', 'W2', 'b2'))\n"
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
"def forward(x, model_dict):\n",
"    \"\"\"Run a forward pass: tanh hidden layer followed by a softmax output.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    x : ndarray\n",
"        Input batch, one sample per row; its column count must match the\n",
"        rows of ``W1``.\n",
"    model_dict : dict\n",
"        Holds the parameters under the keys 'W1', 'b1', 'W2' and 'b2'.\n",
"\n",
"    Returns\n",
"    -------\n",
"    z1 : ndarray\n",
"        Hidden-layer pre-activations, ``x.dot(W1) + b1``.\n",
"    a1 : ndarray\n",
"        Hidden-layer activations, ``tanh(z1)``.\n",
"    softmax : ndarray\n",
"        Class probabilities; every row sums to 1.\n",
"    \"\"\"\n",
"    W1, b1, W2, b2 = retreive(model_dict)\n",
"\n",
"    # Use the argument rather than the notebook-level X so any batch can be passed in\n",
"    z1 = x.dot(W1) + b1\n",
"    a1 = np.tanh(z1)\n",
"    z2 = a1.dot(W2) + b2\n",
"\n",
"    # Softmax is invariant to a per-row shift; subtracting the row maximum\n",
"    # keeps np.exp from overflowing on large logits.\n",
"    exp_scores = np.exp(z2 - np.max(z2, axis=1, keepdims=True))\n",
"    softmax = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)\n",
"\n",
"    return z1, a1, softmax\n"
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
"def loss(softmax, y, model_dict):\n",
"    \"\"\"Return the L2-regularised cross-entropy objective divided by the sample count.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    softmax : ndarray\n",
"        Predicted class probabilities, one row per sample.\n",
"    y : ndarray of int\n",
"        Index of the correct class for each sample.\n",
"    model_dict : dict\n",
"        Holds the parameters under the keys 'W1', 'b1', 'W2' and 'b2'.\n",
"\n",
"    Returns\n",
"    -------\n",
"    float\n",
"        ``(sum(-log p_correct) + lambda_reg / 2 * (|W1|^2 + |W2|^2)) / n``,\n",
"        where ``lambda_reg`` is the notebook-level regularisation strength.\n",
"    \"\"\"\n",
"    W1, b1, W2, b2 = retreive(model_dict)\n",
"    num_samples = y.shape[0]\n",
"\n",
"    # Pick each sample's probability for its true class in one indexing step;\n",
"    # sizing from y (instead of a fixed 200) makes this work for any batch size.\n",
"    correct_probs = softmax[np.arange(num_samples), y]\n",
"    data_loss = np.sum(-np.log(correct_probs))\n",
"\n",
"    # Biases are deliberately left out of the penalty\n",
"    reg_loss = lambda_reg / 2 * (np.sum(np.square(W1)) + np.sum(np.square(W2)))\n",
"\n",
"    return float((data_loss + reg_loss) / num_samples)"
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
"def predict(model_dict, x):\n",
"    \"\"\"Return the most likely class index for every row of ``x``.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    model_dict : dict\n",
"        Holds the parameters under the keys 'W1', 'b1', 'W2' and 'b2'.\n",
"    x : ndarray\n",
"        Input batch, one sample per row; its column count must match the\n",
"        rows of ``W1``.\n",
"\n",
"    Returns\n",
"    -------\n",
"    ndarray of int\n",
"        One predicted class index per input row.\n",
"    \"\"\"\n",
"    W1, b1, W2, b2 = retreive(model_dict)\n",
"    hidden = np.tanh(x.dot(W1) + b1)\n",
"    logits = hidden.dot(W2) + b2\n",
"\n",
"    # Softmax is strictly increasing in each logit, so the arg-max of the\n",
"    # logits is the arg-max of the probabilities. Skipping np.exp avoids the\n",
"    # overflow-to-NaN that an unshifted softmax hits on large logits.\n",
"    return np.argmax(logits, axis=1)\n"
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
"def backpropagation(x, y, model_dict, epochs):\n",
"    \"\"\"Train the network with full-batch gradient descent.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    x : ndarray\n",
"        Training inputs, one sample per row.\n",
"    y : ndarray of int\n",
"        Index of the correct class for each sample.\n",
"    model_dict : dict\n",
"        Holds the parameters under the keys 'W1', 'b1', 'W2' and 'b2'.\n",
"        The arrays are updated in place.\n",
"    epochs : int\n",
"        Number of gradient-descent steps to take.\n",
"\n",
"    Returns\n",
"    -------\n",
"    dict\n",
"        The trained parameters under the same four keys.\n",
"\n",
"    Notes\n",
"    -----\n",
"    Uses the notebook-level ``learning_rate`` and ``lambda_reg``. Prints the\n",
"    loss every 50 epochs.\n",
"    \"\"\"\n",
"    num_samples = x.shape[0]\n",
"\n",
"    for i in range(epochs):\n",
"        W1, b1, W2, b2 = retreive(model_dict)\n",
"        _, a1, probs = forward(x, model_dict)\n",
"\n",
"        # Print the loss every 50 epochs. Done before the update so the\n",
"        # probabilities and the weights in the penalty belong to the same step.\n",
"        if i % 50 == 0:\n",
"            print(\"Loss at epoch {} is: {:.3f}\".format(i, loss(probs, y, model_dict)))\n",
"\n",
"        # Gradient of the summed cross-entropy w.r.t. the logits: probs - one_hot(y)\n",
"        delta3 = np.copy(probs)\n",
"        delta3[np.arange(num_samples), y] -= 1\n",
"        dW2 = a1.T.dot(delta3)\n",
"        db2 = np.sum(delta3, axis=0, keepdims=True)\n",
"\n",
"        # tanh'(z1) = 1 - tanh(z1)^2, and a1 already holds tanh(z1)\n",
"        delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2))\n",
"        dW1 = x.T.dot(delta2)\n",
"        db1 = np.sum(delta2, axis=0)\n",
"\n",
"        # Add regularization terms: d/dW of lambda/2 * sum(W^2) is lambda * W,\n",
"        # element-wise (not the scalar lambda * sum(W)).\n",
"        dW2 += lambda_reg * W2\n",
"        dW1 += lambda_reg * W1\n",
"\n",
"        # Update Weights: W = W + (-lr*gradient) = W - lr*gradient\n",
"        W1 += -learning_rate * dW1\n",
"        b1 += -learning_rate * db1\n",
"        W2 += -learning_rate * dW2\n",
"        b2 += -learning_rate * db2\n",
"\n",
"        # Update the model dictionary\n",
"        model_dict = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}\n",
"\n",
"    return model_dict\n"
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
"# Define Initial Weights\n",
"def init_network(input_dim, hidden_dim, output_dim, seed=None):\n",
"    \"\"\"Create the initial parameters of a one-hidden-layer network.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    input_dim, hidden_dim, output_dim : int\n",
"        Widths of the input, hidden and output layers.\n",
"    seed : int, optional\n",
"        When given, the weights are drawn from a private generator seeded\n",
"        with this value, so repeated calls give identical weights. When\n",
"        omitted, NumPy's global random state is used, as before.\n",
"\n",
"    Returns\n",
"    -------\n",
"    dict\n",
"        'W1' (input_dim, hidden_dim), 'b1' (1, hidden_dim),\n",
"        'W2' (hidden_dim, output_dim) and 'b2' (1, output_dim).\n",
"    \"\"\"\n",
"    rng = np.random if seed is None else np.random.RandomState(seed)\n",
"\n",
"    # Xavier-style initialization: standard-normal draws scaled by 1/sqrt(fan_in).\n",
"    # W1 is drawn before W2 so the random stream is consumed in a fixed order.\n",
"    W1 = rng.randn(input_dim, hidden_dim) / np.sqrt(input_dim)\n",
"    W2 = rng.randn(hidden_dim, output_dim) / np.sqrt(hidden_dim)\n",
"\n",
"    return {\n",
"        'W1': W1,\n",
"        'b1': np.zeros((1, hidden_dim)),\n",
"        'W2': W2,\n",
"        'b2': np.zeros((1, output_dim)),\n",
"    }\n"
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Loss at epoch 0 is: 0.773\n",
|
|
"Loss at epoch 50 is: 0.330\n",
|
|
"Loss at epoch 100 is: 0.273\n",
|
|
"Loss at epoch 150 is: 0.262\n",
|
|
"Loss at epoch 200 is: 0.259\n",
|
|
"Loss at epoch 250 is: 0.258\n",
|
|
"Loss at epoch 300 is: 0.258\n",
|
|
"Loss at epoch 350 is: 0.258\n",
|
|
"Loss at epoch 400 is: 0.257\n",
|
|
"Loss at epoch 450 is: 0.257\n",
|
|
"Loss at epoch 500 is: 0.257\n",
|
|
"Loss at epoch 550 is: 0.256\n",
|
|
"Loss at epoch 600 is: 0.256\n",
|
|
"Loss at epoch 650 is: 0.256\n",
|
|
"Loss at epoch 700 is: 0.255\n",
|
|
"Loss at epoch 750 is: 0.255\n",
|
|
"Loss at epoch 800 is: 0.255\n",
|
|
"Loss at epoch 850 is: 0.255\n",
|
|
"Loss at epoch 900 is: 0.254\n",
|
|
"Loss at epoch 950 is: 0.254\n",
|
|
"Loss at epoch 1000 is: 0.254\n",
|
|
"Loss at epoch 1050 is: 0.254\n",
|
|
"Loss at epoch 1100 is: 0.253\n",
|
|
"Loss at epoch 1150 is: 0.253\n",
|
|
"Loss at epoch 1200 is: 0.253\n",
|
|
"Loss at epoch 1250 is: 0.253\n",
|
|
"Loss at epoch 1300 is: 0.252\n",
|
|
"Loss at epoch 1350 is: 0.252\n",
|
|
"Loss at epoch 1400 is: 0.252\n",
|
|
"Loss at epoch 1450 is: 0.252\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
"# Build a network with a 3-unit hidden layer, then train it for 1500 epochs\n",
"model_dict = init_network(input_neurons, 3, output_neurons)\n",
"model = backpropagation(X, y, model_dict, epochs=1500)"
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.7.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|