{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/eddie/.pyenv/versions/3.7.6/envs/pytorch/lib/python3.7/site-packages/pandas/compat/__init__.py:117: UserWarning: Could not import the lzma module. Your installed Python is incomplete. Attempting to use lzma compression will result in a RuntimeError.\n", " warnings.warn(msg)\n" ] } ], "source": [ "import numpy as np\n", "import torch\n", "import torch.nn as nn\n", "import pandas as pd\n", "from sklearn.preprocessing import StandardScaler\n", "from torch.utils.data import Dataset" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Load the data set using pandas\n", "data = pd.read_csv('diabetes.csv')\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Number of times pregnantPlasma glucose concentrationDiastolic blood pressureTriceps skin fold thickness2-Hour serum insulinBody mass indexAgeClass
061487235033.650positive
11856629026.631negative
28183640023.332positive
318966239428.121negative
40137403516843.133positive
\n", "
" ], "text/plain": [ " Number of times pregnant Plasma glucose concentration \\\n", "0 6 148 \n", "1 1 85 \n", "2 8 183 \n", "3 1 89 \n", "4 0 137 \n", "\n", " Diastolic blood pressure Triceps skin fold thickness \\\n", "0 72 35 \n", "1 66 29 \n", "2 64 0 \n", "3 66 23 \n", "4 40 35 \n", "\n", " 2-Hour serum insulin Body mass index Age Class \n", "0 0 33.6 50 positive \n", "1 0 26.6 31 negative \n", "2 0 23.3 32 positive \n", "3 94 28.1 21 negative \n", "4 168 43.1 33 positive " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.head() " ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "x = data.iloc[:,0:-1].values" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(768, 7)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x.shape" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "y_string = list(data.iloc[:,-1])" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "768" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(y_string)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "y_int = []\n", "for i in y_string:\n", " if i == 'positive':\n", " y_int.append(1)\n", " else:\n", " y_int.append(0)\n", " " ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "y = np.array(y_int, dtype='float64') " ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# data normaalization\n", "sc = StandardScaler()\n", "x = sc.fit_transform(x)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "x = torch.tensor(x)\n", "y = torch.tensor(y)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([768, 7])" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x.shape" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "y = y.unsqueeze(1)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([768, 1])" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 4 }