pytorch-stuff/Scikit-Learn.ipynb

819 lines
20 KiB
Plaintext
Raw Normal View History

2020-04-14 17:37:47 +00:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"from sklearn.preprocessing import MinMaxScaler"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Toy input for the scaler: a 10x2 array of integers drawn from [0, 100) (upper bound exclusive).\n",
"# NOTE(review): no seed is set anywhere in this notebook, so a re-run produces different values.\n",
"data = np.random.randint(low=0, high=100, size=(10, 2))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[53, 52],\n",
" [82, 96],\n",
" [60, 84],\n",
" [15, 90],\n",
" [78, 31],\n",
" [ 5, 12],\n",
" [32, 75],\n",
" [80, 39],\n",
" [31, 46],\n",
" [22, 29]])"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"scaler_model = MinMaxScaler()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"sklearn.preprocessing._data.MinMaxScaler"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(scaler_model)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"MinMaxScaler(copy=True, feature_range=(0, 1))"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scaler_model.fit(data)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.62337662, 0.47619048],\n",
" [1. , 1. ],\n",
" [0.71428571, 0.85714286],\n",
" [0.12987013, 0.92857143],\n",
" [0.94805195, 0.22619048],\n",
" [0. , 0. ],\n",
" [0.35064935, 0.75 ],\n",
" [0.97402597, 0.32142857],\n",
" [0.33766234, 0.4047619 ],\n",
" [0.22077922, 0.20238095]])"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scaler_model.transform(data)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.62337662, 0.47619048],\n",
" [1. , 1. ],\n",
" [0.71428571, 0.85714286],\n",
" [0.12987013, 0.92857143],\n",
" [0.94805195, 0.22619048],\n",
" [0. , 0. ],\n",
" [0.35064935, 0.75 ],\n",
" [0.97402597, 0.32142857],\n",
" [0.33766234, 0.4047619 ],\n",
" [0.22077922, 0.20238095]])"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scaler_model.fit_transform(data)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# 50 rows x 4 columns of integers in [0, 100]; the upper bound 101 is exclusive.\n",
"# NOTE(review): unseeded, so the values change on every run.\n",
"mydata = np.random.randint(low=0, high=101, size=(50, 4))"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 97,  70,  23,  81],\n",
"       [ 60,  40,  73,   7],\n",
"       [ 60,  88,  87,  67],\n",
"       [ 42,  25,  81, 100],\n",
"       [ 45,  30,  69,  72]])"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Peek at the first five rows rather than printing the whole array.\n",
"mydata[:5]"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Wrap the raw array in a DataFrame and name its four columns.\n",
"df = pd.DataFrame(\n",
"    mydata,\n",
"    columns=['f1', 'f2', 'f3', 'label'],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
"    .dataframe tbody tr th:only-of-type {\n",
"        vertical-align: middle;\n",
"    }\n",
"\n",
"    .dataframe tbody tr th {\n",
"        vertical-align: top;\n",
"    }\n",
"\n",
"    .dataframe thead th {\n",
"        text-align: right;\n",
"    }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>f1</th>\n",
"      <th>f2</th>\n",
"      <th>f3</th>\n",
"      <th>label</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>0</th>\n",
"      <td>97</td>\n",
"      <td>70</td>\n",
"      <td>23</td>\n",
"      <td>81</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>1</th>\n",
"      <td>60</td>\n",
"      <td>40</td>\n",
"      <td>73</td>\n",
"      <td>7</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>2</th>\n",
"      <td>60</td>\n",
"      <td>88</td>\n",
"      <td>87</td>\n",
"      <td>67</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>3</th>\n",
"      <td>42</td>\n",
"      <td>25</td>\n",
"      <td>81</td>\n",
"      <td>100</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>4</th>\n",
"      <td>45</td>\n",
"      <td>30</td>\n",
"      <td>69</td>\n",
"      <td>72</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"   f1  f2  f3  label\n",
"0  97  70  23     81\n",
"1  60  40  73      7\n",
"2  60  88  87     67\n",
"3  42  25  81    100\n",
"4  45  30  69     72"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview only the first five rows; rendering the full frame buries the rest of the notebook.\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"X = df[['f1','f2','f3']]"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"y = df['label']"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"# Reserve 33% of the rows for testing; random_state pins the shuffle so the split is repeatable.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X, y, test_size=0.33, random_state=42\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(33, 3)"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train.shape"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(17, 3)"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}