{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# This is the pruning of a single layer of the LeNet model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch.nn as nn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import torchvision"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch.nn.functional as F"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We check whether the machine is CUDA-enabled and select the device accordingly"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The definition of the model and its forward function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "class LeNet(nn.Module):\n",
    "    def __init__(self):\n",
    "        super(LeNet, self).__init__()\n",
    "        # 1 input image channel, 6 output channels, 3x3 square conv kernel\n",
    "        self.conv1 = nn.Conv2d(1, 6, 3)\n",
    "        self.conv2 = nn.Conv2d(6, 16, 3)\n",
    "        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # 5x5 image dimension\n",
    "        self.fc2 = nn.Linear(120, 84)\n",
    "        self.fc3 = nn.Linear(84, 10)\n",
    "\n",
    "    def forward(self, x):\n",
    "        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))\n",
    "        x = F.max_pool2d(F.relu(self.conv2(x)), 2)\n",
    "        x = x.view(-1, int(x.nelement() / x.shape[0]))\n",
    "        x = F.relu(self.fc1(x))\n",
    "        x = F.relu(self.fc2(x))\n",
    "        x = self.fc3(x)\n",
    "        return x"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A new LeNet object is instantiated and moved to the device"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = LeNet().to(device=device)"
   ]
},
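  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a quick sanity check (a minimal sketch, assuming MNIST-sized 28x28 inputs, for which the 16 * 5 * 5 flatten dimension holds), we can push a dummy batch through the untouched model and confirm it returns one logit vector of length 10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hypothetical smoke test: a random 1x1x28x28 batch should yield shape (1, 10)\n",
    "dummy = torch.randn(1, 1, 28, 28, device=device)\n",
    "print(model(dummy).shape)"
   ]
  },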
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We store the first convolutional layer of the LeNet model in the variable module and show its parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('weight', Parameter containing:\n",
      "tensor([[[[ 0.1114, 0.0445, -0.2559],\n",
      " [-0.2359, 0.2247, 0.2104],\n",
      " [-0.2143, -0.2063, 0.1992]]],\n",
      "\n",
      "\n",
      " [[[ 0.0273, 0.2150, 0.2836],\n",
      " [ 0.3173, -0.2818, 0.2136],\n",
      " [-0.1059, 0.1499, -0.1389]]],\n",
      "\n",
      "\n",
      " [[[-0.2346, 0.0675, -0.1149],\n",
      " [ 0.2330, 0.2610, 0.1344],\n",
      " [-0.2453, 0.1257, -0.1581]]],\n",
      "\n",
      "\n",
      " [[[ 0.3213, 0.1024, -0.2117],\n",
      " [-0.0538, 0.2364, 0.2069],\n",
      " [-0.0517, -0.1570, -0.1852]]],\n",
      "\n",
      "\n",
      " [[[-0.1652, 0.0627, -0.2905],\n",
      " [-0.1974, 0.1578, -0.2420],\n",
      " [-0.1531, 0.1894, -0.2319]]],\n",
      "\n",
      "\n",
      " [[[ 0.3220, 0.2642, -0.1456],\n",
      " [ 0.1882, -0.0138, 0.1939],\n",
      " [ 0.0574, -0.0914, -0.2180]]]], device='cuda:0', requires_grad=True)), ('bias', Parameter containing:\n",
      "tensor([ 0.1925, -0.1331, -0.1258, 0.2297, 0.0064, 0.3205], device='cuda:0',\n",
      " requires_grad=True))]\n"
     ]
    }
   ],
   "source": [
    "module = model.conv1\n",
    "#torch.save(module,\"layer_wo_pruning.pt\")\n",
    "print(list(module.named_parameters()))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "No buffers have been added to the module yet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[]\n"
     ]
    }
   ],
   "source": [
    "print(list(module.named_buffers()))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "PyTorch's built-in pruning utilities are imported"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch.nn.utils.prune as prune"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The random_unstructured function is applied to the weight of the conv1 layer. It adds a buffer named weight_mask, the binary\n",
    "mask that the pruning method applies to the parameter weight, replaces the parameter weight with its pruned version, and\n",
    "stores the original in weight_orig"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prune.random_unstructured(module, name=\"weight\", amount=0.3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('bias', Parameter containing:\n",
      "tensor([ 0.1925, -0.1331, -0.1258, 0.2297, 0.0064, 0.3205], device='cuda:0',\n",
      " requires_grad=True)), ('weight_orig', Parameter containing:\n",
      "tensor([[[[ 0.1114, 0.0445, -0.2559],\n",
      " [-0.2359, 0.2247, 0.2104],\n",
      " [-0.2143, -0.2063, 0.1992]]],\n",
      "\n",
      "\n",
      " [[[ 0.0273, 0.2150, 0.2836],\n",
      " [ 0.3173, -0.2818, 0.2136],\n",
      " [-0.1059, 0.1499, -0.1389]]],\n",
      "\n",
      "\n",
      " [[[-0.2346, 0.0675, -0.1149],\n",
      " [ 0.2330, 0.2610, 0.1344],\n",
      " [-0.2453, 0.1257, -0.1581]]],\n",
      "\n",
      "\n",
      " [[[ 0.3213, 0.1024, -0.2117],\n",
      " [-0.0538, 0.2364, 0.2069],\n",
      " [-0.0517, -0.1570, -0.1852]]],\n",
      "\n",
      "\n",
      " [[[-0.1652, 0.0627, -0.2905],\n",
      " [-0.1974, 0.1578, -0.2420],\n",
      " [-0.1531, 0.1894, -0.2319]]],\n",
      "\n",
      "\n",
      " [[[ 0.3220, 0.2642, -0.1456],\n",
      " [ 0.1882, -0.0138, 0.1939],\n",
      " [ 0.0574, -0.0914, -0.2180]]]], device='cuda:0', requires_grad=True))]\n"
     ]
    }
   ],
   "source": [
    "print(list(module.named_parameters()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('weight_mask', tensor([[[[0., 1., 1.],\n",
      " [0., 0., 0.],\n",
      " [0., 1., 1.]]],\n",
      "\n",
      "\n",
      " [[[1., 0., 1.],\n",
      " [1., 0., 1.],\n",
      " [1., 0., 1.]]],\n",
      "\n",
      "\n",
      " [[[1., 0., 1.],\n",
      " [1., 1., 0.],\n",
      " [1., 1., 1.]]],\n",
      "\n",
      "\n",
      " [[[1., 0., 1.],\n",
      " [0., 0., 0.],\n",
      " [1., 1., 1.]]],\n",
      "\n",
      "\n",
      " [[[1., 1., 1.],\n",
      " [1., 0., 1.],\n",
      " [1., 1., 1.]]],\n",
      "\n",
      "\n",
      " [[[1., 0., 1.],\n",
      " [1., 1., 1.],\n",
      " [1., 1., 1.]]]], device='cuda:0'))]\n"
     ]
    }
   ],
   "source": [
    "print(list(module.named_buffers()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[[[ 0.0000, 0.0445, -0.2559],\n",
      " [-0.0000, 0.0000, 0.0000],\n",
      " [-0.0000, -0.2063, 0.1992]]],\n",
      "\n",
      "\n",
      " [[[ 0.0273, 0.0000, 0.2836],\n",
      " [ 0.3173, -0.0000, 0.2136],\n",
      " [-0.1059, 0.0000, -0.1389]]],\n",
      "\n",
      "\n",
      " [[[-0.2346, 0.0000, -0.1149],\n",
      " [ 0.2330, 0.2610, 0.0000],\n",
      " [-0.2453, 0.1257, -0.1581]]],\n",
      "\n",
      "\n",
      " [[[ 0.3213, 0.0000, -0.2117],\n",
      " [-0.0000, 0.0000, 0.0000],\n",
      " [-0.0517, -0.1570, -0.1852]]],\n",
      "\n",
      "\n",
      " [[[-0.1652, 0.0627, -0.2905],\n",
      " [-0.1974, 0.0000, -0.2420],\n",
      " [-0.1531, 0.1894, -0.2319]]],\n",
      "\n",
      "\n",
      " [[[ 0.3220, 0.0000, -0.1456],\n",
      " [ 0.1882, -0.0138, 0.1939],\n",
      " [ 0.0574, -0.0914, -0.2180]]]], device='cuda:0',\n",
      " grad_fn=<MulBackward0>)\n"
     ]
    }
   ],
   "source": [
    "print(module.weight)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OrderedDict([(0, <torch.nn.utils.prune.RandomUnstructured object at 0x7f156c1b9a10>)])\n"
     ]
    }
   ],
   "source": [
    "print(module._forward_pre_hooks)"
   ]
},
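  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A minimal sketch to make the reparameterization concrete: after pruning, module.weight is recomputed as weight_orig * weight_mask, so the element-wise product of the two stored tensors should match it exactly, and the fraction of zeros should be close to the requested 30%"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# weight is derived from the original parameter and the binary mask\n",
    "print(torch.equal(module.weight, module.weight_orig * module.weight_mask))\n",
    "# fraction of pruned weights (close to 0.3; the count is rounded to whole weights)\n",
    "print(float((module.weight == 0).sum()) / module.weight.nelement())"
   ]
  },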
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The l1_unstructured function is applied to the bias of the conv1 layer. It adds a buffer named bias_mask, the binary mask\n",
    "that the pruning method applies to the parameter bias, replaces the parameter bias with its pruned version, and stores the\n",
    "original in bias_orig. Here amount=3 prunes the three entries with the smallest L1 norm (absolute value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prune.l1_unstructured(module, name=\"bias\", amount=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('weight_orig', Parameter containing:\n",
      "tensor([[[[ 0.1114, 0.0445, -0.2559],\n",
      " [-0.2359, 0.2247, 0.2104],\n",
      " [-0.2143, -0.2063, 0.1992]]],\n",
      "\n",
      "\n",
      " [[[ 0.0273, 0.2150, 0.2836],\n",
      " [ 0.3173, -0.2818, 0.2136],\n",
      " [-0.1059, 0.1499, -0.1389]]],\n",
      "\n",
      "\n",
      " [[[-0.2346, 0.0675, -0.1149],\n",
      " [ 0.2330, 0.2610, 0.1344],\n",
      " [-0.2453, 0.1257, -0.1581]]],\n",
      "\n",
      "\n",
      " [[[ 0.3213, 0.1024, -0.2117],\n",
      " [-0.0538, 0.2364, 0.2069],\n",
      " [-0.0517, -0.1570, -0.1852]]],\n",
      "\n",
      "\n",
      " [[[-0.1652, 0.0627, -0.2905],\n",
      " [-0.1974, 0.1578, -0.2420],\n",
      " [-0.1531, 0.1894, -0.2319]]],\n",
      "\n",
      "\n",
      " [[[ 0.3220, 0.2642, -0.1456],\n",
      " [ 0.1882, -0.0138, 0.1939],\n",
      " [ 0.0574, -0.0914, -0.2180]]]], device='cuda:0', requires_grad=True)), ('bias_orig', Parameter containing:\n",
      "tensor([ 0.1925, -0.1331, -0.1258, 0.2297, 0.0064, 0.3205], device='cuda:0',\n",
      " requires_grad=True))]\n"
     ]
    }
   ],
   "source": [
    "print(list(module.named_parameters()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('weight_mask', tensor([[[[0., 1., 1.],\n",
      " [0., 0., 0.],\n",
      " [0., 1., 1.]]],\n",
      "\n",
      "\n",
      " [[[1., 0., 1.],\n",
      " [1., 0., 1.],\n",
      " [1., 0., 1.]]],\n",
      "\n",
      "\n",
      " [[[1., 0., 1.],\n",
      " [1., 1., 0.],\n",
      " [1., 1., 1.]]],\n",
      "\n",
      "\n",
      " [[[1., 0., 1.],\n",
      " [0., 0., 0.],\n",
      " [1., 1., 1.]]],\n",
      "\n",
      "\n",
      " [[[1., 1., 1.],\n",
      " [1., 0., 1.],\n",
      " [1., 1., 1.]]],\n",
      "\n",
      "\n",
      " [[[1., 0., 1.],\n",
      " [1., 1., 1.],\n",
      " [1., 1., 1.]]]], device='cuda:0')), ('bias_mask', tensor([1., 0., 0., 1., 0., 1.], device='cuda:0'))]\n"
     ]
    }
   ],
   "source": [
    "print(list(module.named_buffers()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([0.1925, -0.0000, -0.0000, 0.2297, 0.0000, 0.3205], device='cuda:0',\n",
      " grad_fn=<MulBackward0>)\n"
     ]
    }
   ],
   "source": [
    "print(module.bias)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OrderedDict([(0, <torch.nn.utils.prune.RandomUnstructured object at 0x7f156c1b9a10>), (1, <torch.nn.utils.prune.L1Unstructured object at 0x7f14e2c47810>)])\n"
     ]
    }
   ],
   "source": [
    "print(module._forward_pre_hooks)"
   ]
},
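  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a quick check (a minimal sketch using the tensors created above), the three zeroed entries should be exactly the three entries of bias_orig with the smallest absolute value, since l1_unstructured ranks by L1 norm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# indices of the three smallest-magnitude entries of the original bias\n",
    "smallest = torch.topk(module.bias_orig.abs(), k=3, largest=False).indices\n",
    "print(smallest)\n",
    "# those positions should carry a 0 in the mask\n",
    "print(module.bias_mask[smallest])"
   ]
  },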
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The ln_structured function is now applied to the weight of the conv1 layer, pruning half of the output channels (dim=0) by\n",
    "their L2 norm (n=2). Because weight is already pruned, the new structured mask is combined with the existing weight_mask\n",
    "via a PruningContainer, so the effect of the earlier random mask is preserved"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[[[ 0.0000, 0.0000, -0.0000],\n",
      " [-0.0000, 0.0000, 0.0000],\n",
      " [-0.0000, -0.0000, 0.0000]]],\n",
      "\n",
      "\n",
      " [[[ 0.0273, 0.0000, 0.2836],\n",
      " [ 0.3173, -0.0000, 0.2136],\n",
      " [-0.1059, 0.0000, -0.1389]]],\n",
      "\n",
      "\n",
      " [[[-0.2346, 0.0000, -0.1149],\n",
      " [ 0.2330, 0.2610, 0.0000],\n",
      " [-0.2453, 0.1257, -0.1581]]],\n",
      "\n",
      "\n",
      " [[[ 0.0000, 0.0000, -0.0000],\n",
      " [-0.0000, 0.0000, 0.0000],\n",
      " [-0.0000, -0.0000, -0.0000]]],\n",
      "\n",
      "\n",
      " [[[-0.1652, 0.0627, -0.2905],\n",
      " [-0.1974, 0.0000, -0.2420],\n",
      " [-0.1531, 0.1894, -0.2319]]],\n",
      "\n",
      "\n",
      " [[[ 0.0000, 0.0000, -0.0000],\n",
      " [ 0.0000, -0.0000, 0.0000],\n",
      " [ 0.0000, -0.0000, -0.0000]]]], device='cuda:0',\n",
      " grad_fn=<MulBackward0>)\n"
     ]
    }
   ],
   "source": [
    "prune.ln_structured(module, name=\"weight\", amount=0.5, n=2, dim=0)\n",
    "\n",
    "# As we can verify, this will zero out all the connections corresponding to\n",
    "# 50% (3 out of 6) of the channels, while preserving the action of the\n",
    "# previous mask.\n",
    "print(module.weight)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[<torch.nn.utils.prune.RandomUnstructured object at 0x7f156c1b9a10>, <torch.nn.utils.prune.LnStructured object at 0x7f14e2c52350>]\n"
     ]
    }
   ],
   "source": [
    "for hook in module._forward_pre_hooks.values():\n",
    "    if hook._tensor_name == \"weight\":  # select out the correct hook\n",
    "        break\n",
    "\n",
    "print(list(hook))  # pruning history in the container\n"
   ]
},
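  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A minimal sketch to see the structured pruning at work: summing the combined mask over each output channel shows that three of the six channels (here 0, 3 and 5) are now entirely zeroed out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# number of surviving (unmasked) weights per output channel\n",
    "per_channel = module.weight_mask.sum(dim=(1, 2, 3))\n",
    "print(per_channel)\n",
    "# channels whose mask is all zeros were removed by ln_structured\n",
    "print((per_channel == 0).nonzero(as_tuple=True)[0])"
   ]
  },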
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "odict_keys(['conv1.weight_orig', 'conv1.bias_orig', 'conv1.weight_mask', 'conv1.bias_mask', 'conv2.weight', 'conv2.bias', 'fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias', 'fc3.weight', 'fc3.bias'])\n"
     ]
    }
   ],
   "source": [
    "print(model.state_dict().keys())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('weight_orig', Parameter containing:\n",
      "tensor([[[[ 0.1114, 0.0445, -0.2559],\n",
      " [-0.2359, 0.2247, 0.2104],\n",
      " [-0.2143, -0.2063, 0.1992]]],\n",
      "\n",
      "\n",
      " [[[ 0.0273, 0.2150, 0.2836],\n",
      " [ 0.3173, -0.2818, 0.2136],\n",
      " [-0.1059, 0.1499, -0.1389]]],\n",
      "\n",
      "\n",
      " [[[-0.2346, 0.0675, -0.1149],\n",
      " [ 0.2330, 0.2610, 0.1344],\n",
      " [-0.2453, 0.1257, -0.1581]]],\n",
      "\n",
      "\n",
      " [[[ 0.3213, 0.1024, -0.2117],\n",
      " [-0.0538, 0.2364, 0.2069],\n",
      " [-0.0517, -0.1570, -0.1852]]],\n",
      "\n",
      "\n",
      " [[[-0.1652, 0.0627, -0.2905],\n",
      " [-0.1974, 0.1578, -0.2420],\n",
      " [-0.1531, 0.1894, -0.2319]]],\n",
      "\n",
      "\n",
      " [[[ 0.3220, 0.2642, -0.1456],\n",
      " [ 0.1882, -0.0138, 0.1939],\n",
      " [ 0.0574, -0.0914, -0.2180]]]], device='cuda:0', requires_grad=True)), ('bias_orig', Parameter containing:\n",
      "tensor([ 0.1925, -0.1331, -0.1258, 0.2297, 0.0064, 0.3205], device='cuda:0',\n",
      " requires_grad=True))]\n"
     ]
    }
   ],
   "source": [
    "print(list(module.named_parameters()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('weight_mask', tensor([[[[0., 0., 0.],\n",
      " [0., 0., 0.],\n",
      " [0., 0., 0.]]],\n",
      "\n",
      "\n",
      " [[[1., 0., 1.],\n",
      " [1., 0., 1.],\n",
      " [1., 0., 1.]]],\n",
      "\n",
      "\n",
      " [[[1., 0., 1.],\n",
      " [1., 1., 0.],\n",
      " [1., 1., 1.]]],\n",
      "\n",
      "\n",
      " [[[0., 0., 0.],\n",
      " [0., 0., 0.],\n",
      " [0., 0., 0.]]],\n",
      "\n",
      "\n",
      " [[[1., 1., 1.],\n",
      " [1., 0., 1.],\n",
      " [1., 1., 1.]]],\n",
      "\n",
      "\n",
      " [[[0., 0., 0.],\n",
      " [0., 0., 0.],\n",
      " [0., 0., 0.]]]], device='cuda:0')), ('bias_mask', tensor([1., 0., 0., 1., 0., 1.], device='cuda:0'))]\n"
     ]
    }
   ],
   "source": [
    "print(list(module.named_buffers()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[[[ 0.0000, 0.0000, -0.0000],\n",
      " [-0.0000, 0.0000, 0.0000],\n",
      " [-0.0000, -0.0000, 0.0000]]],\n",
      "\n",
      "\n",
      " [[[ 0.0273, 0.0000, 0.2836],\n",
      " [ 0.3173, -0.0000, 0.2136],\n",
      " [-0.1059, 0.0000, -0.1389]]],\n",
      "\n",
      "\n",
      " [[[-0.2346, 0.0000, -0.1149],\n",
      " [ 0.2330, 0.2610, 0.0000],\n",
      " [-0.2453, 0.1257, -0.1581]]],\n",
      "\n",
      "\n",
      " [[[ 0.0000, 0.0000, -0.0000],\n",
      " [-0.0000, 0.0000, 0.0000],\n",
      " [-0.0000, -0.0000, -0.0000]]],\n",
      "\n",
      "\n",
      " [[[-0.1652, 0.0627, -0.2905],\n",
      " [-0.1974, 0.0000, -0.2420],\n",
      " [-0.1531, 0.1894, -0.2319]]],\n",
      "\n",
      "\n",
      " [[[ 0.0000, 0.0000, -0.0000],\n",
      " [ 0.0000, -0.0000, 0.0000],\n",
      " [ 0.0000, -0.0000, -0.0000]]]], device='cuda:0',\n",
      " grad_fn=<MulBackward0>)\n"
     ]
    }
   ],
   "source": [
    "print(module.weight)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Removes the pruning reparameterization from a module and the pruning method from the forward hook. The pruned\n",
    "parameter named ``name`` remains permanently pruned, and the parameter named ``name+'_orig'`` is removed from \n",
    "the parameter list. Similarly, the buffer named ``name+'_mask'`` is removed from the buffers."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('bias_orig', Parameter containing:\n",
      "tensor([ 0.1925, -0.1331, -0.1258, 0.2297, 0.0064, 0.3205], device='cuda:0',\n",
      " requires_grad=True)), ('weight', Parameter containing:\n",
      "tensor([[[[ 0.0000, 0.0000, -0.0000],\n",
      " [-0.0000, 0.0000, 0.0000],\n",
      " [-0.0000, -0.0000, 0.0000]]],\n",
      "\n",
      "\n",
      " [[[ 0.0273, 0.0000, 0.2836],\n",
      " [ 0.3173, -0.0000, 0.2136],\n",
      " [-0.1059, 0.0000, -0.1389]]],\n",
      "\n",
      "\n",
      " [[[-0.2346, 0.0000, -0.1149],\n",
      " [ 0.2330, 0.2610, 0.0000],\n",
      " [-0.2453, 0.1257, -0.1581]]],\n",
      "\n",
      "\n",
      " [[[ 0.0000, 0.0000, -0.0000],\n",
      " [-0.0000, 0.0000, 0.0000],\n",
      " [-0.0000, -0.0000, -0.0000]]],\n",
      "\n",
      "\n",
      " [[[-0.1652, 0.0627, -0.2905],\n",
      " [-0.1974, 0.0000, -0.2420],\n",
      " [-0.1531, 0.1894, -0.2319]]],\n",
      "\n",
      "\n",
      " [[[ 0.0000, 0.0000, -0.0000],\n",
      " [ 0.0000, -0.0000, 0.0000],\n",
      " [ 0.0000, -0.0000, -0.0000]]]], device='cuda:0', requires_grad=True))]\n"
     ]
    }
   ],
   "source": [
    "prune.remove(module, 'weight')\n",
    "#torch.save(module,\"layer_w_pruning.pt\")\n",
    "print(list(module.named_parameters()))"
   ]
},
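  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A quick check (a minimal sketch) that the reparameterization is gone for weight: weight is a plain parameter again, weight_orig and weight_mask no longer exist, while bias is still pruned, so bias_orig and bias_mask remain"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "params = dict(module.named_parameters())\n",
    "buffers = dict(module.named_buffers())\n",
    "# weight is permanent again; only the bias reparameterization is left\n",
    "print('weight' in params, 'weight_orig' in params, 'weight_mask' in buffers)\n",
    "print('bias_orig' in params, 'bias_mask' in buffers)"
   ]
  },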
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Pruning multiple parameters in the model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "By specifying the desired pruning technique and parameters, we can easily prune multiple tensors in a network, perhaps according to their type, as we will see in this example."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional: the pytorch_modelsize package can estimate the model's memory footprint\n",
    "#from pytorch_modelsize import SizeEstimator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Input tensor layout expected by the model: (Batch, Channels, Height, Width)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "dict_keys(['conv1.weight_mask', 'conv2.weight_mask', 'fc1.weight_mask', 'fc2.weight_mask', 'fc3.weight_mask'])\n"
     ]
    }
   ],
   "source": [
    "new_model = LeNet()\n",
    "#se = SizeEstimator(new_model)\n",
    "#print(se.estimate_size())\n",
    "for name, module in new_model.named_modules():\n",
    "    # prune 20% of connections in all 2D-conv layers\n",
    "    if isinstance(module, torch.nn.Conv2d):\n",
    "        prune.l1_unstructured(module, name='weight', amount=0.2)\n",
    "    # prune 40% of connections in all linear layers\n",
    "    elif isinstance(module, torch.nn.Linear):\n",
    "        prune.l1_unstructured(module, name='weight', amount=0.4)\n",
    "\n",
    "#torch.save(new_model,\"model_2.pt\")\n",
    "print(dict(new_model.named_buffers()).keys())  # to verify that all masks exist"
   ]
},
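  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A minimal sketch to confirm the two pruning rates: iterating over the pruned modules and counting zeros in each weight should report roughly 20% sparsity for the conv layers and 40% for the linear layers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for name, module in new_model.named_modules():\n",
    "    if isinstance(module, (torch.nn.Conv2d, torch.nn.Linear)):\n",
    "        zeros = float(torch.sum(module.weight == 0))\n",
    "        print('{}.weight sparsity: {:.2f}%'.format(name, 100. * zeros / module.weight.nelement()))"
   ]
  },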
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Global Pruning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = LeNet()\n",
    "\n",
    "parameters_to_prune = (\n",
    "    (model.conv1, 'weight'),\n",
    "    (model.conv2, 'weight'),\n",
    "    (model.fc1, 'weight'),\n",
    "    (model.fc2, 'weight'),\n",
    "    (model.fc3, 'weight'),\n",
    ")\n",
    "\n",
    "prune.global_unstructured(\n",
    "    parameters_to_prune,\n",
    "    pruning_method=prune.L1Unstructured,\n",
    "    amount=0.2,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sparsity in conv1.weight: 5.56%\n"
     ]
    }
   ],
   "source": [
    "print(\n",
    "    \"Sparsity in conv1.weight: {:.2f}%\".format(\n",
    "        100. * float(torch.sum(model.conv1.weight == 0))\n",
    "        / float(model.conv1.weight.nelement())\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sparsity in conv2.weight: 7.87%\n"
     ]
    }
   ],
   "source": [
    "print(\n",
    "    \"Sparsity in conv2.weight: {:.2f}%\".format(\n",
    "        100. * float(torch.sum(model.conv2.weight == 0))\n",
    "        / float(model.conv2.weight.nelement())\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sparsity in fc1.weight: 22.00%\n"
     ]
    }
   ],
   "source": [
    "print(\n",
    "    \"Sparsity in fc1.weight: {:.2f}%\".format(\n",
    "        100. * float(torch.sum(model.fc1.weight == 0))\n",
    "        / float(model.fc1.weight.nelement())\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sparsity in fc2.weight: 12.41%\n"
     ]
    }
   ],
   "source": [
    "print(\n",
    "    \"Sparsity in fc2.weight: {:.2f}%\".format(\n",
    "        100. * float(torch.sum(model.fc2.weight == 0))\n",
    "        / float(model.fc2.weight.nelement())\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sparsity in fc3.weight: 10.48%\n"
     ]
    }
   ],
   "source": [
    "print(\n",
    "    \"Sparsity in fc3.weight: {:.2f}%\".format(\n",
    "        100. * float(torch.sum(model.fc3.weight == 0))\n",
    "        / float(model.fc3.weight.nelement())\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Global sparsity: 20.00%\n"
     ]
    }
   ],
   "source": [
    "print(\n",
    "    \"Global sparsity: {:.2f}%\".format(\n",
    "        100. * float(\n",
    "            torch.sum(model.conv1.weight == 0)\n",
    "            + torch.sum(model.conv2.weight == 0)\n",
    "            + torch.sum(model.fc1.weight == 0)\n",
    "            + torch.sum(model.fc2.weight == 0)\n",
    "            + torch.sum(model.fc3.weight == 0)\n",
    "        )\n",
    "        / float(\n",
    "            model.conv1.weight.nelement()\n",
    "            + model.conv2.weight.nelement()\n",
    "            + model.fc1.weight.nelement()\n",
    "            + model.fc2.weight.nelement()\n",
    "            + model.fc3.weight.nelement()\n",
    "        )\n",
    "    )\n",
    ")"
   ]
},
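  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The same global figure, recomputed compactly (a minimal sketch over the parameters_to_prune tuple defined above): global pruning spends a single 20% budget across all five weight tensors, which is why the per-layer sparsities above differ while the total comes out at 20%"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# total zeros over total elements across every globally pruned tensor\n",
    "zeros = sum(float(torch.sum(m.weight == 0)) for m, _ in parameters_to_prune)\n",
    "total = sum(m.weight.nelement() for m, _ in parameters_to_prune)\n",
    "print('Global sparsity: {:.2f}%'.format(100. * zeros / total))"
   ]
  },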
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "54"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.conv1.weight.nelement()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[[[-0.2216, 0.1865, 0.2481],\n",
       " [-0.1684, -0.2357, -0.2128],\n",
       " [ 0.1659, -0.2490, -0.2235]]],\n",
       "\n",
       "\n",
       " [[[ 0.0586, 0.0346, -0.1531],\n",
       " [-0.0187, -0.3142, -0.0000],\n",
       " [-0.1740, -0.1174, -0.2448]]],\n",
       "\n",
       "\n",
       " [[[-0.1202, -0.0211, -0.0783],\n",
       " [-0.3148, -0.2717, 0.0340],\n",
       " [-0.0460, 0.3228, -0.1805]]],\n",
       "\n",
       "\n",
       " [[[ 0.0363, 0.1857, -0.2712],\n",
       " [ 0.0000, 0.2806, -0.0261],\n",
       " [-0.0000, -0.2439, -0.1886]]],\n",
       "\n",
       "\n",
       " [[[ 0.1291, -0.1453, 0.0179],\n",
       " [-0.2168, -0.3066, 0.2693],\n",
       " [-0.2566, -0.2499, -0.1595]]],\n",
       "\n",
       "\n",
       " [[[-0.0998, -0.2484, -0.2465],\n",
       " [-0.0702, -0.1950, 0.0241],\n",
       " [ 0.1547, 0.2272, -0.3236]]]], grad_fn=<MulBackward0>)"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.conv1.weight  # zeros mark the connections removed by global pruning"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}