{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# This is the pruning of a single layer of the LeNet model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch.nn as nn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import torchvision"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch.nn.functional as F"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We check whether the machine is CUDA-enabled and select the device accordingly"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The definition of the model and its forward function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "class LeNet(nn.Module):\n",
    "    def __init__(self):\n",
    "        super(LeNet, self).__init__()\n",
    "        # 1 input image channel, 6 output channels, 3x3 square conv kernel\n",
    "        self.conv1 = nn.Conv2d(1, 6, 3)\n",
    "        self.conv2 = nn.Conv2d(6, 16, 3)\n",
    "        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # 5x5 image dimension\n",
    "        self.fc2 = nn.Linear(120, 84)\n",
    "        self.fc3 = nn.Linear(84, 10)\n",
    "\n",
    "    def forward(self, x):\n",
    "        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))\n",
    "        x = F.max_pool2d(F.relu(self.conv2(x)), 2)\n",
    "        x = x.view(-1, int(x.nelement() / x.shape[0]))\n",
    "        x = F.relu(self.fc1(x))\n",
    "        x = F.relu(self.fc2(x))\n",
    "        x = self.fc3(x)\n",
    "        return x"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A new LeNet object is instantiated and moved to the device"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = LeNet().to(device=device)"
   ]
},
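  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a quick sanity check (a minimal sketch, assuming MNIST-sized 28x28 inputs, for which the 16 * 5 * 5 flatten dimension holds), we can push a dummy batch through the untouched model and confirm it returns one logit vector of length 10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hypothetical smoke test: a random 1x1x28x28 batch should yield shape (1, 10)\n",
    "dummy = torch.randn(1, 1, 28, 28, device=device)\n",
    "print(model(dummy).shape)"
   ]
  },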
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We store the first convolutional layer of the LeNet model in the variable module and show its parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('weight', Parameter containing:\n",
      "tensor([[[[ 0.1114, 0.0445, -0.2559],\n",
      " [-0.2359, 0.2247, 0.2104],\n",
      " [-0.2143, -0.2063, 0.1992]]],\n",
      "\n",
      "\n",
      " [[[ 0.0273, 0.2150, 0.2836],\n",
      " [ 0.3173, -0.2818, 0.2136],\n",
      " [-0.1059, 0.1499, -0.1389]]],\n",
      "\n",
      "\n",
      " [[[-0.2346, 0.0675, -0.1149],\n",
      " [ 0.2330, 0.2610, 0.1344],\n",
      " [-0.2453, 0.1257, -0.1581]]],\n",
      "\n",
      "\n",
      " [[[ 0.3213, 0.1024, -0.2117],\n",
      " [-0.0538, 0.2364, 0.2069],\n",
      " [-0.0517, -0.1570, -0.1852]]],\n",
      "\n",
      "\n",
      " [[[-0.1652, 0.0627, -0.2905],\n",
      " [-0.1974, 0.1578, -0.2420],\n",
      " [-0.1531, 0.1894, -0.2319]]],\n",
      "\n",
      "\n",
      " [[[ 0.3220, 0.2642, -0.1456],\n",
      " [ 0.1882, -0.0138, 0.1939],\n",
      " [ 0.0574, -0.0914, -0.2180]]]], device='cuda:0', requires_grad=True)), ('bias', Parameter containing:\n",
      "tensor([ 0.1925, -0.1331, -0.1258, 0.2297, 0.0064, 0.3205], device='cuda:0',\n",
      " requires_grad=True))]\n"
     ]
    }
   ],
   "source": [
    "module = model.conv1\n",
    "#torch.save(module,\"layer_wo_pruning.pt\")\n",
    "print(list(module.named_parameters()))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "No buffers have been added to the module yet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[]\n"
     ]
    }
   ],
   "source": [
    "print(list(module.named_buffers()))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "PyTorch's built-in pruning utilities are imported"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch.nn.utils.prune as prune"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The random_unstructured function is applied to the weight of the conv1 layer. It adds a buffer named weight_mask, the binary\n",
    "mask that the pruning method applies to the parameter weight, replaces the parameter weight with its pruned version, and\n",
    "stores the original in weight_orig"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prune.random_unstructured(module, name=\"weight\", amount=0.3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('bias', Parameter containing:\n",
      "tensor([ 0.1925, -0.1331, -0.1258, 0.2297, 0.0064, 0.3205], device='cuda:0',\n",
      " requires_grad=True)), ('weight_orig', Parameter containing:\n",
      "tensor([[[[ 0.1114, 0.0445, -0.2559],\n",
      " [-0.2359, 0.2247, 0.2104],\n",
      " [-0.2143, -0.2063, 0.1992]]],\n",
      "\n",
      "\n",
      " [[[ 0.0273, 0.2150, 0.2836],\n",
      " [ 0.3173, -0.2818, 0.2136],\n",
      " [-0.1059, 0.1499, -0.1389]]],\n",
      "\n",
      "\n",
      " [[[-0.2346, 0.0675, -0.1149],\n",
      " [ 0.2330, 0.2610, 0.1344],\n",
      " [-0.2453, 0.1257, -0.1581]]],\n",
      "\n",
      "\n",
      " [[[ 0.3213, 0.1024, -0.2117],\n",
      " [-0.0538, 0.2364, 0.2069],\n",
      " [-0.0517, -0.1570, -0.1852]]],\n",
      "\n",
      "\n",
      " [[[-0.1652, 0.0627, -0.2905],\n",
      " [-0.1974, 0.1578, -0.2420],\n",
      " [-0.1531, 0.1894, -0.2319]]],\n",
      "\n",
      "\n",
      " [[[ 0.3220, 0.2642, -0.1456],\n",
      " [ 0.1882, -0.0138, 0.1939],\n",
      " [ 0.0574, -0.0914, -0.2180]]]], device='cuda:0', requires_grad=True))]\n"
     ]
    }
   ],
   "source": [
    "print(list(module.named_parameters()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('weight_mask', tensor([[[[0., 1., 1.],\n",
      " [0., 0., 0.],\n",
      " [0., 1., 1.]]],\n",
      "\n",
      "\n",
      " [[[1., 0., 1.],\n",
      " [1., 0., 1.],\n",
      " [1., 0., 1.]]],\n",
      "\n",
      "\n",
      " [[[1., 0., 1.],\n",
      " [1., 1., 0.],\n",
      " [1., 1., 1.]]],\n",
      "\n",
      "\n",
      " [[[1., 0., 1.],\n",
      " [0., 0., 0.],\n",
      " [1., 1., 1.]]],\n",
      "\n",
      "\n",
      " [[[1., 1., 1.],\n",
      " [1., 0., 1.],\n",
      " [1., 1., 1.]]],\n",
      "\n",
      "\n",
      " [[[1., 0., 1.],\n",
      " [1., 1., 1.],\n",
      " [1., 1., 1.]]]], device='cuda:0'))]\n"
     ]
    }
   ],
   "source": [
    "print(list(module.named_buffers()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[[[ 0.0000, 0.0445, -0.2559],\n",
      " [-0.0000, 0.0000, 0.0000],\n",
      " [-0.0000, -0.2063, 0.1992]]],\n",
      "\n",
      "\n",
      " [[[ 0.0273, 0.0000, 0.2836],\n",
      " [ 0.3173, -0.0000, 0.2136],\n",
      " [-0.1059, 0.0000, -0.1389]]],\n",
      "\n",
      "\n",
      " [[[-0.2346, 0.0000, -0.1149],\n",
      " [ 0.2330, 0.2610, 0.0000],\n",
      " [-0.2453, 0.1257, -0.1581]]],\n",
      "\n",
      "\n",
      " [[[ 0.3213, 0.0000, -0.2117],\n",
      " [-0.0000, 0.0000, 0.0000],\n",
      " [-0.0517, -0.1570, -0.1852]]],\n",
      "\n",
      "\n",
      " [[[-0.1652, 0.0627, -0.2905],\n",
      " [-0.1974, 0.0000, -0.2420],\n",
      " [-0.1531, 0.1894, -0.2319]]],\n",
      "\n",
      "\n",
      " [[[ 0.3220, 0.0000, -0.1456],\n",
      " [ 0.1882, -0.0138, 0.1939],\n",
      " [ 0.0574, -0.0914, -0.2180]]]], device='cuda:0',\n",
      " grad_fn=<MulBackward0>)\n"
     ]
    }
   ],
   "source": [
    "print(module.weight)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OrderedDict([(0, <torch.nn.utils.prune.RandomUnstructured object at 0x7f156c1b9a10>)])\n"
     ]
    }
   ],
   "source": [
    "print(module._forward_pre_hooks)"
   ]
},
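  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A minimal sketch to make the reparameterization concrete: after pruning, module.weight is recomputed as weight_orig * weight_mask, so the element-wise product of the two stored tensors should match it exactly, and the fraction of zeros should be close to the requested 30%"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# weight is derived from the original parameter and the binary mask\n",
    "print(torch.equal(module.weight, module.weight_orig * module.weight_mask))\n",
    "# fraction of pruned weights (close to 0.3; the count is rounded to whole weights)\n",
    "print(float((module.weight == 0).sum()) / module.weight.nelement())"
   ]
  },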
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The l1_unstructured function is applied to the bias of the conv1 layer. It adds a buffer named bias_mask, the binary mask\n",
    "that the pruning method applies to the parameter bias, replaces the parameter bias with its pruned version, and stores the\n",
    "original in bias_orig. Here amount=3 prunes the three entries with the smallest L1 norm (absolute value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prune.l1_unstructured(module, name=\"bias\", amount=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('weight_orig', Parameter containing:\n",
      "tensor([[[[ 0.1114, 0.0445, -0.2559],\n",
      " [-0.2359, 0.2247, 0.2104],\n",
      " [-0.2143, -0.2063, 0.1992]]],\n",
      "\n",
      "\n",
      " [[[ 0.0273, 0.2150, 0.2836],\n",
      " [ 0.3173, -0.2818, 0.2136],\n",
      " [-0.1059, 0.1499, -0.1389]]],\n",
      "\n",
      "\n",
      " [[[-0.2346, 0.0675, -0.1149],\n",
      " [ 0.2330, 0.2610, 0.1344],\n",
      " [-0.2453, 0.1257, -0.1581]]],\n",
      "\n",
      "\n",
      " [[[ 0.3213, 0.1024, -0.2117],\n",
      " [-0.0538, 0.2364, 0.2069],\n",
      " [-0.0517, -0.1570, -0.1852]]],\n",
      "\n",
      "\n",
      " [[[-0.1652, 0.0627, -0.2905],\n",
      " [-0.1974, 0.1578, -0.2420],\n",
      " [-0.1531, 0.1894, -0.2319]]],\n",
      "\n",
      "\n",
      " [[[ 0.3220, 0.2642, -0.1456],\n",
      " [ 0.1882, -0.0138, 0.1939],\n",
      " [ 0.0574, -0.0914, -0.2180]]]], device='cuda:0', requires_grad=True)), ('bias_orig', Parameter containing:\n",
      "tensor([ 0.1925, -0.1331, -0.1258, 0.2297, 0.0064, 0.3205], device='cuda:0',\n",
      " requires_grad=True))]\n"
     ]
    }
   ],
   "source": [
    "print(list(module.named_parameters()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('weight_mask', tensor([[[[0., 1., 1.],\n",
      " [0., 0., 0.],\n",
      " [0., 1., 1.]]],\n",
      "\n",
      "\n",
      " [[[1., 0., 1.],\n",
      " [1., 0., 1.],\n",
      " [1., 0., 1.]]],\n",
      "\n",
      "\n",
      " [[[1., 0., 1.],\n",
      " [1., 1., 0.],\n",
      " [1., 1., 1.]]],\n",
      "\n",
      "\n",
      " [[[1., 0., 1.],\n",
      " [0., 0., 0.],\n",
      " [1., 1., 1.]]],\n",
      "\n",
      "\n",
      " [[[1., 1., 1.],\n",
      " [1., 0., 1.],\n",
      " [1., 1., 1.]]],\n",
      "\n",
      "\n",
      " [[[1., 0., 1.],\n",
      " [1., 1., 1.],\n",
      " [1., 1., 1.]]]], device='cuda:0')), ('bias_mask', tensor([1., 0., 0., 1., 0., 1.], device='cuda:0'))]\n"
     ]
    }
   ],
   "source": [
    "print(list(module.named_buffers()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([0.1925, -0.0000, -0.0000, 0.2297, 0.0000, 0.3205], device='cuda:0',\n",
      " grad_fn=<MulBackward0>)\n"
     ]
    }
   ],
   "source": [
    "print(module.bias)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OrderedDict([(0, <torch.nn.utils.prune.RandomUnstructured object at 0x7f156c1b9a10>), (1, <torch.nn.utils.prune.L1Unstructured object at 0x7f14e2c47810>)])\n"
     ]
    }
   ],
   "source": [
    "print(module._forward_pre_hooks)"
   ]
},
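  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a quick check (a minimal sketch using the tensors created above), the three zeroed entries should be exactly the three entries of bias_orig with the smallest absolute value, since l1_unstructured ranks by L1 norm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# indices of the three smallest-magnitude entries of the original bias\n",
    "smallest = torch.topk(module.bias_orig.abs(), k=3, largest=False).indices\n",
    "print(smallest)\n",
    "# those positions should carry a 0 in the mask\n",
    "print(module.bias_mask[smallest])"
   ]
  },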
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The ln_structured function is now applied to the weight of the conv1 layer, pruning half of the output channels (dim=0) by\n",
    "their L2 norm (n=2). Because weight is already pruned, the new structured mask is combined with the existing weight_mask\n",
    "via a PruningContainer, so the effect of the earlier random mask is preserved"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[[[ 0.0000, 0.0000, -0.0000],\n",
      " [-0.0000, 0.0000, 0.0000],\n",
      " [-0.0000, -0.0000, 0.0000]]],\n",
      "\n",
      "\n",
      " [[[ 0.0273, 0.0000, 0.2836],\n",
      " [ 0.3173, -0.0000, 0.2136],\n",
      " [-0.1059, 0.0000, -0.1389]]],\n",
      "\n",
      "\n",
      " [[[-0.2346, 0.0000, -0.1149],\n",
      " [ 0.2330, 0.2610, 0.0000],\n",
      " [-0.2453, 0.1257, -0.1581]]],\n",
      "\n",
      "\n",
      " [[[ 0.0000, 0.0000, -0.0000],\n",
      " [-0.0000, 0.0000, 0.0000],\n",
      " [-0.0000, -0.0000, -0.0000]]],\n",
      "\n",
      "\n",
      " [[[-0.1652, 0.0627, -0.2905],\n",
      " [-0.1974, 0.0000, -0.2420],\n",
      " [-0.1531, 0.1894, -0.2319]]],\n",
      "\n",
      "\n",
      " [[[ 0.0000, 0.0000, -0.0000],\n",
      " [ 0.0000, -0.0000, 0.0000],\n",
      " [ 0.0000, -0.0000, -0.0000]]]], device='cuda:0',\n",
      " grad_fn=<MulBackward0>)\n"
     ]
    }
   ],
   "source": [
    "prune.ln_structured(module, name=\"weight\", amount=0.5, n=2, dim=0)\n",
    "\n",
    "# As we can verify, this will zero out all the connections corresponding to\n",
    "# 50% (3 out of 6) of the channels, while preserving the action of the\n",
    "# previous mask.\n",
    "print(module.weight)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[<torch.nn.utils.prune.RandomUnstructured object at 0x7f156c1b9a10>, <torch.nn.utils.prune.LnStructured object at 0x7f14e2c52350>]\n"
     ]
    }
   ],
   "source": [
    "for hook in module._forward_pre_hooks.values():\n",
    "    if hook._tensor_name == \"weight\":  # select out the correct hook\n",
    "        break\n",
    "\n",
    "print(list(hook))  # pruning history in the container\n"
   ]
},
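  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A minimal sketch to see the structured pruning at work: summing the combined mask over each output channel shows that three of the six channels (here 0, 3 and 5) are now entirely zeroed out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# number of surviving (unmasked) weights per output channel\n",
    "per_channel = module.weight_mask.sum(dim=(1, 2, 3))\n",
    "print(per_channel)\n",
    "# channels whose mask is all zeros were removed by ln_structured\n",
    "print((per_channel == 0).nonzero(as_tuple=True)[0])"
   ]
  },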
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "odict_keys(['conv1.weight_orig', 'conv1.bias_orig', 'conv1.weight_mask', 'conv1.bias_mask', 'conv2.weight', 'conv2.bias', 'fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias', 'fc3.weight', 'fc3.bias'])\n"
     ]
    }
   ],
   "source": [
    "print(model.state_dict().keys())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('weight_orig', Parameter containing:\n",
      "tensor([[[[ 0.1114, 0.0445, -0.2559],\n",
      " [-0.2359, 0.2247, 0.2104],\n",
      " [-0.2143, -0.2063, 0.1992]]],\n",
      "\n",
      "\n",
      " [[[ 0.0273, 0.2150, 0.2836],\n",
      " [ 0.3173, -0.2818, 0.2136],\n",
      " [-0.1059, 0.1499, -0.1389]]],\n",
      "\n",
      "\n",
      " [[[-0.2346, 0.0675, -0.1149],\n",
      " [ 0.2330, 0.2610, 0.1344],\n",
      " [-0.2453, 0.1257, -0.1581]]],\n",
      "\n",
      "\n",
      " [[[ 0.3213, 0.1024, -0.2117],\n",
      " [-0.0538, 0.2364, 0.2069],\n",
      " [-0.0517, -0.1570, -0.1852]]],\n",
      "\n",
      "\n",
      " [[[-0.1652, 0.0627, -0.2905],\n",
      " [-0.1974, 0.1578, -0.2420],\n",
      " [-0.1531, 0.1894, -0.2319]]],\n",
      "\n",
      "\n",
      " [[[ 0.3220, 0.2642, -0.1456],\n",
      " [ 0.1882, -0.0138, 0.1939],\n",
      " [ 0.0574, -0.0914, -0.2180]]]], device='cuda:0', requires_grad=True)), ('bias_orig', Parameter containing:\n",
      "tensor([ 0.1925, -0.1331, -0.1258, 0.2297, 0.0064, 0.3205], device='cuda:0',\n",
      " requires_grad=True))]\n"
     ]
    }
   ],
   "source": [
    "print(list(module.named_parameters()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('weight_mask', tensor([[[[0., 0., 0.],\n",
      " [0., 0., 0.],\n",
      " [0., 0., 0.]]],\n",
      "\n",
      "\n",
      " [[[1., 0., 1.],\n",
      " [1., 0., 1.],\n",
      " [1., 0., 1.]]],\n",
      "\n",
      "\n",
      " [[[1., 0., 1.],\n",
      " [1., 1., 0.],\n",
      " [1., 1., 1.]]],\n",
      "\n",
      "\n",
      " [[[0., 0., 0.],\n",
      " [0., 0., 0.],\n",
      " [0., 0., 0.]]],\n",
      "\n",
      "\n",
      " [[[1., 1., 1.],\n",
      " [1., 0., 1.],\n",
      " [1., 1., 1.]]],\n",
      "\n",
      "\n",
      " [[[0., 0., 0.],\n",
      " [0., 0., 0.],\n",
      " [0., 0., 0.]]]], device='cuda:0')), ('bias_mask', tensor([1., 0., 0., 1., 0., 1.], device='cuda:0'))]\n"
     ]
    }
   ],
   "source": [
    "print(list(module.named_buffers()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[[[ 0.0000, 0.0000, -0.0000],\n",
      " [-0.0000, 0.0000, 0.0000],\n",
      " [-0.0000, -0.0000, 0.0000]]],\n",
      "\n",
      "\n",
      " [[[ 0.0273, 0.0000, 0.2836],\n",
      " [ 0.3173, -0.0000, 0.2136],\n",
      " [-0.1059, 0.0000, -0.1389]]],\n",
      "\n",
      "\n",
      " [[[-0.2346, 0.0000, -0.1149],\n",
      " [ 0.2330, 0.2610, 0.0000],\n",
      " [-0.2453, 0.1257, -0.1581]]],\n",
      "\n",
      "\n",
      " [[[ 0.0000, 0.0000, -0.0000],\n",
      " [-0.0000, 0.0000, 0.0000],\n",
      " [-0.0000, -0.0000, -0.0000]]],\n",
      "\n",
      "\n",
      " [[[-0.1652, 0.0627, -0.2905],\n",
      " [-0.1974, 0.0000, -0.2420],\n",
      " [-0.1531, 0.1894, -0.2319]]],\n",
      "\n",
      "\n",
      " [[[ 0.0000, 0.0000, -0.0000],\n",
      " [ 0.0000, -0.0000, 0.0000],\n",
      " [ 0.0000, -0.0000, -0.0000]]]], device='cuda:0',\n",
      " grad_fn=<MulBackward0>)\n"
     ]
    }
   ],
   "source": [
    "print(module.weight)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Removes the pruning reparameterization from a module and the pruning method from the forward hook. The pruned\n",
    "parameter named ``name`` remains permanently pruned, and the parameter named ``name+'_orig'`` is removed from \n",
    "the parameter list. Similarly, the buffer named ``name+'_mask'`` is removed from the buffers."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('bias_orig', Parameter containing:\n",
      "tensor([ 0.1925, -0.1331, -0.1258, 0.2297, 0.0064, 0.3205], device='cuda:0',\n",
      " requires_grad=True)), ('weight', Parameter containing:\n",
      "tensor([[[[ 0.0000, 0.0000, -0.0000],\n",
      " [-0.0000, 0.0000, 0.0000],\n",
      " [-0.0000, -0.0000, 0.0000]]],\n",
      "\n",
      "\n",
      " [[[ 0.0273, 0.0000, 0.2836],\n",
      " [ 0.3173, -0.0000, 0.2136],\n",
      " [-0.1059, 0.0000, -0.1389]]],\n",
      "\n",
      "\n",
      " [[[-0.2346, 0.0000, -0.1149],\n",
      " [ 0.2330, 0.2610, 0.0000],\n",
      " [-0.2453, 0.1257, -0.1581]]],\n",
      "\n",
      "\n",
      " [[[ 0.0000, 0.0000, -0.0000],\n",
      " [-0.0000, 0.0000, 0.0000],\n",
      " [-0.0000, -0.0000, -0.0000]]],\n",
      "\n",
      "\n",
      " [[[-0.1652, 0.0627, -0.2905],\n",
      " [-0.1974, 0.0000, -0.2420],\n",
      " [-0.1531, 0.1894, -0.2319]]],\n",
      "\n",
      "\n",
      " [[[ 0.0000, 0.0000, -0.0000],\n",
      " [ 0.0000, -0.0000, 0.0000],\n",
      " [ 0.0000, -0.0000, -0.0000]]]], device='cuda:0', requires_grad=True))]\n"
     ]
    }
   ],
   "source": [
    "prune.remove(module, 'weight')\n",
    "#torch.save(module,\"layer_w_pruning.pt\")\n",
    "print(list(module.named_parameters()))"
   ]
},
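  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A quick check (a minimal sketch) that the reparameterization is gone for weight: weight is a plain parameter again, weight_orig and weight_mask no longer exist, while bias is still pruned, so bias_orig and bias_mask remain"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "params = dict(module.named_parameters())\n",
    "buffers = dict(module.named_buffers())\n",
    "# weight is permanent again; only the bias reparameterization is left\n",
    "print('weight' in params, 'weight_orig' in params, 'weight_mask' in buffers)\n",
    "print('bias_orig' in params, 'bias_mask' in buffers)"
   ]
  },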
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Pruning multiple parameters in the model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "By specifying the desired pruning technique and parameters, we can easily prune multiple tensors in a network, perhaps according to their type, as we will see in this example."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional: the pytorch_modelsize package can estimate the model's memory footprint\n",
    "#from pytorch_modelsize import SizeEstimator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Input tensor layout expected by the model: (Batch, Channels, Height, Width)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "dict_keys(['conv1.weight_mask', 'conv2.weight_mask', 'fc1.weight_mask', 'fc2.weight_mask', 'fc3.weight_mask'])\n"
     ]
    }
   ],
   "source": [
    "new_model = LeNet()\n",
    "#se = SizeEstimator(new_model)\n",
    "#print(se.estimate_size())\n",
    "for name, module in new_model.named_modules():\n",
    "    # prune 20% of connections in all 2D-conv layers\n",
    "    if isinstance(module, torch.nn.Conv2d):\n",
    "        prune.l1_unstructured(module, name='weight', amount=0.2)\n",
    "    # prune 40% of connections in all linear layers\n",
    "    elif isinstance(module, torch.nn.Linear):\n",
    "        prune.l1_unstructured(module, name='weight', amount=0.4)\n",
    "\n",
    "#torch.save(new_model,\"model_2.pt\")\n",
    "print(dict(new_model.named_buffers()).keys())  # to verify that all masks exist"
   ]
},
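  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A minimal sketch to confirm the two pruning rates: iterating over the pruned modules and counting zeros in each weight should report roughly 20% sparsity for the conv layers and 40% for the linear layers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for name, module in new_model.named_modules():\n",
    "    if isinstance(module, (torch.nn.Conv2d, torch.nn.Linear)):\n",
    "        zeros = float(torch.sum(module.weight == 0))\n",
    "        print('{}.weight sparsity: {:.2f}%'.format(name, 100. * zeros / module.weight.nelement()))"
   ]
  },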
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Global Pruning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = LeNet()\n",
    "\n",
    "parameters_to_prune = (\n",
    "    (model.conv1, 'weight'),\n",
    "    (model.conv2, 'weight'),\n",
    "    (model.fc1, 'weight'),\n",
    "    (model.fc2, 'weight'),\n",
    "    (model.fc3, 'weight'),\n",
    ")\n",
    "\n",
    "prune.global_unstructured(\n",
    "    parameters_to_prune,\n",
    "    pruning_method=prune.L1Unstructured,\n",
    "    amount=0.2,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sparsity in conv1.weight: 5.56%\n"
     ]
    }
   ],
   "source": [
    "print(\n",
    "    \"Sparsity in conv1.weight: {:.2f}%\".format(\n",
    "        100. * float(torch.sum(model.conv1.weight == 0))\n",
    "        / float(model.conv1.weight.nelement())\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sparsity in conv2.weight: 7.87%\n"
     ]
    }
   ],
   "source": [
    "print(\n",
    "    \"Sparsity in conv2.weight: {:.2f}%\".format(\n",
    "        100. * float(torch.sum(model.conv2.weight == 0))\n",
    "        / float(model.conv2.weight.nelement())\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sparsity in fc1.weight: 22.00%\n"
     ]
    }
   ],
   "source": [
    "print(\n",
    "    \"Sparsity in fc1.weight: {:.2f}%\".format(\n",
    "        100. * float(torch.sum(model.fc1.weight == 0))\n",
    "        / float(model.fc1.weight.nelement())\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sparsity in fc2.weight: 12.41%\n"
     ]
    }
   ],
   "source": [
    "print(\n",
    "    \"Sparsity in fc2.weight: {:.2f}%\".format(\n",
    "        100. * float(torch.sum(model.fc2.weight == 0))\n",
    "        / float(model.fc2.weight.nelement())\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sparsity in fc3.weight: 10.48%\n"
     ]
    }
   ],
   "source": [
    "print(\n",
    "    \"Sparsity in fc3.weight: {:.2f}%\".format(\n",
    "        100. * float(torch.sum(model.fc3.weight == 0))\n",
    "        / float(model.fc3.weight.nelement())\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Global sparsity: 20.00%\n"
     ]
    }
   ],
   "source": [
    "print(\n",
    "    \"Global sparsity: {:.2f}%\".format(\n",
    "        100. * float(\n",
    "            torch.sum(model.conv1.weight == 0)\n",
    "            + torch.sum(model.conv2.weight == 0)\n",
    "            + torch.sum(model.fc1.weight == 0)\n",
    "            + torch.sum(model.fc2.weight == 0)\n",
    "            + torch.sum(model.fc3.weight == 0)\n",
    "        )\n",
    "        / float(\n",
    "            model.conv1.weight.nelement()\n",
    "            + model.conv2.weight.nelement()\n",
    "            + model.fc1.weight.nelement()\n",
    "            + model.fc2.weight.nelement()\n",
    "            + model.fc3.weight.nelement()\n",
    "        )\n",
    "    )\n",
    ")"
   ]
},
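  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The same global figure, recomputed compactly (a minimal sketch over the parameters_to_prune tuple defined above): global pruning spends a single 20% budget across all five weight tensors, which is why the per-layer sparsities above differ while the total comes out at 20%"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# total zeros over total elements across every globally pruned tensor\n",
    "zeros = sum(float(torch.sum(m.weight == 0)) for m, _ in parameters_to_prune)\n",
    "total = sum(m.weight.nelement() for m, _ in parameters_to_prune)\n",
    "print('Global sparsity: {:.2f}%'.format(100. * zeros / total))"
   ]
  },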
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "54"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.conv1.weight.nelement()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[[[-0.2216, 0.1865, 0.2481],\n",
       " [-0.1684, -0.2357, -0.2128],\n",
       " [ 0.1659, -0.2490, -0.2235]]],\n",
       "\n",
       "\n",
       " [[[ 0.0586, 0.0346, -0.1531],\n",
       " [-0.0187, -0.3142, -0.0000],\n",
       " [-0.1740, -0.1174, -0.2448]]],\n",
       "\n",
       "\n",
       " [[[-0.1202, -0.0211, -0.0783],\n",
       " [-0.3148, -0.2717, 0.0340],\n",
       " [-0.0460, 0.3228, -0.1805]]],\n",
       "\n",
       "\n",
       " [[[ 0.0363, 0.1857, -0.2712],\n",
       " [ 0.0000, 0.2806, -0.0261],\n",
       " [-0.0000, -0.2439, -0.1886]]],\n",
       "\n",
       "\n",
       " [[[ 0.1291, -0.1453, 0.0179],\n",
       " [-0.2168, -0.3066, 0.2693],\n",
       " [-0.2566, -0.2499, -0.1595]]],\n",
       "\n",
       "\n",
       " [[[-0.0998, -0.2484, -0.2465],\n",
       " [-0.0702, -0.1950, 0.0241],\n",
       " [ 0.1547, 0.2272, -0.3236]]]], grad_fn=<MulBackward0>)"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.conv1.weight  # zeros mark the connections removed by global pruning"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}