Experiment 1.3.2
Repeat 1.3.1 (fixing a mistake of 1.3.1). This is a copy of experiment 1.3.1. It is being repeated because I realized that I made a mistake by not holding the pretrained parameters of the model fixed (that is, the parameters of every layer before the newly created last layer).
Dataset
First, we need to organise our orange-brown dataset so that PyTorch can consume it. We will be training the model to distinguish orange and brown colors.
Most of the work for this has been done by the nncolor.data module. Below we just test it out to make sure it’s working.
The dataset is supposed to use the data from experiment 1.1.1 to produce circles against a background. But we make the circles smaller and place them in 1 of 4x4=16 grid positions. This is done to force the model to learn the answer regardless of where the circle appears in the image.
import cv2
import numpy as np
from enum import Enum
import colorsys
import moviepy.editor as mpe
import moviepy
from typing import *
import random
import pandas as pd
import json
import torch
from icecream import ic
import nncolor as nc
import nncolor.data
import IPython
def imshow(img):
    """Display an RGB image inline in the notebook.

    The image is multiplied by 255 (so it presumably arrives as floats in the
    [0, 1] range -- confirm against callers), converted from RGB to BGR channel
    order for OpenCV, JPEG-encoded in memory and handed to IPython for display.
    """
    scaled = img * 255
    bgr = cv2.cvtColor(scaled, cv2.COLOR_RGB2BGR)
    # imencode returns (success_flag, buffer); only the encoded buffer is used.
    _ok, encoded = cv2.imencode('.jpg', bgr)
    IPython.display.display(IPython.display.Image(data=encoded))
def demo_data():
    """Build a preview clip of dataset samples captioned with their labels.

    Draws 30 samples from ``nc.data.create_samples`` and stacks a text clip
    showing the label of the current sample on top of the image sequence.
    Both rows run at ``FPS`` frames per second.

    Returns:
        The moviepy clip produced by ``mpe.clips_array`` (label row above,
        image row below).
    """
    FPS = 2
    frames, labels = nc.data.create_samples(30)
    # Frames are scaled by 255 before being handed to moviepy (presumably they
    # arrive as floats in [0, 1] -- confirm against nncolor.data).
    frames = [f * 255 for f in frames]
    x_clip = mpe.ImageSequenceClip(frames, fps=FPS)

    class FrameText(mpe.VideoClip):
        """Video clip that shows ``text[i]`` while frame ``i`` is on screen."""

        def __init__(self, text, fps):
            def make_frame(t):
                # moviepy calls make_frame with the time in seconds, not a
                # frame number, so convert to a frame index first. The small
                # epsilon guards against float round-off at frame boundaries,
                # and the clamp keeps t == duration inside the label list.
                idx = min(int(t * fps + 1e-6), len(text) - 1)
                label_clip = mpe.TextClip(
                    text[idx], font='DejaVu-Sans', color='white')
                return label_clip.get_frame(t)

            self.duration = 1.0 * len(text) / fps
            mpe.VideoClip.__init__(
                self, make_frame=make_frame, duration=self.duration)

    y_clip = FrameText(labels, FPS)
    return mpe.clips_array([[y_clip], [x_clip]])
# Render the demo clip inline in the notebook. logger=None is forwarded to the
# renderer through rd_kwargs (presumably to suppress moviepy's progress output).
clip = demo_data()
clip.ipython_display(rd_kwargs=dict(logger=None))
def test_dataset():
    """Smoke-test the dataset: load the splits and show one validation image."""
    # load_datasets() yields three splits; only the validation split is used.
    _train, _test, val = nc.data.load_datasets()
    sample = val[30]
    imshow(sample['image'])
# Run the smoke test so that the sample validation image is displayed.
test_dataset()
Training
Let’s fine tune a Resnet model.
# Copied from: https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html
import torchvision as tv
import torchvision.datasets
import torchvision.models
import torchvision.transforms
import torch.nn
import torch.optim
import time
import copy
import os
# The same preprocessing is used for both the training and validation splits:
# convert to a tensor, then normalise each channel with the mean/std values
# taken from the PyTorch transfer-learning tutorial. No augmentation is applied.
data_transform = tv.transforms.Compose([
    tv.transforms.ToTensor(),
    tv.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Haven't coded up a test loop yet, so ignore test set for now.
colors = nncolor.data.filter_colors(
    nncolor.data.exp_1_1_data,
    include_colors={'orange', 'brown', 'neither'})
datasets = nncolor.data.train_test_val_split(colors, split_ratio=(11, 0, 7))
# NOTE(review): assumes the split is returned in (train, test, val) order, the
# same order nc.data.load_datasets() is unpacked in elsewhere in this
# notebook -- confirm against nncolor.data. The test split gets ratio 0 and is
# deliberately unused here.
train_ds, _test_ds, val_ds = datasets
train_ds.transform = data_transform
val_ds.transform = data_transform

ds = {'train': train_ds, 'val': val_ds}
dataloaders = {
    x: torch.utils.data.DataLoader(ds[x], batch_size=4,
                                   shuffle=True, num_workers=4)
    for x in ['train', 'val']
}
dataset_sizes = {x: len(ds[x]) for x in ['train', 'val']}

# Always a torch.device, whichever branch is taken.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it with the best validation weights loaded.

    Each epoch runs a training pass followed by a validation pass over the
    module-level ``dataloaders``; batches are dicts with ``'image'`` and
    ``'label'`` entries that are moved to the module-level ``device``.
    Per-phase loss and accuracy are printed, normalised by ``dataset_sizes``.

    Args:
        model: network to train.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per training batch.
        scheduler: learning-rate scheduler stepped once per training epoch.
        num_epochs: number of epochs to run.

    Returns:
        ``model``, after loading the state dict from the epoch with the
        highest validation accuracy.
    """
    start_time = time.time()

    best_acc = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Every epoch consists of a training pass and then a validation pass.
        for phase in ['train', 'val']:
            is_training = phase == 'train'
            # train(True) is training mode, train(False) is evaluation mode.
            model.train(is_training)

            loss_total = 0.0
            correct_total = 0

            for batch in dataloaders[phase]:
                inputs = batch['image'].to(device)
                labels = batch['label'].to(device)

                # Clear gradients left over from the previous batch.
                optimizer.zero_grad()

                # Autograd history is only recorded during the training pass.
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    preds = torch.max(outputs, 1)[1]
                    loss = criterion(outputs, labels)

                    # Backpropagate and update weights only while training.
                    if is_training:
                        loss.backward()
                        optimizer.step()

                # loss is a per-batch mean, so weight it by the batch size.
                loss_total += loss.item() * inputs.size(0)
                correct_total += (preds == labels.data).sum()

            if is_training:
                scheduler.step()

            epoch_loss = loss_total / dataset_sizes[phase]
            epoch_acc = correct_total.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # Snapshot the weights whenever validation accuracy improves.
            if not is_training and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    minutes, seconds = divmod(time.time() - start_time, 60)
    print('Training complete in {:.0f}m {:.0f}s'.format(
        minutes, seconds))
    print('Best val Acc: {:4f}'.format(best_acc))

    # Restore the best-performing weights before handing the model back.
    model.load_state_dict(best_model_wts)
    return model
# Load & train
model_ft = tv.models.resnet18(pretrained=True)

## [begin] This is where experiment 1.3.2 differs:
# Freeze every pretrained parameter so that only the new head is trained.
for param in model_ft.parameters():
    param.requires_grad = False
## [end]
# NOTE(review): train_model() still switches the network to train mode, so the
# BatchNorm running statistics inside the frozen layers keep being updated even
# though their weights are not. Confirm whether that is acceptable for
# "holding the parameters fixed", and whether it relates to validation
# accuracy exceeding training accuracy.

num_ftrs = model_ft.fc.in_features
# Replace the classifier head with a new 4-output linear layer. It is created
# after the freeze loop above, so its parameters are trainable.
# NOTE(review): 4 outputs are used although only three colors ('orange',
# 'brown', 'neither') are selected -- confirm against the label encoding in
# nncolor.data.
model_ft.fc = torch.nn.Linear(num_ftrs, 4)

model_ft = model_ft.to(device)

criterion = torch.nn.CrossEntropyLoss()

# Only the parameters of the new head are optimized; everything else is frozen.
optimizer_ft = torch.optim.SGD(model_ft.fc.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

num_epochs = 20
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=num_epochs)
Epoch 0/19
----------
train Loss: 0.7470 Acc: 0.6932
val Loss: 0.4076 Acc: 0.8490
Epoch 1/19
----------
train Loss: 0.6639 Acc: 0.7500
val Loss: 0.3270 Acc: 0.8906
Epoch 2/19
----------
train Loss: 0.6726 Acc: 0.7495
val Loss: 0.3415 Acc: 0.8958
Epoch 3/19
----------
train Loss: 0.6355 Acc: 0.7562
val Loss: 0.3195 Acc: 0.8750
Epoch 4/19
----------
train Loss: 0.5950 Acc: 0.7737
val Loss: 0.3038 Acc: 0.8646
Epoch 5/19
----------
train Loss: 0.5860 Acc: 0.7779
val Loss: 0.4004 Acc: 0.8229
Epoch 6/19
----------
train Loss: 0.5823 Acc: 0.7841
val Loss: 0.3246 Acc: 0.8542
Epoch 7/19
----------
train Loss: 0.5063 Acc: 0.8040
val Loss: 0.1951 Acc: 0.9167
Epoch 8/19
----------
train Loss: 0.4991 Acc: 0.8016
val Loss: 0.2256 Acc: 0.8906
Epoch 9/19
----------
train Loss: 0.4803 Acc: 0.8120
val Loss: 0.2743 Acc: 0.8490
Epoch 10/19
----------
train Loss: 0.4808 Acc: 0.8125
val Loss: 0.1966 Acc: 0.9271
Epoch 11/19
----------
train Loss: 0.4875 Acc: 0.8097
val Loss: 0.1628 Acc: 0.9531
Epoch 12/19
----------
train Loss: 0.5276 Acc: 0.8049
val Loss: 0.3030 Acc: 0.8906
Epoch 13/19
----------
train Loss: 0.4685 Acc: 0.8111
val Loss: 0.1865 Acc: 0.9531
Epoch 14/19
----------
train Loss: 0.4775 Acc: 0.8187
val Loss: 0.2256 Acc: 0.9271
Epoch 15/19
----------
train Loss: 0.4686 Acc: 0.8224
val Loss: 0.1971 Acc: 0.9271
Epoch 16/19
----------
train Loss: 0.4807 Acc: 0.8063
val Loss: 0.2373 Acc: 0.9062
Epoch 17/19
----------
train Loss: 0.4531 Acc: 0.8253
val Loss: 0.2115 Acc: 0.9219
Epoch 18/19
----------
train Loss: 0.4425 Acc: 0.8267
val Loss: 0.1871 Acc: 0.9375
Epoch 19/19
----------
train Loss: 0.4728 Acc: 0.8092
val Loss: 0.1934 Acc: 0.9271
Training complete in 1m 12s
Best val Acc: 0.953125
Results
After 20 epochs we have roughly 90–95% validation accuracy (92.7% at the final epoch, 95.3% at the best epoch). The accuracy is similar enough to 1.3.1 to come to the same conclusion.
Next steps (copied)
Try to get more details on how the model is working.
There are a few ways we can probe. Some ideas:
- randomly choose a set of activations to zero. How big can we make the set?
- estimate the order of the activation importance, and start from the top or bottom.
- look at the layer before final pooling. We are interested in seeing if the network has a degree of resolution to its color.
For the first method, we need a way to aggregate the results of multiple trials in order to draw conclusions about particular activations. For the second method, we need a way to estimate the activation importance, maybe like how it's done in Optimal Brain Damage.
The 3rd method is the most similar to this experiment, so let’s do that first (experiment 1.4).
Validation vs. training accuracy
Another observation is that validation accuracy is greater than training accuracy. This would be reasonable if training uses some dropout or similar technique; however, I’m not sure if it does or not. Worth investigating this to find possible configuration mistakes.