
Testing ResNet18 with dropout

Actual training code:
import torch
import torch.nn as nn
import wandb
from tqdm import trange

# train, test, solve_test, device, train_dataloader and val_dataloader are defined earlier in the notebook.
def train_model(cfg=wandb.config):
    from torchvision import models
    model = models.resnet18()
    model.fc = nn.Linear(512, 10)  # replace the ImageNet head with a 10-class one
    # Wrap every top-level block except the new head in dropout.
    for name, module in model.named_children():
        if name == 'fc':
            continue
        print(name, module)
        # Re-attach the wrapped module; rebinding the loop variable alone would leave the model unchanged.
        setattr(model, name, nn.Sequential(module, nn.Dropout(p=cfg.dropout)))
    model = model.to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=cfg.lr, momentum=1 - cfg.beta,
                                nesterov=True, weight_decay=cfg.wd)
    epochs = 50
    for t in trange(epochs):
        train(t, train_dataloader, model, loss_fn, optimizer)
        loss, acc = test(t, val_dataloader, model, loss_fn)
    # Save the final weights and log them as a W&B artifact named after the last validation metrics.
    torch.save(model.state_dict(), 'model.p')
    name = f'resnet_loss_{loss}_acc_{acc}'
    artifact = wandb.Artifact(name, type='model')
    artifact.add_file('model.p')
    wandb.log_artifact(artifact)
    solve_test(model, name)
    return model


# def try_config(config=None):
#     with wandb.init(config=config):
#         train_model()


# wandb.agent('sx349jsp', function=try_config, project='cnn', count=10**9)
wandb.init(project='cnn')
wandb.config.lr = 0.01
wandb.config.wd = 0.001
wandb.config.beta = 0.015
wandb.config.dropout = 0.1
train_model()
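
For reference, a checkpoint logged this way can later be pulled back from W&B in a separate session. A minimal sketch, assuming a run exists; the artifact name below is hypothetical and should be replaced by one actually produced by train_model():

import os

run = wandb.init(project='cnn')
# Hypothetical artifact name and alias; substitute a name logged by train_model().
art = run.use_artifact('resnet_loss_0.74_acc_76.8:latest', type='model')
state_dict = torch.load(os.path.join(art.download(), 'model.p'))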


Sweep for learning rate, momentum, and weight decay

The best parameters were around beta = 0.015, lr = 1e-2, and wd = 1e-3; with these, validation accuracy reached 76.78% at a validation loss of 0.7382.
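
The sweep configuration itself isn't shown in the report. A sketch of what it could look like, assuming a random search over log-uniform ranges (the ranges and the metric name are assumptions; only the parameter names match the training code):

sweep_config = {
    'method': 'random',
    'metric': {'name': 'val_loss', 'goal': 'minimize'},
    'parameters': {
        'lr':      {'distribution': 'log_uniform_values', 'min': 1e-4, 'max': 1e-1},
        'beta':    {'distribution': 'log_uniform_values', 'min': 1e-3, 'max': 1e-1},
        'wd':      {'distribution': 'log_uniform_values', 'min': 1e-5, 'max': 1e-2},
        'dropout': {'value': 0.1},
    },
}
sweep_id = wandb.sweep(sweep_config, project='cnn')
# Then run wandb.agent(sweep_id, ...) as in the commented-out code above.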


Run set: 19 runs


Visualizing parameter evolution in 2D

To trace how the weights move during training, a fixed slice of every parameter tensor is treated as polynomial coefficients, the polynomial is evaluated at a few points on the unit circle, and the magnitudes give a low-dimensional "hash" of the model that can be logged as a 2D point:

import numpy as np
import torch

def get_params(model):
    # Take (at most) the first 100 entries of every parameter tensor.
    lst = []
    threshold = 100
    for e in model.parameters():
        flat = e.view(-1)
        lst.append(flat[:min(len(flat), threshold)].cpu().detach())
    return torch.cat(lst, dim=0)

def eval_unity_root(poly, arg):
    num = np.exp(1j * arg)
    return np.polyval(poly, num)

def complex_hash(model, n):
    # Evaluate the parameter "polynomial" at n points on the unit circle
    # and return the magnitudes: an n-dimensional hash of the weights.
    params = get_params(model)
    return np.abs(eval_unity_root(params, np.linspace(0, 2 * np.pi, num=n, endpoint=False)))
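
A quick sanity check of the hash on a throwaway model (the toy module below is purely illustrative):

from torch import nn

toy = nn.Sequential(nn.Linear(4, 3), nn.Linear(3, 2))  # illustrative toy model
print(complex_hash(toy, 2))  # two non-negative floats, logged below as hash_x / hash_y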

def train(epoch, dataloader, model, loss_fn, optimizer):
    # ...
    for batch, (X, y) in enumerate(dataloader):
        # ...
        if batch % interval == 0:
            hashes = complex_hash(model, 2)
            wandb.log({
                # ...
                'hash_x': hashes[0],
                'hash_y': hashes[1]
            })

Choosing dropout rate

It works quite similarly across the different rates:
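
A sketch of how such a comparison can be run by reusing train_model above; the particular rates and the reinit flag are assumptions, not taken from the report:

for p in (0.0, 0.1, 0.3, 0.5):  # illustrative dropout rates
    wandb.init(project='cnn', reinit=True,
               config={'lr': 0.01, 'wd': 0.001, 'beta': 0.015, 'dropout': p})
    train_model()
    wandb.finish()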

Run set: 20 runs


Analyzing visualizations


Run set: 1 run


Augmenting with AutoAugmentPolicy.SVHN


from torchvision.datasets import SVHN
from torchvision.transforms import AutoAugment, Compose, ToTensor, autoaugment

# Assuming torchvision.datasets.SVHN, whose training split is selected with split='train'.
data = SVHN(
    root=dataset_root,
    split='train',
    transform=Compose([AutoAugment(autoaugment.AutoAugmentPolicy.SVHN), ToTensor()])
)
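
Presumably this dataset is what feeds train_dataloader in the training loop above; a minimal sketch of that wiring (batch size and worker count are assumptions):

from torch.utils.data import DataLoader

train_dataloader = DataLoader(data, batch_size=128, shuffle=True, num_workers=2)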
With this augmentation, the model finally doesn't overfit.

Run set: 1 run