Testing ResNet-18 with dropout
Created on January 13|Last edited on January 13
Comment
Actual training code:
def train_model(cfg=None, *, epochs=50):
    """Train a ResNet-18 with dropout after every backbone child and log it to W&B.

    A randomly initialised ``resnet18`` gets a 10-way ``fc`` head. Every
    top-level child except ``fc`` is wrapped as
    ``nn.Sequential(child, nn.Dropout(p=cfg.dropout))`` and assigned back onto
    the model. After each epoch the weights are written to ``model.p`` and
    uploaded as a W&B artifact named after that epoch's loss and accuracy.

    Note: because the children are wrapped in ``nn.Sequential``, the saved
    ``state_dict`` keys gain a ``.0`` component (e.g. ``conv1.0.weight``), so
    checkpoints must be loaded into a model wrapped the same way.

    Args:
        cfg: Object exposing ``lr``, ``wd``, ``beta`` and ``dropout``
            attributes. Defaults to ``wandb.config`` looked up at call time.
        epochs: Number of training epochs; must be at least 1.

    Returns:
        The trained model (on ``device``).

    Raises:
        ValueError: If ``epochs`` is smaller than 1.
    """
    from torchvision import models

    if epochs < 1:
        raise ValueError(f'epochs must be >= 1, got {epochs}')

    # Resolve the config when the function is called. A `cfg=wandb.config`
    # default is evaluated once, at definition time, and may therefore not be
    # the config object of the run that is active when training starts.
    if cfg is None:
        cfg = wandb.config

    model = models.resnet18()
    model.fc = nn.Linear(512, 10)

    # Iterate over a snapshot because the children are replaced in the loop.
    for name, module in list(model.named_children()):
        if name == 'fc':
            continue
        print(name, module)
        # Assign the wrapped module back onto the model: merely rebinding the
        # loop variable would leave the network without any dropout.
        setattr(model, name, nn.Sequential(module, nn.Dropout(p=cfg.dropout)))
    model = model.to(device)

    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=cfg.lr,
        momentum=1 - cfg.beta,  # the config stores beta = 1 - momentum
        nesterov=True,
        weight_decay=cfg.wd,
    )

    for t in trange(epochs):
        train(t, train_dataloader, model, loss_fn, optimizer)
        loss, acc = test(t, val_dataloader, model, loss_fn)

        # Checkpoint every epoch and upload it as a versioned artifact.
        torch.save(model.state_dict(), 'model.p')
        name = f'resnet_loss_{loss}_acc_{acc}'
        artifact = wandb.Artifact(name, type='model')
        artifact.add_file('model.p')
        wandb.log_artifact(artifact)

    # `name` refers to the artifact of the final epoch.
    # NOTE(review): solve_test is defined elsewhere; presumably it evaluates
    # the model on the test split -- confirm.
    solve_test(model, name)
    return model


# Driver that appears to have been used for the hyper-parameter sweep
# (kept for reference):
# def try_config(config=None):
#
#     with wandb.init(config=config):
#         train_model()
# wandb.agent('sx349jsp', function=try_config, project='cnn', count=10**9)

# Single run with the hyper-parameters below.
wandb.init(project='cnn')
wandb.config.lr = 0.01
wandb.config.wd = 0.001
wandb.config.beta = 0.015
wandb.config.dropout = 0.1
train_model()
Sweep for learning rate, momentum, and weight decay
The best parameters were around beta = 0.015 (i.e. momentum = 1 − beta = 0.985), lr = 1e-2, and wd = 1e-3; with these, accuracy reached 76.78% and validation loss 0.7382.
Run set
19
Visualizing parameter evolutions in 2D
def get_params(model, threshold=100):
    """Return a flat CPU tensor sampling the model's parameters.

    The first ``threshold`` entries (in flattened order) of every parameter
    tensor are taken, detached from the autograd graph, moved to the CPU and
    concatenated in ``model.parameters()`` order.

    Args:
        model: Module whose ``parameters()`` are sampled.
        threshold: Maximum number of entries taken from each parameter tensor.

    Returns:
        A 1-D tensor; empty when the model has no parameters.
    """
    chunks = [
        # reshape (unlike view) also accepts non-contiguous parameters, and
        # slicing clamps to the tensor length on its own.
        param.detach().reshape(-1)[:threshold].cpu()
        for param in model.parameters()
    ]
    if not chunks:
        # torch.cat rejects an empty list.
        return torch.empty(0)
    return torch.cat(chunks, dim=0)


def eval_unity_root(poly, arg):
    """Evaluate the polynomial ``poly`` at the unit-circle point(s) ``exp(1j * arg)``.

    Args:
        poly: Coefficients, highest degree first (``np.polyval`` convention).
        arg: Angle or array of angles in radians.

    Returns:
        The complex polynomial value(s), one per angle.
    """
    num = np.exp(1j * arg)
    return np.polyval(poly, num)


def complex_hash(model, n):
    """Project the sampled parameters onto ``n`` non-negative numbers.

    The sampled parameters are used as polynomial coefficients and the
    polynomial is evaluated at ``n`` evenly spaced angles in ``[0, 2*pi)``;
    the magnitudes of those values are returned. With ``n == 2`` the two
    values are the absolute plain sum and the absolute alternating sum of
    the sampled parameters.

    Args:
        model: Module passed on to ``get_params``.
        n: Number of evaluation points.

    Returns:
        A NumPy array of ``n`` magnitudes.
    """
    params = get_params(model).numpy()
    angles = np.linspace(0, 2 * np.pi, num=n, endpoint=False)
    return np.abs(eval_unity_root(params, angles))


def train(epoch, dataloader, model, loss_fn, optimizer):
    # Excerpt of the training loop: the parts marked "# ..." are elided in
    # the report, and `interval` is not defined in this excerpt.
    # ...
    for batch, (X, y) in enumerate(dataloader):
        # ...
        if batch % interval == 0:
            # Two magnitudes give a 2-D trace of how the parameters evolve.
            hashes = complex_hash(model, 2)
            wandb.log({
                # ...
                'hash_x': hashes[0],
                'hash_y': hashes[1]
            })
Choosing dropout rate
It works quite similarly with different rates:
Run set
20
Analyzing visualizations
Run set
1
Augmenting with AutoAugmentPolicy.SVHN
# Training pipeline: the SVHN AutoAugment policy is listed first and the
# tensor conversion second.
# NOTE(review): SVHN is called with `is_train`, so it is presumably a
# project-specific dataset wrapper rather than torchvision's -- confirm.
train_transform = Compose([
    AutoAugment(autoaugment.AutoAugmentPolicy.SVHN),
    ToTensor(),
])
data = SVHN(
    root=dataset_root,
    is_train=True,
    transform=train_transform,
)
Finally, it doesn't overfit.
Run set
1
Add a comment