Testing ResNet-18 with dropout

Created on January 13|Last edited on January 13
Comment
Actual training code:
def train_model(cfg=None):
    """Train a 10-class ResNet-18 with dropout after every top-level stage.

    Every direct child of the network except the final ``fc`` layer is
    wrapped as ``nn.Sequential(child, nn.Dropout(p=cfg.dropout))``.  After
    each epoch the weights are saved to ``model.p``, logged to W&B as a
    ``model`` artifact named after the validation loss/accuracy, and passed
    to ``solve_test``.

    Note: wrapping renames the submodules in the ``state_dict`` (for example
    ``conv1.weight`` becomes ``conv1.0.weight``).

    Args:
        cfg: configuration object exposing ``lr``, ``wd``, ``beta`` and
            ``dropout`` attributes.  Defaults to ``wandb.config`` as it is
            at call time.

    Returns:
        The trained model (already moved to ``device``).
    """
    from torchvision import models

    if cfg is None:
        # Resolve the config when the function is called, not when it is
        # defined: a default of ``wandb.config`` in the signature would be
        # captured before ``wandb.init()`` has run.
        cfg = wandb.config

    model = models.resnet18()
    model.fc = nn.Linear(512, 10)
    # Snapshot the children first, because attributes are replaced while
    # walking over them.
    for name, module in list(model.named_children()):
        if name == 'fc':
            continue
        print(name, module)
        # Rebinding the loop variable would leave the model unchanged; the
        # wrapped module has to be assigned back onto the model itself.
        setattr(model, name, nn.Sequential(module, nn.Dropout(p=cfg.dropout)))
    model = model.to(device)
    loss_fn = nn.CrossEntropyLoss()
    # ``beta`` is stored as (1 - momentum), so beta = 0.015 means momentum 0.985.
    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=cfg.lr,
        momentum=1 - cfg.beta,
        nesterov=True,
        weight_decay=cfg.wd,
    )
    epochs = 50
    for t in trange(epochs):
        train(t, train_dataloader, model, loss_fn, optimizer)
        loss, acc = test(t, val_dataloader, model, loss_fn)
        # Checkpoint after every epoch and upload it as a W&B artifact.
        torch.save(model.state_dict(), 'model.p')
        artifact_name = f'resnet_loss_{loss}_acc_{acc}'
        artifact = wandb.Artifact(artifact_name, type='model')
        artifact.add_file('model.p')
        wandb.log_artifact(artifact)
        solve_test(model, artifact_name)
    return model
﻿
﻿
# def try_config(config=None):
#     
#     with wandb.init(config=config):
#         train_model()
﻿
﻿
# wandb.agent('sx349jsp', function=try_config, project='cnn', count=10**9)
# Single run with fixed hyperparameters instead of a sweep agent.
wandb.init(project='cnn')
for key, value in {
    'lr': 0.01,
    'wd': 0.001,
    'beta': 0.015,
    'dropout': 0.1,
}.items():
    setattr(wandb.config, key, value)
train_model()
﻿
Sweep for learning rate, momentum, and weight decay
The best parameters were around beta = 0.015 (i.e. momentum = 1 - beta = 0.985), lr = 1e-2, wd = 1e-3; accuracy reached 76.78% with a validation loss of 0.7382.
﻿
﻿
Run set19
﻿
# Visualizing parameter evolutions in 2D
def get_params(model, threshold=100):
    """Collect a small sample of the model's parameters as one flat tensor.

    From every parameter tensor, the first ``threshold`` entries (in
    flattened order) are taken; the pieces are concatenated into a single
    1-D CPU tensor that is detached from autograd.

    Args:
        model: object whose ``parameters()`` yields tensors
            (e.g. an ``nn.Module``).
        threshold: maximum number of entries taken from each parameter.

    Returns:
        A 1-D tensor; empty when the model has no parameters.
    """
    samples = [
        # ``reshape`` (unlike ``view``) also accepts non-contiguous tensors,
        # and slicing already clamps to the tensor's length.
        param.detach().reshape(-1)[:threshold].cpu()
        for param in model.parameters()
    ]
    if not samples:
        # ``torch.cat`` rejects an empty list.
        return torch.empty(0)
    return torch.cat(samples, dim=0)
def eval_unity_root(poly, arg):
    """Evaluate a polynomial on the unit circle.

    ``poly`` holds the coefficients in ``np.polyval`` order (highest degree
    first) and ``arg`` is the angle, or array of angles, in radians; the
    polynomial is evaluated at ``exp(1j * arg)``.
    """
    return np.polyval(poly, np.exp(1j * arg))
def complex_hash(model, n):
    """Summarise the model's sampled parameters as ``n`` non-negative numbers.

    The sampled parameters are used as polynomial coefficients, the
    polynomial is evaluated at ``n`` evenly spaced angles on the unit circle
    (starting at 0, excluding 2*pi), and the magnitudes are returned.
    """
    angles = np.linspace(0, 2 * np.pi, num=n, endpoint=False)
    coefficients = get_params(model)
    return np.abs(eval_unity_root(coefficients, angles))
﻿
def train(epoch, dataloader, model, loss_fn, optimizer):
    """Training-loop excerpt showing where the parameter hash is logged.

    Only the logging part is shown; the ``# ...`` placeholders stand for
    code omitted from this excerpt.  Every ``interval`` batches the
    two-point ``complex_hash`` of the model is logged to W&B as ``hash_x``
    and ``hash_y``.
    """
    # ...
    for batch, (X, y) in enumerate(dataloader):
        # ...
        # NOTE(review): ``interval`` is not defined in this excerpt;
        # presumably it is set in the omitted code or at module level.
        if batch % interval == 0:
            # Two hash values, used as the (x, y) coordinates logged below.
            hashes = complex_hash(model, 2)
            wandb.log({
                # ...
                'hash_x': hashes[0],
                'hash_y': hashes[1]
            })
Choosing dropout rate
It works quite similarly with different rates:
﻿
Run set20
﻿
Analyzing visualizations﻿
Run set1
﻿
Augmenting with AutoAugmentPolicy.SVHN﻿
# Training split with the SVHN AutoAugment policy applied before tensor conversion.
# NOTE(review): ``SVHN`` takes ``is_train`` here, so it is presumably a
# project-defined dataset class rather than torchvision's; confirm.
data = SVHN(
    root=dataset_root,
    is_train=True,
    transform=Compose(
        [
            AutoAugment(autoaugment.AutoAugmentPolicy.SVHN),
            ToTensor(),
        ]
    ),
)
Finally, it doesn't overfit.
﻿
Run set1
﻿
﻿
Add a comment