def same_seed(seed):
    '''Fixes random number generator seeds for reproducibility.'''
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
def train_valid_split(data_set, valid_ratio, seed):
    '''Split provided training data into training set and validation set.'''
    valid_set_size = int(valid_ratio * len(data_set))
    train_set_size = len(data_set) - valid_set_size
    train_set, valid_set = random_split(data_set, [train_set_size, valid_set_size],
                                        generator=torch.Generator().manual_seed(seed))
    return np.array(train_set), np.array(valid_set)
def predict(test_loader, model, device):
    model.eval()  # Set your model to evaluation mode.
    preds = []
    for x in tqdm(test_loader):
        x = x.to(device)
        with torch.no_grad():
            pred = model(x)
            preds.append(pred.detach().cpu())
    preds = torch.cat(preds, dim=0).numpy()
    return preds
criterion = nn.MSELoss(reduction='mean') # Define your loss function, do not modify this.
# Define your optimization algorithm.
# TODO: Please check https://pytorch.org/docs/stable/optim.html to get more available algorithms.
# TODO: L2 regularization (optimizer weight decay, or implement it yourself).
# optimizer = torch.optim.Adam(model.parameters(), lr=config['learning_rate'], weight_decay=0.01)
optimizer = torch.optim.SGD(model.parameters(), lr=config['learning_rate'], momentum=0.9, weight_decay=0.1)
writer = SummaryWriter()  # Writer for TensorBoard logging.
if not os.path.isdir('./models'):
    os.mkdir('./models')  # Create a directory for saving models.
train_pbar = tqdm(range(n_epochs), position=0, leave=True)

for epoch in range(n_epochs):
    model.train()  # Set your model to train mode.
    loss_record = []
    for x, y in train_loader:
        optimizer.zero_grad()              # Set gradients to zero.
        x, y = x.to(device), y.to(device)  # Move your data to the device.
        pred = model(x)
        loss = criterion(pred, y)
        loss.backward()                    # Compute gradients (backpropagation).
        optimizer.step()                   # Update parameters.
        step += 1
        loss_record.append(loss.detach().item())
        # Display current epoch number and loss on the tqdm progress bar.
        # train_pbar.set_description(f'Epoch [{epoch+1}/{n_epochs}]')
        # train_pbar.set_postfix({'loss': loss.detach().item()})
    model.eval()  # Set your model to evaluation mode.
    loss_record = []
    for x, y in valid_loader:
        x, y = x.to(device), y.to(device)
        with torch.no_grad():
            pred = model(x)
            loss = criterion(pred, y)
        loss_record.append(loss.item())

    mean_valid_loss = sum(loss_record) / len(loss_record)  # Average validation loss for this epoch.
    if mean_valid_loss < best_loss:
        best_loss = mean_valid_loss
        torch.save(model.state_dict(), config['save_path'])  # Save your best model.
        # print('Saving model with loss {:.3f}...'.format(best_loss))
        early_stop_count = 0
    else:
        early_stop_count += 1
    if early_stop_count >= config['early_stop']:
        print('\nModel is not improving, so we halt the training session.')
        return
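Note that the SummaryWriter created above is never written to in the snippet as shown, so the TensorBoard section further below would have no curves to display. A minimal sketch of the logging calls that could sit inside the epoch loop (assuming mean_train_loss is computed from the training loss_record before that list is reset for validation):

# Inside the epoch loop, after the training pass:
mean_train_loss = sum(loss_record) / len(loss_record)
writer.add_scalar('Loss/train', mean_train_loss, step)

# After the validation pass, once mean_valid_loss is available:
writer.add_scalar('Loss/valid', mean_valid_loss, step)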
Configurations
config contains hyper-parameters for training and the path to save your model.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
config = {
    'seed': 114514,        # Your seed number, you can pick your lucky number. :)
    'select_all': False,   # Whether to use all features.
    'valid_ratio': 0.2,    # validation_size = train_size * valid_ratio
    'n_epochs': 3000,      # Number of epochs.
    'batch_size': 256,
    'learning_rate': 1e-5,
    'early_stop': 400,     # If the model has not improved for this many consecutive epochs, stop training.
    'save_path': './models/model.ckpt'  # Your model will be saved here.
}
Dataloader
Read data from files and set up training, validation, and testing sets. You do not need to modify this part.
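Although that code is not reproduced here, a rough, hypothetical sketch of how the helpers above typically fit together looks like the following. The CSV file names and the COVID19Dataset wrapper are placeholders, and pandas (as pd) plus DataLoader from torch.utils.data are assumed to be imported elsewhere in the notebook.

same_seed(config['seed'])

train_data = pd.read_csv('./covid_train.csv').values  # placeholder file name
test_data = pd.read_csv('./covid_test.csv').values    # placeholder file name
train_data, valid_data = train_valid_split(train_data, config['valid_ratio'], config['seed'])

# Split features and labels (the label is assumed to be the last column).
x_train, y_train = train_data[:, :-1], train_data[:, -1]
x_valid, y_valid = valid_data[:, :-1], valid_data[:, -1]
x_test = test_data

# COVID19Dataset stands in for a Dataset class defined elsewhere in the notebook.
train_loader = DataLoader(COVID19Dataset(x_train, y_train), batch_size=config['batch_size'], shuffle=True)
valid_loader = DataLoader(COVID19Dataset(x_valid, y_valid), batch_size=config['batch_size'], shuffle=False)
test_loader = DataLoader(COVID19Dataset(x_test), batch_size=config['batch_size'], shuffle=False)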
model = My_Model(input_dim=x_train.shape[1]).to(device)  # Put your model and data on the same computation device.
trainer(train_loader, valid_loader, model, config, device)
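Once training finishes, the best weights live on disk at config['save_path'] rather than necessarily in the live model object. A common next step before the prediction code further below is to restore that checkpoint; this is a minimal sketch, not part of the original snippet:

model = My_Model(input_dim=x_train.shape[1]).to(device)
model.load_state_dict(torch.load(config['save_path']))  # Load the best checkpoint saved by the trainer.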
Plot learning curves with TensorBoard (optional)
TensorBoard is a tool that allows you to visualize your training progress.
If this block does not display your learning curve, please wait a few minutes and re-run it; it may take some time to load your logging information.
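For reference, inside a notebook the dashboard is usually opened with the TensorBoard extension; the log directory below assumes SummaryWriter() was created with its default location, ./runs:

%reload_ext tensorboard
%tensorboard --logdir=./runs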
The predictions of your model on the testing set will be stored in pred.csv.
def save_pred(preds, file):
    ''' Save predictions to specified file '''
    with open(file, 'w') as fp:
        writer = csv.writer(fp)
        writer.writerow(['id', 'tested_positive'])
        for i, p in enumerate(preds):
            writer.writerow([i, p])
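Putting the last two helpers together, inference and submission would look roughly like this, with pred.csv being the file name mentioned above:

preds = predict(test_loader, model, device)
save_pred(preds, 'pred.csv')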