在上一篇文章中,我们已经介绍了赛题背景、目标任务、数据集格式以及评估指标。现在来看看如何使用所给数据训练一个模型,以识别每张图片所对应的individual_id。

本文的目标是对整个训练过程进行梳理,至于其中出现的一些概念,比如GeM Pooling,ArcFace等,将在后续出一篇文章进行介绍。

导入所需库

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import os
import gc
import cv2
import math
import copy
import time
import random

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp

# Utils
import joblib
from tqdm import tqdm
from collections import defaultdict

# Sklearn Imports
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold

# For Image Models
import timm

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# For colored terminal text
from colorama import Fore, Back, Style
# Short aliases used when printing: blue foreground and "reset all styles".
b_ = Fore.BLUE
sr_ = Style.RESET_ALL

import warnings
# Suppresses every warning for the rest of the session.
warnings.filterwarnings("ignore")

# For descriptive error messages
# NOTE(review): set after `import torch`; presumably still takes effect because
# no CUDA work has run yet at this point — confirm.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

训练过程可视化工具

使用WandB可以在线查看训练过程中所记录的实时指标变化情况。

1
2
3
4
5
6
7
8
9
10
11
import wandb

# Log in to W&B with the key stored under the Kaggle secret "wandb_api".
# `anony` records the outcome: None after a successful login, "must" otherwise.
try:
    from kaggle_secrets import UserSecretsClient

    user_secrets = UserSecretsClient()
    api_key = user_secrets.get_secret("wandb_api")
    wandb.login(key=api_key)
    anony = None
except Exception:
    # Deliberately best-effort: any failure above (module not available,
    # secret missing, login rejected) falls back to anonymous mode.
    # `Exception` rather than a bare `except` so that KeyboardInterrupt and
    # SystemExit are not swallowed.
    anony = "must"
    print('If you want to use your W&B account, go to Add-ons -> Secrets and provide your W&B access token. Use the Label name as wandb_api. \nGet your W&B access token from here: https://wandb.ai/authorize')

你可以去https://wandb.ai/authorize注册一个账号并获取api_key,然后把下面这条语句等号右侧的内容替换为你获得的api_key字符串,再按照提示进入相应网址即可:

1
api_key = user_secrets.get_secret("wandb_api")

配置文件

总的类别数num_classes可以通过统计individual_id列中不同元素的个数得到:

1
# Number of distinct values in the individual_id column (missing values, if
# any, count as one distinct value, matching len(unique())).
df['individual_id'].nunique(dropna=False)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# Dataset locations; the image folders live directly under ROOT_DIR.
ROOT_DIR = '../input/happy-whale-and-dolphin'
TRAIN_DIR = f'{ROOT_DIR}/train_images'  # training images
TEST_DIR = f'{ROOT_DIR}/test_images'  # test images

# Use the first CUDA device when one is available, otherwise the CPU.
_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Single source of truth for the hyper-parameters read by the rest of the file.
CONFIG = dict(
    seed=2022,
    epochs=4,
    img_size=448,
    # Can be swapped for another backbone name known to timm.
    model_name="tf_efficientnet_b0_ns",
    # Total number of distinct individual_id values.
    num_classes=15587,
    # Length of the embedding vector.
    embedding_size=512,
    train_batch_size=32,
    valid_batch_size=64,
    learning_rate=1e-4,
    scheduler='CosineAnnealingLR',
    min_lr=1e-6,
    T_max=500,
    weight_decay=1e-6,
    n_fold=5,
    n_accumulate=1,
    device=_device,
    # ArcFace hyperparameters
    s=30.0,
    m=0.50,
    # Label smoothing coefficient.
    ls_eps=0.0,
    easy_margin=False,
)

读取数据

1
2
3
4
5
6
def get_train_file_path(id):
    """Return the path of the training image named ``id`` under ``TRAIN_DIR``."""
    return f"{TRAIN_DIR}/{id}"


# Load the training table and add a column holding each image's file path.
df = pd.read_csv(f"{ROOT_DIR}/train.csv")
df['file_path'] = df['image'].apply(get_train_file_path)
df.head()

Alt text

标签编码

individual_id通过label encoder转成数字,这样才能被计算机识别,并继续接下来的操作:

1
2
3
4
5
# Replace the individual_id values with the integer codes produced by the encoder.
encoder = LabelEncoder()
df['individual_id'] = encoder.fit_transform(df['individual_id'])

# Persist the fitted encoder to le.pkl.
with open("le.pkl", "wb") as fp:
    joblib.dump(encoder, fp)

数据划分

采用交叉验证的方式划分数据集,并保证训练集和验证集中各个类别比例基本一致(使用StratifiedKFold):

1
2
3
4
# Stratify on individual_id and record, for every row, the index of the fold
# in which that row belongs to the validation part.
skf = StratifiedKFold(n_splits=CONFIG['n_fold'])

for fold, (_, val_) in enumerate(skf.split(X=df, y=df.individual_id)):
    df.loc[val_, "kfold"] = fold

Alt text

准备DataSet类

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
class HappyWhaleDataset(Dataset):
    """Dataset that yields one RGB image and its label per DataFrame row.

    Args:
        df: DataFrame providing a ``file_path`` column (image location) and an
            ``individual_id`` column (label, returned as a ``torch.long`` tensor).
        transforms: Optional callable invoked as ``transforms(image=img)``; the
            ``"image"`` entry of its result replaces the raw image.
    """

    def __init__(self, df, transforms=None):
        self.df = df
        self.file_names = df['file_path'].values
        self.labels = df['individual_id'].values
        self.transforms = transforms

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        """Return ``{'image': ..., 'label': ...}`` for row ``index``.

        Raises:
            OSError: if the image at the stored path cannot be read.
        """
        img_path = self.file_names[index]
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None; fail here with the
            # offending path instead of an opaque error inside cvtColor.
            raise OSError(f"Could not read image: {img_path}")
        # OpenCV loads images as BGR; convert to RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        label = self.labels[index]

        if self.transforms:
            img = self.transforms(image=img)["image"]

        return {
            'image': img,
            'label': torch.tensor(label, dtype=torch.long)
        }

数据增强

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
def _normalize_step():
    """Build a fresh Normalize step with the mean/std used by both pipelines."""
    return A.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
        max_pixel_value=255.0,
        p=1.0,
    )


# Both pipelines resize to a square of side CONFIG['img_size'].
_side = CONFIG['img_size']

# Training: resize, three random augmentations (each applied with p=0.5),
# then normalisation and conversion to a tensor.
_train_steps = [
    A.Resize(_side, _side),
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.15, rotate_limit=60, p=0.5),
    # NOTE(review): limits of 0.2 look very small for this transform —
    # confirm against the albumentations documentation that this is intended.
    A.HueSaturationValue(
        hue_shift_limit=0.2,
        sat_shift_limit=0.2,
        val_shift_limit=0.2,
        p=0.5,
    ),
    A.RandomBrightnessContrast(
        brightness_limit=(-0.1, 0.1),
        contrast_limit=(-0.1, 0.1),
        p=0.5,
    ),
    _normalize_step(),
    ToTensorV2(),
]

# Validation: no random augmentation, only resize + normalise + tensor.
_valid_steps = [
    A.Resize(_side, _side),
    _normalize_step(),
    ToTensorV2(),
]

data_transforms = {
    "train": A.Compose(_train_steps, p=1.),
    "valid": A.Compose(_valid_steps, p=1.),
}

GeM Pooling

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
class GeM(nn.Module):
    """Generalized-mean pooling over the last two dimensions.

    Computes ``mean(clamp(x, min=eps) ** p) ** (1 / p)`` with a single pooling
    window that spans the whole of the last two dimensions. The exponent ``p``
    is a learnable parameter.

    Args:
        p: Initial value of the exponent.
        eps: Lower bound applied to the input before exponentiation.
    """

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        self.p = nn.Parameter(torch.ones(1) * p)
        self.eps = eps

    def forward(self, x):
        return self.gem(x, p=self.p, eps=self.eps)

    def gem(self, x, p=3, eps=1e-6):
        """Apply generalized-mean pooling to ``x`` with exponent ``p``."""
        window = (x.size(-2), x.size(-1))
        return F.avg_pool2d(x.clamp(min=eps).pow(p), window).pow(1. / p)

    def __repr__(self):
        p_value = self.p.data.tolist()[0]
        return f"{self.__class__.__name__}(p={p_value:.4f}, eps={self.eps})"

ArcFace

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
class ArcMarginProduct(nn.Module):
    r"""Large-margin arc distance head: the target-class logit uses cos(theta + m).

    ``theta`` is the angle between the L2-normalised input and the
    L2-normalised class weight. Every other class keeps ``cos(theta)``, and
    all logits are multiplied by ``s``.

    Args:
        in_features: size of each input sample
        out_features: size of each output sample (number of classes)
        s: scale applied to the output logits
        m: margin added to the target-class angle
        easy_margin: if true, apply the margin only where ``cos(theta) > 0``;
            otherwise apply it where ``cos(theta) > cos(pi - m)`` and use
            ``cos(theta) - sin(pi - m) * m`` elsewhere
        ls_eps: label-smoothing coefficient applied to the one-hot targets
    """

    def __init__(self, in_features, out_features, s=30.0,
                 m=0.50, easy_margin=False, ls_eps=0.0):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.ls_eps = ls_eps  # label smoothing
        self.weight = nn.Parameter(torch.empty(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m

    def forward(self, input, label):
        """Return the scaled margin logits for ``input`` given integer ``label``s."""
        # --------------------------- cos(theta) & phi(theta) ---------------------
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        # Rounding can push cosine**2 slightly above 1; clamp so sqrt never
        # sees a negative number (which would yield NaN).
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1))
        # cos(theta + m) = cos(theta)cos(m) - sin(theta)sin(m)
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        # --------------------------- convert label to one-hot ---------------------
        # Allocate on the same device as the logits rather than relying on a
        # module-level configuration object.
        one_hot = torch.zeros(cosine.size(), device=cosine.device)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.out_features
        # Select phi for the target class and cosine for every other class.
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s

        return output

构建模型

ArcFace可以看做分类网络最后的全连接层,在训练过程中,它的输入是上一层的输出向量(embedding)和当前图片的label

但是在实际测试时,label是需要预测的,因此测试时不需要经过ArcFace层,只需要拿到它之前的embedding就可以了,所以这里额外写了一个extract方法,里面的网络层和forward方法中调用的网络层是相同的,只是去掉了最后的ArcFace层。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
class HappyWhaleModel(nn.Module):
    """timm backbone -> GeM pooling -> linear embedding -> ArcMarginProduct head.

    Args:
        model_name: Name passed to ``timm.create_model``. The backbone must
            expose ``classifier`` and ``global_pool`` attributes, which are
            replaced with identity layers.
        embedding_size: Output size of the embedding layer.
        pretrained: Forwarded to ``timm.create_model``.
    """

    def __init__(self, model_name, embedding_size, pretrained=True):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained)
        in_features = self.model.classifier.in_features
        # Strip the backbone's own head and pooling; GeM pools instead.
        self.model.classifier = nn.Identity()
        self.model.global_pool = nn.Identity()
        self.pooling = GeM()
        self.embedding = nn.Linear(in_features, embedding_size)
        self.fc = ArcMarginProduct(embedding_size,
                                   CONFIG["num_classes"],
                                   s=CONFIG["s"],
                                   m=CONFIG["m"],
                                   # Was CONFIG["ls_eps"], which made any
                                   # non-zero label smoothing switch on
                                   # easy_margin.
                                   easy_margin=CONFIG["easy_margin"],
                                   ls_eps=CONFIG["ls_eps"])

    def forward(self, images, labels):
        """Return the margin logits; ``labels`` are required by the ArcMarginProduct head."""
        return self.fc(self.extract(images), labels)

    def extract(self, images):
        """Return the embedding of ``images`` without applying the margin head."""
        features = self.model(images)
        pooled_features = self.pooling(features).flatten(1)
        return self.embedding(pooled_features)


model = HappyWhaleModel(CONFIG['model_name'], CONFIG['embedding_size'])
model.to(CONFIG['device']);

损失函数

使用交叉熵作为分类损失:

1
2
def criterion(outputs, labels):
    """Return the cross-entropy loss between logits ``outputs`` and class ``labels``."""
    return nn.CrossEntropyLoss()(outputs, labels)

训练&验证函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# train
def train_one_epoch(model, optimizer, scheduler, dataloader, device, epoch):
    """Run one training pass over ``dataloader`` and return the mean loss per sample.

    Gradients are accumulated over ``CONFIG['n_accumulate']`` batches before
    each optimizer step; ``scheduler`` (if given) is stepped once per
    optimizer step.

    Args:
        model: Called as ``model(images, labels)``.
        optimizer: Optimizer updating ``model``.
        scheduler: Learning-rate scheduler, or None.
        dataloader: Yields dicts with ``'image'`` and ``'label'`` entries.
        device: Device the batches are moved to.
        epoch: Epoch number, shown in the progress bar only.
    """
    model.train()

    accumulate = CONFIG['n_accumulate']
    dataset_size = 0
    running_loss = 0.0

    bar = tqdm(enumerate(dataloader), total=len(dataloader))
    for step, data in bar:
        images = data['image'].to(device, dtype=torch.float)
        labels = data['label'].to(device, dtype=torch.long)

        batch_size = images.size(0)

        outputs = model(images, labels)
        loss = criterion(outputs, labels)

        # Only the back-propagated value is divided by the accumulation
        # factor; the reported loss stays unscaled so it remains comparable
        # with the validation loss when n_accumulate > 1.
        (loss / accumulate).backward()

        if (step + 1) % accumulate == 0:
            optimizer.step()

            # zero the parameter gradients
            optimizer.zero_grad()

            if scheduler is not None:
                scheduler.step()

        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size

        epoch_loss = running_loss / dataset_size

        bar.set_postfix(Epoch=epoch, Train_Loss=epoch_loss,
                        LR=optimizer.param_groups[0]['lr'])
    gc.collect()

    return epoch_loss

# val
@torch.inference_mode()
def valid_one_epoch(model, dataloader, device, epoch, optimizer=None):
    """Evaluate ``model`` on ``dataloader`` and return the mean loss per sample.

    Args:
        model: Called as ``model(images, labels)``.
        dataloader: Yields dicts with ``'image'`` and ``'label'`` entries.
        device: Device the batches are moved to.
        epoch: Epoch number, shown in the progress bar only.
        optimizer: Optional; when given, its current learning rate is shown in
            the progress bar. Previously the function read a module-level
            ``optimizer`` that was not part of its signature.
    """
    model.eval()

    dataset_size = 0
    running_loss = 0.0

    bar = tqdm(enumerate(dataloader), total=len(dataloader))
    for step, data in bar:
        images = data['image'].to(device, dtype=torch.float)
        labels = data['label'].to(device, dtype=torch.long)

        batch_size = images.size(0)

        outputs = model(images, labels)
        loss = criterion(outputs, labels)

        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size

        epoch_loss = running_loss / dataset_size

        postfix = {"Epoch": epoch, "Valid_Loss": epoch_loss}
        if optimizer is not None:
            postfix["LR"] = optimizer.param_groups[0]['lr']
        bar.set_postfix(**postfix)

    gc.collect()

    return epoch_loss


def run_training(model, optimizer, scheduler, device, num_epochs):
    """Train for ``num_epochs`` epochs and return the best model and the loss history.

    Besides its arguments, this function reads the module-level
    ``train_loader``, ``valid_loader`` and W&B ``run`` objects. Whenever the
    validation loss is less than or equal to the best seen so far, the weights
    are kept in memory and written to ``Loss<loss>_epoch<epoch>.bin``.

    Args:
        model: Model to train.
        optimizer: Optimizer updating ``model``.
        scheduler: Learning-rate scheduler, or None.
        device: Device passed to the per-epoch train/validation functions.
        num_epochs: Number of epochs to run.

    Returns:
        ``(model, history)``: the model with the best weights loaded, and a
        dict with ``'Train Loss'`` and ``'Valid Loss'`` lists.
    """
    # To automatically log gradients
    wandb.watch(model, log_freq=100)

    if torch.cuda.is_available():
        print("[INFO] Using GPU: {}\n".format(torch.cuda.get_device_name()))

    start = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_epoch_loss = np.inf
    history = defaultdict(list)

    for epoch in range(1, num_epochs + 1):
        gc.collect()
        # Honour the `device` argument instead of re-reading CONFIG['device'].
        train_epoch_loss = train_one_epoch(model, optimizer, scheduler,
                                           dataloader=train_loader,
                                           device=device, epoch=epoch)

        val_epoch_loss = valid_one_epoch(model, valid_loader, device=device,
                                         epoch=epoch)

        history['Train Loss'].append(train_epoch_loss)
        history['Valid Loss'].append(val_epoch_loss)

        # Log both metrics in one call so they share the same W&B step.
        wandb.log({"Train Loss": train_epoch_loss,
                   "Valid Loss": val_epoch_loss})

        # deep copy the model
        if val_epoch_loss <= best_epoch_loss:
            print(f"{b_}Validation Loss Improved ({best_epoch_loss} ---> {val_epoch_loss})")
            best_epoch_loss = val_epoch_loss
            run.summary["Best Loss"] = best_epoch_loss
            best_model_wts = copy.deepcopy(model.state_dict())
            PATH = "Loss{:.4f}_epoch{:.0f}.bin".format(best_epoch_loss, epoch)
            torch.save(model.state_dict(), PATH)
            # Save a model file from the current directory
            print(f"Model Saved{sr_}")

        print()

    end = time.time()
    time_elapsed = end - start
    hours, remainder = divmod(time_elapsed, 3600)
    minutes, seconds = divmod(remainder, 60)
    print('Training complete in {:.0f}h {:.0f}m {:.0f}s'.format(
        hours, minutes, seconds))
    print("Best Loss: {:.4f}".format(best_epoch_loss))

    # load best model weights
    model.load_state_dict(best_model_wts)

    return model, history

设置学习率调度方式,优化器

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
def fetch_scheduler(optimizer):
    """Build the learning-rate scheduler named by ``CONFIG['scheduler']``.

    Supported values: ``'CosineAnnealingLR'`` (reads ``CONFIG['T_max']``),
    ``'CosineAnnealingWarmRestarts'`` (reads ``CONFIG['T_0']``) and ``None``
    (no scheduler). Both schedulers use ``CONFIG['min_lr']`` as ``eta_min``.

    Returns:
        The scheduler instance, or None when no scheduler is configured.

    Raises:
        ValueError: if ``CONFIG['scheduler']`` is any other value.
    """
    name = CONFIG['scheduler']
    if name is None:
        return None
    if name == 'CosineAnnealingLR':
        return lr_scheduler.CosineAnnealingLR(optimizer, T_max=CONFIG['T_max'],
                                              eta_min=CONFIG['min_lr'])
    if name == 'CosineAnnealingWarmRestarts':
        return lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=CONFIG['T_0'],
                                                        eta_min=CONFIG['min_lr'])
    # An unknown name used to fall through to an unbound local variable.
    raise ValueError(f"Unsupported scheduler: {name!r}")


optimizer = optim.Adam(model.parameters(), lr=CONFIG['learning_rate'],
                       weight_decay=CONFIG['weight_decay'])
scheduler = fetch_scheduler(optimizer)

定义某个fold的数据集

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
def prepare_loaders(df, fold):
    """Return ``(train_loader, valid_loader)`` for the given fold.

    Rows whose ``kfold`` equals ``fold`` form the validation set; all other
    rows form the training set. The training loader shuffles and drops the
    last incomplete batch; the validation loader does neither.
    """
    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_valid = df[df.kfold == fold].reset_index(drop=True)

    train_dataset = HappyWhaleDataset(df_train, transforms=data_transforms["train"])
    valid_dataset = HappyWhaleDataset(df_valid, transforms=data_transforms["valid"])

    train_loader = DataLoader(train_dataset, batch_size=CONFIG['train_batch_size'],
                              num_workers=2, shuffle=True, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset, batch_size=CONFIG['valid_batch_size'],
                              num_workers=2, shuffle=False, pin_memory=True)

    return train_loader, valid_loader


# Build the loaders with fold 0 held out for validation.
train_loader, valid_loader = prepare_loaders(df, fold=0)

开始训练

定义训练函数:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
def run_training(model, optimizer, scheduler, device, num_epochs):
    """Train for ``num_epochs`` epochs and return the best model and the loss history.

    Besides its arguments, this function reads the module-level
    ``train_loader``, ``valid_loader`` and W&B ``run`` objects. Whenever the
    validation loss is less than or equal to the best seen so far, the weights
    are kept in memory and written to ``Loss<loss>_epoch<epoch>.bin``.

    Args:
        model: Model to train.
        optimizer: Optimizer updating ``model``.
        scheduler: Learning-rate scheduler, or None.
        device: Device passed to the per-epoch train/validation functions.
        num_epochs: Number of epochs to run.

    Returns:
        ``(model, history)``: the model with the best weights loaded, and a
        dict with ``'Train Loss'`` and ``'Valid Loss'`` lists.
    """
    # To automatically log gradients
    wandb.watch(model, log_freq=100)

    if torch.cuda.is_available():
        print("[INFO] Using GPU: {}\n".format(torch.cuda.get_device_name()))

    start = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_epoch_loss = np.inf
    history = defaultdict(list)

    for epoch in range(1, num_epochs + 1):
        gc.collect()
        # Honour the `device` argument instead of re-reading CONFIG['device'].
        train_epoch_loss = train_one_epoch(model, optimizer, scheduler,
                                           dataloader=train_loader,
                                           device=device, epoch=epoch)

        val_epoch_loss = valid_one_epoch(model, valid_loader, device=device,
                                         epoch=epoch)

        history['Train Loss'].append(train_epoch_loss)
        history['Valid Loss'].append(val_epoch_loss)

        # Log both metrics in one call so they share the same W&B step.
        wandb.log({"Train Loss": train_epoch_loss,
                   "Valid Loss": val_epoch_loss})

        # deep copy the model
        if val_epoch_loss <= best_epoch_loss:
            print(f"{b_}Validation Loss Improved ({best_epoch_loss} ---> {val_epoch_loss})")
            best_epoch_loss = val_epoch_loss
            run.summary["Best Loss"] = best_epoch_loss
            best_model_wts = copy.deepcopy(model.state_dict())
            PATH = "Loss{:.4f}_epoch{:.0f}.bin".format(best_epoch_loss, epoch)
            torch.save(model.state_dict(), PATH)
            # Save a model file from the current directory
            print(f"Model Saved{sr_}")

        print()

    end = time.time()
    time_elapsed = end - start
    hours, remainder = divmod(time_elapsed, 3600)
    minutes, seconds = divmod(remainder, 60)
    print('Training complete in {:.0f}h {:.0f}m {:.0f}s'.format(
        hours, minutes, seconds))
    print("Best Loss: {:.4f}".format(best_epoch_loss))

    # load best model weights
    model.load_state_dict(best_model_wts)

    return model, history

开始训练

1
2
3
4
5
6
7
8
9
# Start the W&B run. `anony` comes from the login step: None after a
# successful login (the run goes to that account), "must" otherwise. The
# previous hard-coded 'must' ignored a successful login.
run = wandb.init(project='HappyWhale',
                 config=CONFIG,
                 job_type='Train',
                 tags=['arcface', 'gem-pooling', 'effnet-b0-ns', '448'],
                 anonymous=anony)

# Train with the configured device and epoch count; returns the model with
# its best weights loaded and the per-epoch loss history.
model, history = run_training(model, optimizer, scheduler,
                              device=CONFIG['device'],
                              num_epochs=CONFIG['epochs'])

参考: