Hi @<1576381444509405184:profile|ManiacalLizard2> ! Can you please share a code snippet that I could run to investigate the issue?
Sure:
import clearml
import cv2
from mmengine.runner import Runner
# Project-specific helpers (redacted elsewhere in this snippet): get_livbatch_list,
# download_batches, get_annotation_df, deterministic_train_val, LivDataset,
# LivImageLoader, and the Albumentations pipeline wrapper.

def main():
    repo = "redacted"
    commit = "redacted"
    bands = ["redacted"]
    test_size = 0.2
    batch_size = 64
    num_workers = 12
    img_size = (128, 128)
    random_seed = 42
    epoch = 20
    learning_rate = 0.1

    livbatch_list = get_livbatch_list(repo, commit)
    lbs = download_batches(repo, commit, livbatch_list)
    df, label_map = get_annotation_df(lbs, bands)
    df_train, df_val = deterministic_train_val(df, test_size=test_size)
    train_dataloader = dict(
        batch_size=batch_size,
        num_workers=num_workers,
        persistent_workers=True,
        sampler=dict(type='DefaultSampler', shuffle=True),
        dataset=LivDataset(
            anno_df=df_train,
            bands=bands,
            pipeline=[LivImageLoader(bands=bands, img_size=img_size),
                      Albumentations([
                          dict(
                              type='ShiftScaleRotate',
                              shift_limit=0.0625,
                              scale_limit=0.2,
                              rotate_limit=15,
                              interpolation=1,
                              border_mode=cv2.BORDER_CONSTANT,
                              p=0.5),
                      ]),
                      dict(type='PackInputs')],
        ),
    )
    val_dataloader = dict(
        batch_size=batch_size,
        num_workers=num_workers,
        persistent_workers=True,
        sampler=dict(type='DefaultSampler', shuffle=False),
        dataset=LivDataset(
            anno_df=df_val,
            bands=bands,
            pipeline=[LivImageLoader(bands=bands, img_size=img_size),
                      dict(type='PackInputs')],
        ),
    )
    model = redacted
    val_evaluator = [
        dict(type='Accuracy', topk=(1, ), prefix='val/accuracy'),
        dict(
            type='SingleLabelMetric',
            items=('precision', 'recall', 'f1-score'),
            average='macro',
            prefix='val/macro',
        ),  # class-wise mean
        dict(
            type='SingleLabelMetric',
            items=('precision', 'recall', 'f1-score'),
            average='micro',
            prefix='val/micro',
        ),  # overall mean
    ]
    runner = Runner(
        # the default registry scope for building components
        default_scope="mmpretrain",
        # the model used for training and validation;
        # it needs to meet specific interface requirements
        model=model,
        # working directory that saves training logs and weight files
        work_dir='./work_dir',
        # the train dataloader needs to meet the PyTorch DataLoader protocol
        train_dataloader=train_dataloader,
        # optimizer wrapper for optimization with additional features like
        # AMP, gradient accumulation, etc.
        # optim_wrapper=dict(optimizer=dict(type=SGD, lr=0.001, momentum=0.9)),
        optim_wrapper=dict(type='OptimWrapper',
                           # accumulative_counts=accumulation,
                           optimizer=dict(type='SGD',
                                          lr=learning_rate,
                                          momentum=0.9,
                                          weight_decay=0.0005),
                           clip_grad=None),
        # training configs specifying training epochs, validation intervals, etc.
        train_cfg=dict(by_epoch=True, max_epochs=epoch, val_interval=1),
        # the validation dataloader also needs to meet the PyTorch DataLoader protocol
        val_dataloader=val_dataloader,
        # validation configs for specifying additional parameters required for validation
        val_cfg=dict(),
        # validation evaluator; the default one is used here
        # val_evaluator=dict(type=Accuracy),
        val_evaluator=val_evaluator,
        randomness=dict(seed=random_seed),
        visualizer=dict(type='Visualizer',
                        vis_backends=[dict(type='ClearMLVisBackend')]),
        default_hooks=dict(checkpoint=dict(
            type='CheckpointHook',
            max_keep_ckpts=1,
            save_last=True,
            save_best=['auto'],
        )),
    )
    runner.train()
if __name__ == "__main__":
    task = clearml.Task.init(project_name="mmpretrain", task_name='mmpretrain')
    main()
Looks like it's because I did not do it the mmpretrain way, with a dict config file ...
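For reference, a minimal sketch of that dict-config route (the file name, dataset type, and paths below are placeholders I picked for illustration, not my actual redacted setup):

# sketch_config.py -- hypothetical config where everything is declared as
# dicts / registered type strings, nothing instantiated directly, so the
# whole config stays serializable:
#
#   default_scope = 'mmpretrain'
#   work_dir = './work_dir'
#   train_dataloader = dict(
#       batch_size=64,
#       num_workers=12,
#       sampler=dict(type='DefaultSampler', shuffle=True),
#       dataset=dict(
#           type='CustomDataset',        # a registered dataset type
#           data_root='data/train',      # placeholder path
#           pipeline=[dict(type='LoadImageFromFile'),
#                     dict(type='PackInputs')]))
#   ... model, optim_wrapper, train_cfg, val_* declared as dicts too ...

from mmengine.config import Config
from mmengine.runner import Runner

cfg = Config.fromfile('sketch_config.py')  # load the plain-dict config
runner = Runner.from_cfg(cfg)              # Runner builds everything from the dicts
runner.train()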