Unanswered
Autoscaler Parallelization Issue:
I have an AWS autoscaler set up with a resource that has a max of 3 instances assigned to the …
Two values:
`
@PipelineDecorator.component(
    return_values=["run_model_path", "run_tb_path"],
    cache=False,
    task_type=TaskTypes.training,
    packages=[
        "clearml",
        "tensorboard_logger",
        "timm",
        "fastai",
        "torch==1.11.0",
        "torchvision==0.12.0",
        "protobuf==3.19.*",
        "tensorboard",
        "google-cloud-storage>=1.13.2",
    ],
    repo="git@github.com:shpigi/clearml_evaluation.git",
    repo_branch="main",
)
def train_image_classifier_component(
    clearml_dataset,
    backbone_name,
    image_resize: int,
    batch_size: int,
    run_model_uri,
    run_tb_uri,
    local_data_path,
    num_epochs: int,
):
    """Pipeline component that trains an image classifier.

    Thin wrapper around
    ``image_classifier_training.pipeline_functions.train_image_classifier``:
    all eight arguments are forwarded positionally, in signature order, and
    the two values the helper produces are returned unchanged.

    Returns:
        A ``(run_model_path, run_tb_path)`` tuple, matching the names
        declared in the decorator's ``return_values``.
    """
    # NOTE(review): imports are kept inside the body on purpose; ClearML is
    # expected to run a component function as a standalone task, so
    # module-level imports may not be available there -- confirm against the
    # ClearML docs before hoisting them.
    import sys

    # Put the repository directory first on the import path so the project
    # package below can be resolved (presumably where the repo is available
    # on the worker -- verify against the agent/docker configuration).
    sys.path.insert(0, "/src/clearml_evaluation/")
    from image_classifier_training import pipeline_functions

    training_args = (
        clearml_dataset,
        backbone_name,
        image_resize,
        batch_size,
        run_model_uri,
        run_tb_uri,
        local_data_path,
        num_epochs,
    )
    # Unpack into exactly two names so an unexpected result shape fails here.
    model_path, tb_path = pipeline_functions.train_image_classifier(*training_args)
    return model_path, tb_path
171 Views
0
Answers
2 years ago
one year ago