Hi ApprehensiveFox95
I think this is what you are looking for:

```python
from clearml import Task
from clearml.automation.controller import PipelineController

# Create the three pipeline step tasks
# (set repo to the git repository that contains the step scripts)
step1 = Task.create(
    project_name='examples', task_name='pipeline step 1 dataset artifact',
    repo=' ',
    working_directory='examples/pipeline',
    script='step1_dataset_artifact.py',
    docker='nvcr.io/nvidia/pytorch:20.11-py3'
).id
step2 = Task.create(
    project_name='examples', task_name='pipeline step 2 process dataset',
    repo=' ',
    working_directory='examples/pipeline',
    script='step2_data_processing.py',
    docker='nvcr.io/nvidia/pytorch:20.11-py3'
).id
step3 = Task.create(
    project_name='examples', task_name='pipeline step 3 train model',
    repo=' ',
    working_directory='examples/pipeline',
    script='step3_train_model.py',
    docker='nvcr.io/nvidia/pytorch:20.11-py3'
).id

# Connecting ClearML with the current process;
# from here on everything is logged automatically
task = Task.init(project_name='examples', task_name='pipeline demo',
                 task_type=Task.TaskTypes.controller, reuse_last_task_id=False)

pipe = PipelineController(default_execution_queue='default', add_pipeline_tags=False)
pipe.add_step(name='stage_data', base_task_project='examples', base_task_id=step1,
              clone_base_task=False)
# '${stage_data.artifacts.dataset.url}' is resolved at runtime to the URL of
# the 'dataset' artifact produced by the 'stage_data' step
pipe.add_step(name='stage_process', parents=['stage_data', ],
              base_task_project='examples', base_task_id=step2,
              clone_base_task=False,
              parameter_override={'General/dataset_url': '${stage_data.artifacts.dataset.url}',
                                  'General/test_size': 0.25})
# similarly, '${stage_process.id}' resolves to the task id of 'stage_process'
pipe.add_step(name='stage_train', parents=['stage_process', ],
              base_task_project='examples', base_task_id=step3,
              clone_base_task=False,
              parameter_override={'General/dataset_task_id': '${stage_process.id}'})
```
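Note that the snippet above only defines the pipeline, it never launches it. Following the same pattern as the ClearML pipeline example, you would normally also start the controller and wait for it to finish, along these lines:

```python
# Start the pipeline (steps are enqueued in the background),
# block until every step has completed, then clean up
pipe.start()
pipe.wait()
pipe.stop()
```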
You might need the latest clearml:

```
pip install git+
```
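And since the steps are sent to the 'default' execution queue, you also need a clearml-agent servicing that queue (in docker mode, given the tasks specify a docker image), for example:

```
clearml-agent daemon --queue default --docker
```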