I am studying the "Pipeline from Tasks" page.
I am facing two problems.
- Q1: I want to load a local CSV in the step1 task and split it in step2, but I get an error that the CSV cannot be found. Can't local data be read when running in a queue? (See the sketch after the step1 code below.)
- Q2: When I enqueue the pipeline or try to run it locally, it looks for an unknown queue name. The error is [ValueError: Could not find queue named "services"]. I have created and run a queue called local, but I have never created a queue called services. (See the sketch after the pipeline script below.)

Thank you!
C:\USERS\USER\DESKTOP\TEST
data.csv
test1.py
test2.py
test_all.py
step1(test1.py)
from clearml import Task
import pandas as pd
task = Task.init(project_name="test_test", task_name="step1")
args = {
    'csv_file': 'data.csv',
    'target_column': 'target'
}
task.connect(args)
task.execute_remotely()
data = pd.read_csv(args['csv_file'])
task.upload_artifact('dataset', artifact_object=data)
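
If I understand it correctly, task.execute_remotely() stops the local process and re-runs the script on the agent machine, so the relative path data.csv only resolves if the file also exists there. As a workaround I am considering registering the CSV as a ClearML Dataset once from my machine, so step1 can fetch a local copy wherever it runs. A sketch (the dataset name "test_csv" is just a placeholder I made up):

from clearml import Dataset, Task
import os
import pandas as pd

# one-time registration, run on the machine that has data.csv:
# ds = Dataset.create(dataset_name="test_csv", dataset_project="test_test")
# ds.add_files("data.csv")
# ds.upload()
# ds.finalize()

task = Task.init(project_name="test_test", task_name="step1")
args = {'csv_file': 'data.csv', 'target_column': 'target'}
task.connect(args)
task.execute_remotely()
# download a local copy of the dataset on whatever machine executes this task
dataset_dir = Dataset.get(dataset_name="test_csv", dataset_project="test_test").get_local_copy()
data = pd.read_csv(os.path.join(dataset_dir, args['csv_file']))
task.upload_artifact('dataset', artifact_object=data)

Is this the intended way to make local files visible to a queued task?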
step2(test2.py)
from clearml import Task
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
task = Task.init(project_name="test_test", task_name="step2")
args = {
    'dataset_task_id': '',
    'test_size': 0.2,
    'random_state': 42,
    'target_column': 'target'
}
task.connect(args)
task.execute_remotely()
dataset_task = Task.get_task(task_id=args['dataset_task_id'])
data = dataset_task.artifacts['dataset'].get()
y = data[args['target_column']]
X = data.drop(columns=[args['target_column']])
categorical_columns = X.select_dtypes(include=['object']).columns
for col in categorical_columns:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col].astype(str))
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=args['test_size'], random_state=args['random_state']
)
task.upload_artifact('X_train', X_train)
task.upload_artifact('X_test', X_test)
task.upload_artifact('y_train', y_train)
task.upload_artifact('y_test', y_test)
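
To check the artifact hand-off outside the pipeline, I inspect the finished step1 task from a plain Python session (fetching by name here instead of by id; assumes a completed step1 run exists in the project):

from clearml import Task

step1_task = Task.get_task(project_name="test_test", task_name="step1")
print(step1_task.id)                       # the id I would paste into dataset_task_id
print(list(step1_task.artifacts.keys()))   # expect ['dataset']
data = step1_task.artifacts['dataset'].get()
print(data.head())

This is only to confirm the dataset_task_id wiring; my actual failure happens earlier, in step1's read_csv.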
bind step1 and step2 (test_all.py)
from clearml.automation import PipelineController
pipe = PipelineController(
    name="from task",
    project="test_test",
    version="0.0.1",
    add_pipeline_tags=False
)
pipe.add_parameter("csv_file", "data.csv")
pipe.add_parameter("target_column", "target")
pipe.add_parameter("test_size", 0.2)
pipe.add_parameter("random_state", 42)
pipe.add_step(
    name="load_data",
    base_task_project="test_test",
    base_task_name="step1",
    parameter_override={
        "General/csv_file": "${pipeline.csv_file}",
        "General/target_column": "${pipeline.target_column}"
    }
)
pipe.add_step(
    name="process_data",
    parents=["load_data"],
    base_task_project="test_test",
    base_task_name="step2",
    parameter_override={
        "General/dataset_task_id": "${load_data.id}",
        "General/test_size": "${pipeline.test_size}",
        "General/random_state": "${pipeline.random_state}",
        "General/target_column": "${pipeline.target_column}"
    }
)
pipe.set_default_execution_queue(default_execution_queue="local")
pipe.start()
#pipe.start_locally()
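
For Q2, my reading is that pipe.start() enqueues the controller task itself, and its queue argument defaults to "services"; set_default_execution_queue() only sets the queue for the steps. If that is right, any of these (untested sketch) should avoid the missing "services" queue:

# option A: send the controller to my existing queue; steps still go to "local"
pipe.start(queue="local")

# option B: run the controller in this process; steps are still enqueued to "local"
pipe.start_locally()

# option C: debug run, execute the steps in this process too (no agent needed)
pipe.start_locally(run_pipeline_steps_locally=True)

Or is a dedicated services queue/agent the recommended setup instead?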
result
C:\Users\[username]\Desktop\[project]>python test_all.py
ClearML Task: created new task id=[task_id]
[timestamp] - clearml.Task - INFO - No repository found, storing script code instead
ClearML results page:
ClearML pipeline page:
Traceback (most recent call last):
  File "C:\Users\[username]\Desktop\[project]\test_all.py", line 39, in <module>
    pipe.start()
  File "[path]\Lib\site-packages\clearml\automation\controller.py", line 1035, in start
    self._task.execute_remotely(queue_name=queue, exit_process=True, clone=False)
  File "[path]\Lib\site-packages\clearml\task.py", line 3163, in execute_remotely
    Task.enqueue(task, queue_name=queue_name)
  File "[path]\Lib\site-packages\clearml\task.py", line 1542, in enqueue
    raise ValueError('Could not find queue named "{}"'.format(queue_name))
ValueError: Could not find queue named "services"