Hello everyone! I am new to ClearML and am currently trying out its capabilities, and I am running into one issue with pipelines. The pipeline runs properly when I run it locally, or if I clone the project and run it remotely by enqueuing it, but if I try to run the pipeline directly from the terminal it gets stuck. I am following the ClearML tutorial on YouTube. (I am running a self-hosted ClearML server with the agent in docker mode.) Any help would be highly appreciated!
From the console I can see that it gets stuck right after this:
Downloading filelock-3.16.1-py3-none-any.whl (16 kB)
Installing collected packages: distlib, urllib3, six, rpds-py, PyYAML, pyparsing, pyjwt, psutil, platformdirs, idna, filelock, charset-normalizer, certifi, attrs, virtualenv, requests, referencing, python-dateutil, pathlib2, orderedmultidict, jsonschema-specifications, furl, jsonschema, clearml-agent
Successfully installed PyYAML-6.0.2 attrs-23.2.0 certifi-2024.8.30 charset-normalizer-3.4.0 clearml-agent-1.9.2 distlib-0.3.9 filelock-3.16.1 furl-2.1.3 idna-3.10 jsonschema-4.23.0 jsonschema-specifications-2024.10.1 orderedmultidict-1.0.1 pathlib2-2.3.7.post1 platformdirs-4.3.6 psutil-5.9.8 pyjwt-2.8.0 pyparsing-3.1.4 python-dateutil-2.8.2 referencing-0.35.1 requests-2.31.0 rpds-py-0.21.0 six-1.16.0 urllib3-1.26.20 virtualenv-20.27.1
WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: None
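For context: this pip output appears to come from the agent bootstrapping its own environment inside the docker container, i.e. the hang happens right after environment setup. A minimal sketch of the docker-mode agent invocation assumed here (the queue name and image simply mirror the pipeline's default_queue and docker arguments below):

clearml-agent daemon --queue default --docker nvidia/cuda:12.4.0-runtime-ubuntu22.04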
The pipeline file I used:
from clearml import TaskTypes
from clearml.automation.controller import PipelineDecorator


@PipelineDecorator.component(
    return_values=['X_train', 'y_train', 'X_test', 'y_test'],
    task_type=TaskTypes.data_processing
)
def prepare_data(dataset_name):
    # Imports first
    from clearml import Dataset
    import matplotlib.pyplot as plt
    import pandas as pd
    import numpy as np

    # Read the data
    data_path = Dataset.get(dataset_name=dataset_name, alias=dataset_name).get_local_copy()
    fashion_mnist_test = pd.read_csv(f"{data_path}/fashion-mnist_test.csv")
    fashion_mnist_train = pd.read_csv(f"{data_path}/fashion-mnist_train.csv")

    # Load in the train and test sets
    X_train = np.array(fashion_mnist_train.iloc[:, 1:])
    y_train = np.array(fashion_mnist_train.iloc[:, 0])
    X_test = np.array(fashion_mnist_test.iloc[:, 1:])
    y_test = np.array(fashion_mnist_test.iloc[:, 0])

    # Plot one of them to make sure everything is alright
    plt.imshow(X_train[1].reshape((28, 28)))
    plt.title("Sample Image")
    plt.show()

    return X_train, y_train, X_test, y_test


@PipelineDecorator.component(return_values=['model'], task_type=TaskTypes.training)
def train_model(X_train, y_train):
    # Imports first
    import xgboost as xgb
    from clearml import Task

    # Load the data into XGBoost format
    dtrain = xgb.DMatrix(X_train, label=y_train)

    # Set the parameters
    params = {
        "objective": "reg:squarederror",
        "eval_metric": "rmse",
        "max_depth": 4,  # the maximum depth of each tree
        "eta": 0.3,      # the training step for each iteration
        "gamma": 0,
        "max_delta_step": 1,
        "subsample": 1,
        "sampling_method": "uniform",
        "seed": 42
    }
    Task.current_task().connect(params)

    # Train the XGBoost model
    model = xgb.train(
        params,
        dtrain,
        num_boost_round=25,
        evals=[(dtrain, "train")],
        verbose_eval=0,
    )

    # Save the model
    model.save_model("best_model")
    return model


@PipelineDecorator.component(return_values=['accuracy'], cache=True, task_type=TaskTypes.qc)
def evaluate_model(model, X_test, y_test):
    # Imports first
    import matplotlib.pyplot as plt
    from sklearn.metrics import accuracy_score
    import xgboost as xgb
    from xgboost import plot_tree

    # Load the data in XGBoost format
    dtest = xgb.DMatrix(X_test, label=y_test)

    # Make predictions for test data
    y_pred = model.predict(dtest)
    predictions = [round(value) for value in y_pred]

    # Evaluate predictions
    accuracy = accuracy_score(dtest.get_label(), predictions)
    print("Accuracy: %.2f%%" % (accuracy * 100.0))
    return accuracy


@PipelineDecorator.pipeline(
    name='Simple Pipeline',
    project='Full Overview',
    version='0.0.5',
    default_queue='default',
    docker='nvidia/cuda:12.4.0-runtime-ubuntu22.04'
)
def run_pipeline(dataset_name):
    # Imports first
    from clearml import Task

    # Get the data in XGBoost format
    X_train, y_train, X_test, y_test = prepare_data(dataset_name=dataset_name)

    # Train an XGBoost model on the data
    model = train_model(X_train, y_train)

    # Evaluate the model
    accuracy = evaluate_model(model, X_test, y_test)
    Task.current_task().get_logger().report_single_value(name="Accuracy", value=accuracy)

    # This is blocked until the final step is completed successfully!
    print(accuracy)
    return accuracy


if __name__ == "__main__":
    # PipelineDecorator.run_locally()
    run_pipeline(dataset_name="Fashion MNIST")
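As a side note for debugging, a minimal sketch assuming the same script: the documented PipelineDecorator.run_locally() / PipelineDecorator.debug_pipeline() helpers keep the controller and all steps on the local machine, which can help narrow down whether the hang is in the pipeline logic itself or in the agent/docker environment setup.

if __name__ == "__main__":
    # Keep the controller and every step on this machine (no agent involved),
    # so any hang shows up directly in the local console:
    PipelineDecorator.run_locally()
    # Or run everything in a single local process for step-through debugging:
    # PipelineDecorator.debug_pipeline()
    run_pipeline(dataset_name="Fashion MNIST")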
Hi @<1768447000723853312:profile|RipeSeaanemone60> , can you please provide the full log? Is it the pipeline controller that is getting stuck or some step?
Pipeline controller is getting stuck just after installing python packages