Pipeline controller is getting stuck just after installing python packages
Hi @<1768447000723853312:profile|RipeSeaanemone60> , can you please provide the full log? Is it the pipeline controller that is getting stuck or some step?
Hello everyone! I am new to ClearML and am currently trying out its capabilities, and I am having an issue with pipelines. The pipeline runs properly when I run it locally, or when I clone the project and then run it remotely by enqueuing it in a queue, but if I try to run the pipeline directly from the terminal, it gets stuck. I am following the ClearML tutorial on YouTube. (I am running a self-hosted ClearML server, with the agent in Docker mode.) Any help would be highly appreciated!
In the console I can see that it gets stuck right after this output:
Downloading filelock-3.16.1-py3-none-any.whl (16 kB)
Installing collected packages: distlib, urllib3, six, rpds-py, PyYAML, pyparsing, pyjwt, psutil, platformdirs, idna, filelock, charset-normalizer, certifi, attrs, virtualenv, requests, referencing, python-dateutil, pathlib2, orderedmultidict, jsonschema-specifications, furl, jsonschema, clearml-agent
Successfully installed PyYAML-6.0.2 attrs-23.2.0 certifi-2024.8.30 charset-normalizer-3.4.0 clearml-agent-1.9.2 distlib-0.3.9 filelock-3.16.1 furl-2.1.3 idna-3.10 jsonschema-4.23.0 jsonschema-specifications-2024.10.1 orderedmultidict-1.0.1 pathlib2-2.3.7.post1 platformdirs-4.3.6 psutil-5.9.8 pyjwt-2.8.0 pyparsing-3.1.4 python-dateutil-2.8.2 referencing-0.35.1 requests-2.31.0 rpds-py-0.21.0 six-1.16.0 urllib3-1.26.20 virtualenv-20.27.1
WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: None
The pipeline file I used:
from clearml import TaskTypes
from clearml.automation.controller import PipelineDecorator
@PipelineDecorator.component(
    return_values=['X_train', 'y_train', 'X_test', 'y_test'],
    task_type=TaskTypes.data_processing,
)
def prepare_data(dataset_name):
    """Fetch the named ClearML dataset and split it into features and labels.

    Reads ``fashion-mnist_train.csv`` and ``fashion-mnist_test.csv`` from the
    local copy of the dataset. In each CSV the first column is used as the
    label and all remaining columns as the features.

    Args:
        dataset_name: Name of the ClearML dataset; also used as its alias.

    Returns:
        The tuple ``(X_train, y_train, X_test, y_test)`` of NumPy arrays.
    """
    # Imports are kept inside the function body, as in the original step.
    # NOTE(review): ClearML presumably relies on these to work out the
    # packages the step needs - confirm before moving them.
    from clearml import Dataset
    import matplotlib.pyplot as plt
    import pandas as pd
    import numpy as np

    # Resolve the dataset and get a local directory holding its files.
    dataset = Dataset.get(dataset_name=dataset_name, alias=dataset_name)
    local_dir = dataset.get_local_copy()

    # Read both CSV files (test first, then train).
    test_frame = pd.read_csv(f"{local_dir}/fashion-mnist_test.csv")
    train_frame = pd.read_csv(f"{local_dir}/fashion-mnist_train.csv")

    # Column 0 is the label, everything after it is the feature vector.
    train_features = np.array(train_frame.iloc[:, 1:])
    train_labels = np.array(train_frame.iloc[:, 0])
    test_features = np.array(test_frame.iloc[:, 1:])
    test_labels = np.array(test_frame.iloc[:, 0])

    # Sanity check: draw the second training row as a 28x28 image.
    sample_image = train_features[1].reshape((28, 28))
    plt.imshow(sample_image)
    plt.title("Sample Image")
    plt.show()

    return train_features, train_labels, test_features, test_labels
@PipelineDecorator.component(
    return_values=['model'],
    task_type=TaskTypes.training,
)
def train_model(X_train, y_train):
    """Train an XGBoost model on the given features and labels.

    The hyperparameters are connected to the current ClearML task before
    training, and the trained model is written to ``best_model`` (a relative
    path).

    Args:
        X_train: Training features, passed to ``xgb.DMatrix``.
        y_train: Training labels, passed to ``xgb.DMatrix`` as ``label``.

    Returns:
        The model object returned by ``xgb.train``.
    """
    # Imports are kept inside the function body, as in the original step.
    import xgboost as xgb
    from clearml import Task

    # Wrap the data in XGBoost's own matrix format.
    train_matrix = xgb.DMatrix(X_train, label=y_train)

    # Training hyperparameters.
    params = {
        "objective": "reg:squarederror",
        "eval_metric": "rmse",
        "max_depth": 4,  # the maximum depth of each tree
        "eta": 0.3,  # the training step for each iteration
        "gamma": 0,
        "max_delta_step": 1,
        "subsample": 1,
        "sampling_method": "uniform",
        "seed": 42,
    }
    # Register the hyperparameters on the task that is currently running.
    current_task = Task.current_task()
    current_task.connect(params)

    # Fit for 25 boosting rounds, evaluating on the training matrix itself.
    watchlist = [(train_matrix, "train")]
    booster = xgb.train(
        params,
        train_matrix,
        num_boost_round=25,
        evals=watchlist,
        verbose_eval=0,
    )

    # Persist the trained model to disk.
    booster.save_model("best_model")
    return booster
@PipelineDecorator.component(
    return_values=['accuracy'],
    cache=True,
    task_type=TaskTypes.qc,
)
def evaluate_model(model, X_test, y_test):
    """Compute the accuracy of ``model`` on the given test data.

    Raw predictions are rounded to the nearest integer and compared against
    the labels stored in the test matrix. The accuracy is also printed as a
    percentage.

    Args:
        model: Trained model exposing ``predict`` for an ``xgb.DMatrix``.
        X_test: Test features.
        y_test: Test labels.

    Returns:
        The accuracy as returned by ``sklearn.metrics.accuracy_score``.
    """
    # Imports are kept inside the function body, as in the original step.
    # ``plt`` and ``plot_tree`` are not used below but are retained on purpose.
    # NOTE(review): removing them may change the packages ClearML detects for
    # this step - confirm before cleaning up.
    import matplotlib.pyplot as plt
    from sklearn.metrics import accuracy_score
    import xgboost as xgb
    from xgboost import plot_tree

    # Wrap the test data in XGBoost's matrix format.
    test_matrix = xgb.DMatrix(X_test, label=y_test)

    # Predict, then round each raw output to the nearest integer.
    raw_scores = model.predict(test_matrix)
    predictions = list(map(round, raw_scores))

    # Compare the rounded predictions with the labels held by the matrix.
    true_labels = test_matrix.get_label()
    accuracy = accuracy_score(true_labels, predictions)

    percent = accuracy * 100.0
    print("Accuracy: %.2f%%" % percent)
    return accuracy
@PipelineDecorator.pipeline(
    name='Simple Pipeline',
    project='Full Overview',
    version='0.0.5',
    default_queue='default',
    docker='nvidia/cuda:12.4.0-runtime-ubuntu22.04',
)
def run_pipeline(dataset_name):
    """Run the pipeline: prepare the data, train a model, evaluate it.

    The resulting accuracy is reported on the current task as the single
    value "Accuracy", printed, and returned.

    Args:
        dataset_name: Name of the ClearML dataset handed to ``prepare_data``.

    Returns:
        The accuracy produced by ``evaluate_model``.
    """
    # Import is kept inside the function body, as in the original.
    from clearml import Task

    # Step 1: load the data and split it into features and labels.
    X_train, y_train, X_test, y_test = prepare_data(dataset_name=dataset_name)

    # Step 2: train an XGBoost model on the training split.
    model = train_model(X_train, y_train)

    # Step 3: score the model on the test split.
    accuracy = evaluate_model(model, X_test, y_test)

    # Attach the score to the current task as a single reported value.
    logger = Task.current_task().get_logger()
    logger.report_single_value(name="Accuracy", value=accuracy)

    # This is blocked until the final step is completed successfully!
    print(accuracy)
    return accuracy
if __name__ == "__main__":
    # Debugging aid left disabled, as in the original. NOTE(review): per the
    # ClearML docs this should run the pipeline steps locally instead of
    # enqueuing them - confirm before enabling.
    # PipelineDecorator.run_locally()

    # Launch the pipeline on the dataset named below.
    target_dataset = "Fashion MNIST"
    run_pipeline(dataset_name=target_dataset)
Pipeline controller is getting stuck just after installing python packages
Hi @<1768447000723853312:profile|RipeSeaanemone60> , can you please provide the full log? Is it the pipeline controller that is getting stuck or some step?