Answered
Hi again! I'm trying to figure out why my HPO doesn't seem to work for this simple...

Hi again! I'm trying to figure out why my HPO doesn't seem to work for this simple xgboost example I'm testing. I see that the tasks are being cloned from the base model, but I don't see any outputs indicating what the evaluation metric (in my case, F1) is for each of the cloned tasks, so I can't decide which task, i.e. which scale_pos_weight value, performed best.

Here's the base model:

from clearml import Task
from sklearn.datasets import make_classification
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from utils.logging import run_and_log_eval_metrics, plot_and_log_auc
import argparse

###### Argument Parser ########
ap = argparse.ArgumentParser()
ap.add_argument("-w0", "--weight_0", required = True, help="Y=0 Probability")
args = vars(ap.parse_args())

###### Constants #############

RANDOM_STATE = 42
TEST_SIZE = 0.2
ITERATION = 0
SCALE_POS_WEIGHT = 1
CLASS_SEP = 0.7
NAME = 'xgboost-binary-train-base-model'
PROJECT_NAME = "XGBoost Experiments"

WEIGHT_0 = float(args["weight_0"])
WEIGHT_1 = 1-WEIGHT_0
WEIGHTS = [WEIGHT_0, WEIGHT_1]

##############################
# PART 0: Initialize ClearML
task = Task.init(project_name=PROJECT_NAME, 
                 task_name=NAME)
logger = task.get_logger()

# Connect hyperparameters
params = {
    'scale_pos_weight': SCALE_POS_WEIGHT
}
params = task.connect(params)

##############################
# PART 1: Generate a synthetic dataset with 2 classes
X, y = make_classification(n_samples=10000, 
                           n_features=10, 
                           n_informative=2, 
                           n_redundant=2, 
                           n_classes=2,
                           weights=WEIGHTS,
                           class_sep=CLASS_SEP,
                           random_state=RANDOM_STATE)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE)
##############################
# PART 2: Initialize XGBClassifier, Train/Fit Model

model = XGBClassifier(objective='binary:logistic', 
                      random_state=RANDOM_STATE,
                      scale_pos_weight=params['scale_pos_weight'])

# Fit the model to training data
model.fit(X_train, y_train)

##############################
# PART 3: Save Model, Upload Artifacts + Evals to Server
# model.save_model(NAME + ".json")
# artifact = task.upload_artifact("model", NAME + ".json")

# Get predictions
preds = model.predict(X_test)

# Evaluation/Logging 
run_and_log_eval_metrics(logger, y_test, preds, iteration=ITERATION)

with the utils functions defined as follows:

from sklearn.metrics import accuracy_score, recall_score, f1_score, precision_score, roc_curve, auc
import matplotlib.pyplot as plt

def run_and_log_eval_metrics(logger, y_test, preds, iteration) -> None:
    """Compute accuracy/precision/recall/F1 and report each as a ClearML scalar."""
    accuracy = accuracy_score(y_test, preds)
    recall = recall_score(y_test, preds)
    f1 = f1_score(y_test, preds)
    precision = precision_score(y_test, preds)

    logger.report_scalar(title="Metrics", series="Accuracy", iteration=iteration, value=accuracy)
    logger.report_scalar(title="Metrics", series="Precision", iteration=iteration, value=precision)
    logger.report_scalar(title="Metrics", series="Recall", iteration=iteration, value=recall)
    logger.report_scalar(title="Metrics", series="F1", iteration=iteration, value=f1)

I'm trying to optimize for the highest F1 score with this optimizer script:

from clearml import Task
from clearml.automation import UniformParameterRange, HyperParameterOptimizer
import argparse


###### Argument Parser ########
ap = argparse.ArgumentParser()
ap.add_argument("-tid", "--taskid", required = True, help="Task ID")
args = vars(ap.parse_args())

###### Constants #############

# RANDOM_STATE = 42
# TEST_SIZE = 0.2
# CLASS_SEP = 0.7
# WEIGHTS = [0.9, 0.1]
NAME = 'xgboost-binary-find-best-balance'
PROJECT_NAME = "XGBoost Experiments"

############################################
# PART 0: Initialize ClearML

task = Task.init(project_name=PROJECT_NAME, 
                 task_name=NAME,
                 task_type=Task.TaskTypes.optimizer)

############################################
# PART 1: Run Hyperparameter Optimizer (HPO)

optimizer = HyperParameterOptimizer(
    base_task_id=args["taskid"], 
    hyper_parameters=[
        UniformParameterRange('General/scale_pos_weight', min_value=1.0, max_value=10.0, step_size=1),
    ],
    objective_metric_title='Metrics',
    objective_metric_series='F1',
    objective_metric_sign='max',  # we want the highest F1
    max_iteration_per_job=10,
    execution_queue='default',
    optimize_task_parameters=True,
)

optimizer.start()
optimizer.set_time_limit(in_minutes=1.)
# wait until the optimization completes or times out
optimizer.wait()
top_exp = optimizer.get_top_experiments(top_k=1)
print([t.id for t in top_exp])
# make sure we stop all jobs
optimizer.stop()

print('We are done, good bye')
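
Once the optimizer finishes, I'd expect something like the sketch below to print each top experiment's last reported F1 together with the scale_pos_weight it ran with (untested; it assumes the clones report under Metrics/F1 and that the parameter lives under General/scale_pos_weight):

# Inspect the top clones: last reported F1 plus the parameter value each one used
for t in optimizer.get_top_experiments(top_k=3):
    scalars = t.get_last_scalar_metrics()  # e.g. {'Metrics': {'F1': {'last': ...}}}
    f1 = scalars.get('Metrics', {}).get('F1', {}).get('last')
    spw = t.get_parameters().get('General/scale_pos_weight')
    print(t.id, 'scale_pos_weight =', spw, 'F1 =', f1)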
  
  
Posted one month ago

2 Answers


Are the cloned tasks running? Can you add logs from the HPO and one of the child tasks?
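
In the meantime, a sketch like the one below should list the clones with their status and the tail of their console output (it assumes the cloned tasks keep the base task as their parent; '<base task id>' is a placeholder):

from clearml import Task

# Find the cloned child tasks of the base task and peek at each one's log
children = Task.get_tasks(project_name="XGBoost Experiments",
                          task_filter={'parent': '<base task id>'})
for child in children:
    print(child.id, child.name, child.get_status())
    for report in child.get_reported_console_output(number_of_reports=1):
        print(report)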

  
  
Posted one month ago

Here's what I see in the web UI, but there's no indication of the F1 score for each of the cloned optimization tasks:
[screenshot: the HPO task in the ClearML web UI]
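
(To double-check from code rather than the UI, dumping everything a single clone reported should show whether the scalars exist at all; '<child task id>' is a placeholder:)

from clearml import Task

# Print every scalar a single cloned task reported, keyed by title/series
child = Task.get_task(task_id='<child task id>')
print(child.get_reported_scalars())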

  
  
Posted one month ago