Examples: query, "exact match", wildcard*, wild?ard, wild*rd
Fuzzy search: cake~ (finds cakes, bake)
Term boost: "red velvet"^4, chocolate^2
Field grouping: tags:(+work -"fun-stuff")
Escaping: Escape characters +-&|!(){}[]^"~*?:\ with \, e.g. \+
Range search: properties.timestamp:[1587729413488 TO *] (inclusive), properties.title:{A TO Z} (exclusive, i.e. excluding A and Z)
Combinations: chocolate AND vanilla, chocolate OR vanilla, (chocolate OR vanilla) NOT "vanilla pudding"
Field search: properties.title:"The Title" AND text
Unanswered
Hi guys, I am having some trouble running some training scripts with the agent functionality:


` from importlib.machinery import EXTENSION_SUFFIXES
import catboost
from clearml import Task, Logger, Dataset

import lightgbm as lgb
import numpy as np
import pandas as pd
import dask.dataframe as dd
import matplotlib.pyplot as plt

# Registry of supported model back-ends. The key is the short model name
# looked up via ``exp_params['model_name']``; each entry carries the
# estimator class to instantiate and a file extension for that model type
# (the extension is not read anywhere in the visible code).
MODELS = {
    'catboost': dict(
        model_class=catboost.CatBoostClassifier,
        file_extension='cbm',
    ),
    'lgbm': dict(
        model_class=lgb.LGBMClassifier,
        file_extension='txt',
    ),
}

class ModelTrainer:
    """Train a classifier from ``MODELS`` and track the run as a ClearML task.

    The workflow implemented by :meth:`run` is: create the ClearML task,
    connect the parameter dictionaries, resolve the feature list from a
    reference task's artifact, load the train/validation datasets, fit the
    model and evaluate it on the training data.
    """

    def __init__(self):
        # Set by ``run`` once the model class has been picked from ``MODELS``.
        self.model = None

    @staticmethod
    def train(X_train, y_train, X_val, y_val, model, fit_params):
        """Fit ``model`` on the training split and return it.

        Args:
            X_train: Training features.
            y_train: Training labels.
            X_val: Validation features, passed to ``fit`` as part of ``eval_set``.
            y_val: Validation labels, passed to ``fit`` as part of ``eval_set``.
            model: Estimator exposing ``fit(X, y, eval_set=..., **kwargs)``.
            fit_params: Extra keyword arguments forwarded to ``model.fit``.

        Returns:
            The same ``model`` object after ``fit`` has been called on it.
        """
        model.fit(
            X_train, y_train,
            eval_set=(X_val, y_val),
            **fit_params
        )
        return model

    def load_dataset(
        self,
        dataset_name,
        features,
        id_cols=None,
        target='target',
        dataset_project=''
    ):
        """Fetch a ClearML dataset and split it into features/target/ids.

        Args:
            dataset_name: Name of the ClearML dataset to fetch.
            features: Column names to return as the feature frame.
            id_cols: Optional identifier column names; when truthy a third
                element with these columns is returned.
            target: Name of the label column.
            dataset_project: ClearML project that holds the dataset.

        Returns:
            ``(features, target)`` or, when ``id_cols`` is truthy,
            ``(features, target, ids)``.
        """
        dataset_path = Dataset.get(
            dataset_name=dataset_name, dataset_project=dataset_project
        ).get_local_copy()

        # Read the parquet files lazily with dask, then materialise in memory.
        dataset = dd.read_parquet(dataset_path).compute()
        if id_cols:
            return dataset[features], dataset[target], dataset[id_cols]
        return dataset[features], dataset[target]

    def run(
        self, exp_params,
        model_params, fit_params, reference_task_params, dataset_params
    ):
        """Run one tracked training session end to end.

        Args:
            exp_params: Dict with ``'model_name'`` (a key of ``MODELS``) and
                ``'identifier'`` (used in the task name).
            model_params: Keyword arguments for the model constructor.
            fit_params: Keyword arguments forwarded to ``model.fit``.
            reference_task_params: Dict with ``'name'`` (reference task name)
                and ``'features_articafact_name'`` (artifact holding the
                column list).
            dataset_params: Dict with ``'train_name'``, ``'validation_name'``
                and ``'project_name'``.

        NOTE(review): ``ID_COLS``, ``TARGET`` and ``self.evaluation_metrics``
        are used below but are not defined in the visible snippet; they must
        be provided elsewhere in the module/class - confirm.
        """
        model_name = exp_params['model_name']
        exp_identifier = exp_params['identifier']
        print(f"starting session - {exp_identifier}")

        # initialize ClearML task
        task = Task.init(
            project_name="RecSys",
            task_name=f"model_training - {exp_identifier}",
            output_uri=True
        )
        task.add_tags(['template'])
        task.connect(fit_params, 'fit_params')
        task.connect(model_params, 'model_params')
        task.connect(exp_params, 'exp_params')
        task.connect(reference_task_params, 'reference_task_params')
        task.connect(dataset_params, 'dataset_params')

        self.model = MODELS[model_name]['model_class'](**model_params)

        # The column list comes from an artifact stored on a reference task.
        reference_task = Task.get_task(
            project_name='RecSys',
            task_name=reference_task_params['name']
        )
        columns = reference_task.artifacts[
            reference_task_params['features_articafact_name']
        ].get()

        # Everything that is neither an identifier nor the label is a feature.
        excluded = ID_COLS + [TARGET]
        features = [c for c in columns if c not in excluded]
        task.upload_artifact('features', features)
        task.upload_artifact('ID_COLS', ID_COLS)
        task.upload_artifact('target', TARGET)

        print('number of features to load: ', len(features), '\n features: ')
        print(features)

        print("loading train data")
        # load train data
        X_train, y_train, id_cols_train = self.load_dataset(
            dataset_name=dataset_params['train_name'],
            features=features,
            id_cols=ID_COLS,
            dataset_project=dataset_params['project_name'],
        )

        print("loading validation data")
        # load validation data (identifier columns are not needed here)
        X_val, y_val, _id_cols_val = self.load_dataset(
            dataset_name=dataset_params['validation_name'],
            features=features,
            id_cols=ID_COLS,
            dataset_project=dataset_params['project_name'],
        )

        logger = task.get_logger()

        print('training model')
        # train model through the shared helper instead of duplicating fit()
        model = self.train(
            X_train, y_train, X_val, y_val, self.model, fit_params
        )

        print('evaluating model')
        # evaluate model with train data; column 1 of predict_proba is taken
        # as the score (presumably the positive class - confirm)
        train_scores = np.asarray(model.predict_proba(X_train))[:, 1]
        self.evaluation_metrics(
            y_train,
            train_scores,
            "train", logger, ids_idx=id_cols_train
        )

        task.close()

# Script entry point: build the parameter dictionaries and launch one
# CatBoost training session through ModelTrainer.run.
if __name__ == '__main__':
    rfs = ModelTrainer()

    # Constructor arguments for the model class selected below.
    model_params = {
        "loss_function": "Logloss",
        "eval_metric": "AUC",
        "class_weights": {0: 1, 1: 60},
        "learning_rate": 0.1
    }
    # Extra keyword arguments forwarded to model.fit().
    fit_params = {
        "early_stopping_rounds": 20,
        "plot": True
    }
    # Task and artifact that hold the list of columns to train on.
    reference_task_params = {
        'name': 'upload_features',
        'features_articafact_name': 'features_list'
    }
    # ClearML dataset names; 'test_name' is not read by ModelTrainer.run.
    dataset_params = {
        'train_name': 'classifier train',
        'validation_name': '_classifier validation',
        'test_name': 'classifier test',
        'project_name': '',
    }
    # 'model_name' must be a key of MODELS.
    experiment_params = {
        'model_name': 'catboost',
        'identifier': 'catboost_remote_v0'
    }
    rfs.run(
        experiment_params,
        model_params, fit_params,
        reference_task_params, dataset_params
    )

  
  
Posted 2 years ago
179 Views
0 Answers
2 years ago
one year ago