I'm trying to understand the execution flow of pipelines when translating from local to remote execution. I've defined a pipeline using the PipelineDecorator:


import json
import os
import sys
from argparse import ArgumentParser
from logging import getLogger
from pathlib import Path
from typing import Callable

from clearml import PipelineDecorator, Task

from clearml_pipelines_examples.base.pipeline_settings import ExecutionMode
from clearml_pipelines_examples.pipelines.examples.train_model_on_random_data import (
    TrainModelPipelineKwargs,
    TrainModelPipelineSettings,
)
from clearml_pipelines_examples.tasks.examples import generate_dataset, train_model

logger = getLogger()


def build_pipeline(pipeline_config: TrainModelPipelineSettings) -> Callable:

    generate_dataset_config = pipeline_config.components.generate_dataset
    generate_dataset_step = PipelineDecorator.component(
        generate_dataset,
        task_type="data_processing",
        return_values=["n_data_features"],
        **generate_dataset_config.dict(),
    )

    train_model_config = pipeline_config.components.train_model
    train_model_step = PipelineDecorator.component(
        train_model,
        task_type="training",
        parents=["generate_dataset"],
        **train_model_config.dict(),
    )

    @PipelineDecorator.pipeline(
        start_controller_locally=pipeline_config.execution_mode != ExecutionMode.all_remote,
        **pipeline_config.dict(exclude={"execution_mode", "components"}),
    )
    def pipeline(
        clearml_dataset_name: str,
        clearml_dataset_project_name: str,
        clearml_model_name: str,
        generate_n_samples: int = 100,
        generate_n_features: int = 1,
        distribution_type: str = "uniform",
        epochs: int = 1,
    ):

        n_data_features = generate_dataset_step(
            clearml_dataset_name=clearml_dataset_name,
            clearml_dataset_project_name=clearml_dataset_project_name,
            n_samples=generate_n_samples,
            n_features=generate_n_features,
            dist_type=distribution_type,
        )
        train_model_step(
            clearml_dataset_name=clearml_dataset_name,
            clearml_dataset_project_name=clearml_dataset_project_name,
            clearml_model_name=clearml_model_name,
            epochs=epochs,
            n_dataset_features=n_data_features,
        )

    return pipeline


if __name__ == "__main__":

    # mbarna/TODO: move this parsing to a separate function
    parser = ArgumentParser()
    # mbarna/TODO: would be good to have this not end up in the pipeline UI config
    parser.add_argument("--config-file-path", "-f", type=Path)
    parsed_args = parser.parse_args()
    pipeline_settings = None
    pipeline_kwargs = None

    # use this to detect running in a remote context
    if task_id := os.getenv("CLEARML_TASK_ID"):
        logger.info(f"ClearML remote task: {task_id}")
        task = Task.get_task(task_id)
        pipeline_ui_config = task.get_parameters_as_dict()
        pipeline = PipelineDecorator.get_current_pipeline()
        pipeline(**pipeline_ui_config)
        sys.exit(0)

    # when running locally, load config file and get pipeline ClearML Settings
    # and pipeline function kwarg values
    elif config_file_path := parsed_args.config_file_path:
        raw_config = json.loads(config_file_path.read_text())
        pipeline_settings = TrainModelPipelineSettings.parse_obj(raw_config["settings"])
        pipeline_kwargs = TrainModelPipelineKwargs.parse_obj(raw_config["pipeline_kwargs"])
    else:
        pipeline_settings = TrainModelPipelineSettings()
        pipeline_kwargs = TrainModelPipelineKwargs()

    logger.info(
        f"ClearML Configuration: {pipeline_settings if pipeline_settings else '<remote execution>'}"
    )
    logger.info(f"Pipeline Configuration: {pipeline_kwargs}.")

    # the execution mode must be set on PipelineDecorator before the
    # decorated pipeline function is called
    if pipeline_settings.execution_mode == ExecutionMode.all_local_single_process:
        PipelineDecorator.debug_pipeline()
    elif pipeline_settings.execution_mode == ExecutionMode.local_controller_subprocess_tasks:
        PipelineDecorator.run_locally()

    pipeline = build_pipeline(pipeline_settings)
    # start pipeline with values from config
    pipeline(**pipeline_kwargs.dict())
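
For reference, the script is launched locally with a JSON config file, e.g. python the_script.py --config-file-path path/to/config.json. A rough sketch of what that config contains is below: the two top-level keys ("settings" and "pipeline_kwargs") match what the __main__ block reads, and the kwargs mirror the pipeline() signature, but the keys inside "settings" and the exact serialized execution_mode strings come from TrainModelPipelineSettings / ExecutionMode, which aren't shown here, so treat those as approximate.

import json
from pathlib import Path

# Sketch of a config.json for this script. The contents of "settings"
# (beyond execution_mode) are approximate, since the
# TrainModelPipelineSettings schema isn't shown above.
example_config = {
    "settings": {
        # one of: all_remote, all_local_single_process,
        # local_controller_subprocess_tasks (exact serialized string
        # is approximate)
        "execution_mode": "all_remote",
    },
    "pipeline_kwargs": {
        # these names mirror the pipeline() signature above
        "clearml_dataset_name": "random-data",
        "clearml_dataset_project_name": "examples",
        "clearml_model_name": "random-data-model",
        "generate_n_samples": 100,
        "generate_n_features": 1,
        "distribution_type": "uniform",
        "epochs": 1,
    },
}
Path("config.json").write_text(json.dumps(example_config, indent=2))

As I understand it, when the controller task is later cloned and picked up by an agent, the script re-executes with CLEARML_TASK_ID set in the environment, which is what the first branch in __main__ detects.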
  
  