Examples: query, "exact match", wildcard*, wild?ard, wild*rd
Fuzzy search: cake~ (finds cakes, bake)
Term boost: "red velvet"^4, chocolate^2
Field grouping: tags:(+work -"fun-stuff")
Escaping: Escape characters +-&|!(){}[]^"~*?:\ with \, e.g. \+
Range search: properties.timestamp:[1587729413488 TO *] (inclusive), properties.title:{A TO Z}(excluding A and Z)
Combinations: chocolate AND vanilla, chocolate OR vanilla, (chocolate OR vanilla) NOT "vanilla pudding"
Field search: properties.title:"The Title" AND text
Unanswered
Hi, Is There A Simple Way To Make


GiganticTurtle0 BTW, this mock example worked out of the box (python 3.6 on Ubuntu):
` from typing import Any, Dict, List, Tuple, Union

from clearml import Task
from dask.distributed import Client, LocalCluster

def start_dask_client(
n_workers: int = None, threads_per_worker: int = None, memory_limit: str = "2Gb"
) -> Client:
cluster = LocalCluster(
n_workers=n_workers,
threads_per_worker=threads_per_worker,
memory_limit=memory_limit,
)
client = Client(cluster)
print("Client info:", client)
print("Scheduler info:", cluster.scheduler)
print("Dashboard link:", cluster.dashboard_link, end="\n\n")
for wid, worker in cluster.workers.items():
print(f"{wid}: {worker}")
return client

class DatasetReader:
def init(self, use_dask_client: bool = False, preprocesser: Any = None) -> None:
self.dask_client = start_dask_client() if use_dask_client else None
self.preprocesser = preprocesser

def read_dataset(self, filepaths: Union[str, List]) -> list:
    def read_and_process_file(filepath):
        print(f"{filepath} successfully loaded.")
        return 1

    futures = self.dask_client.map(read_and_process_file, filepaths)
    loaded_dataset = [future.result() for future in futures]

    return loaded_dataset

if name == "main":

# Connecting ClearML with the current process.
task = Task.init(
    project_name="examples",
    task_name="parallel with dask",
)

# Create input
paths = [f"mock_file.{i:03}.dat" for i in range(0, 200)]

# Configure the dataset reader tool.
reader = DatasetReader(use_dask_client=True)

# Start files reading process and get an uniform dataset.
in_memory_dataset = reader.read_dataset(paths)

print(in_memory_dataset) `
  
  
Posted 3 years ago
164 Views
0 Answers
3 years ago
one year ago