GiganticTurtle0 BTW, this mock example worked out of the box (Python 3.6 on Ubuntu):
```
from typing import Any, List, Optional, Union

from clearml import Task
from dask.distributed import Client, LocalCluster

def start_dask_client(
    n_workers: Optional[int] = None,
    threads_per_worker: Optional[int] = None,
    memory_limit: str = "2Gb",
) -> Client:
    cluster = LocalCluster(
        n_workers=n_workers,
        threads_per_worker=threads_per_worker,
        memory_limit=memory_limit,
    )
    client = Client(cluster)
    print("Client info:", client)
    print("Scheduler info:", cluster.scheduler)
    print("Dashboard link:", cluster.dashboard_link, end="\n\n")
    for wid, worker in cluster.workers.items():
        print(f"{wid}: {worker}")
    return client

class DatasetReader:
    def __init__(self, use_dask_client: bool = False, preprocessor: Any = None) -> None:
        self.dask_client = start_dask_client() if use_dask_client else None
        self.preprocessor = preprocessor

    def read_dataset(self, filepaths: Union[str, List]) -> list:
        def read_and_process_file(filepath):
            print(f"{filepath} successfully loaded.")
            return 1

        # Fall back to a plain serial loop when no Dask client was started.
        if self.dask_client is None:
            return [read_and_process_file(fp) for fp in filepaths]
        futures = self.dask_client.map(read_and_process_file, filepaths)
        loaded_dataset = [future.result() for future in futures]
        return loaded_dataset

if __name__ == "__main__":
    # Connecting ClearML with the current process.
    task = Task.init(
        project_name="examples",
        task_name="parallel with dask",
    )
    # Create the input file paths.
    paths = [f"mock_file.{i:03}.dat" for i in range(200)]
    # Configure the dataset reader tool.
    reader = DatasetReader(use_dask_client=True)
    # Start the file-reading process and get a uniform dataset.
    in_memory_dataset = reader.read_dataset(paths)
    print(in_memory_dataset)
```
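If you also want the Dask dashboard link to show up in the ClearML web UI (not just on stdout), one option is to append something like this to the `__main__` block above. This bit is my addition, not part of the original snippet, just a sketch using the standard `Task.get_logger()` / `report_text` calls:
```
# Hypothetical addition to the __main__ block above: record the Dask
# dashboard link in the ClearML task log so it appears in the task's
# CONSOLE tab in the web UI.
if reader.dask_client is not None:
    task.get_logger().report_text(
        f"Dask dashboard: {reader.dask_client.dashboard_link}"
    )
```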