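# Reproduction script: upload a small ClearML dataset, fetch a local copy, and compare the file listings.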
import os
import glob
from clearml import Dataset
DATASET_NAME = "Bug"
DATASET_PROJECT = "ProjectFolder"
TARGET_FOLDER = "clearml_bug"
# Upload destination. ``os.getenv`` yields None when the variable is unset,
# and that value is forwarded unchanged as ``output_url`` below.
S3_BUCKET = os.getenv("S3_BUCKET")


def write_sample_data(folder):
    """Create *folder* if necessary and write the sample ``data.txt`` into it.

    Args:
        folder: Directory that will hold the sample file. Missing parent
            directories are created; an existing directory is reused.
    """
    os.makedirs(folder, exist_ok=True)
    with open(os.path.join(folder, "data.txt"), "w", encoding="utf-8") as f:
        f.write("Hello, ClearML")


def list_basenames(folder):
    """Return the sorted base names of every entry found beneath *folder*.

    The search is recursive, so entries in nested directories (and the nested
    directories themselves) are included.

    Args:
        folder: Root directory to scan. A missing directory yields ``[]``.

    Returns:
        A sorted list of base names. Sorting matters because ``glob`` returns
        entries in arbitrary order, which would make a direct comparison of
        two listings unreliable.
    """
    # Escape the root so glob metacharacters in the folder name are literal.
    pattern = os.path.join(glob.escape(folder), "**", "*")
    return sorted(os.path.basename(path) for path in glob.glob(pattern, recursive=True))


def main():
    """Upload the sample folder as a dataset and verify its local copy.

    Raises:
        AssertionError: If either listing is empty or the two listings differ.
    """
    write_sample_data(TARGET_FOLDER)

    # upload dataset
    dataset = Dataset.create(dataset_name=DATASET_NAME, dataset_project=DATASET_PROJECT)
    dataset.add_files(TARGET_FOLDER)
    dataset.upload(
        show_progress=True,
        verbose=False,
        output_url=S3_BUCKET,
        compression=None,
    )
    # Close the dataset version before requesting a local copy, following the
    # create -> add_files -> upload -> finalize -> get workflow.
    dataset.finalize()

    # getting a local copy of the dataset
    dataset_folder = Dataset.get(dataset.id).get_local_copy()

    target_files = list_basenames(TARGET_FOLDER)
    downloaded_files = list_basenames(dataset_folder)

    # test upload
    assert target_files, f"no files found under {TARGET_FOLDER!r}"
    assert downloaded_files, f"no files found under {dataset_folder!r}"
    assert target_files == downloaded_files, (
        f"local copy differs from source: {downloaded_files} != {target_files}"
    )


if __name__ == "__main__":
    main()