Examples: query, "exact match", wildcard*, wild?ard, wild*rd
Fuzzy search: cake~ (finds cakes, bake)
Term boost: "red velvet"^4, chocolate^2
Field grouping: tags:(+work -"fun-stuff")
Escaping: Escape characters +-&|!(){}[]^"~*?:\ with \, e.g. \+
Range search: properties.timestamp:[1587729413488 TO *] (inclusive), properties.title:{A TO Z} (excluding A and Z)
Combinations: chocolate AND vanilla, chocolate OR vanilla, (chocolate OR vanilla) NOT "vanilla pudding"
Field search: properties.title:"The Title" AND text
Answered
Probably Found A Bug Related To

I have probably found a bug related to storage.verify_upload(uri) working incorrectly with the cache
clearml.__version__ -> '1.12.2'

Scenario 1

import clearml
out_ds = clearml.Dataset.create(
    dataset_project="test",
    dataset_name=f"test",
    output_uri="
",
) 

Works correctly

Scenario 2

import clearml
# open other dataset, that is located in the same bucket
ds = clearml.Dataset.get("1a607bbeb31b4e2c8033112950827d8e")
out_ds = clearml.Dataset.create(
    dataset_project="test",
    dataset_name=f"test",
    output_uri="
",
) 

Fails with an error (see the traceback in the thread)

  
  
Posted 6 months ago
Votes Newest

Answers 5


Error:
---------------------------------------------------------------------------
NotFound                                  Traceback (most recent call last)
Cell In[1], line 3
      1 import clearml
      2 ds = clearml.Dataset.get("1a607bbeb31b4e2c8033112950827d8e")
----> 3 out_ds = clearml.Dataset.create(
      4     dataset_project="test",
      5     dataset_name=f"test",
      6     output_uri="
",
      7 )

File ~/miniconda3/envs/vfm38/lib/python3.8/site-packages/clearml/datasets/dataset.py:1304, in Dataset.create(cls, dataset_name, dataset_project, dataset_tags, parent_datasets, use_current_task, dataset_version, output_uri, description)
   1302     instance._task.output_uri = output_uri
   1303     # noinspection PyProtectedMember
-> 1304     instance._task.get_logger().set_default_upload_destination(output_uri)
   1305 # noinspection PyProtectedMember
   1306 instance._using_current_task = use_current_task

File ~/miniconda3/envs/vfm38/lib/python3.8/site-packages/clearml/logger.py:1107, in Logger.set_default_upload_destination(self, uri)
   1104 storage = StorageHelper.get(uri)
   1106 # Verify that we can upload to this destination
-> 1107 uri = storage.verify_upload(folder_uri=uri)
   1109 self._default_upload_destination = uri

File ~/miniconda3/envs/vfm38/lib/python3.8/site-packages/clearml/storage/helper.py:2405, in StorageHelper.verify_upload(self, folder_uri, raise_on_error, log_on_error)
   2397     _Boto3Driver._test_bucket_config(
   2398         self._conf,
   2399         self._log,
   (...)
   2402         log_on_error=log_on_error,
   2403     )
   2404 elif self._scheme == _GoogleCloudStorageDriver.scheme:
-> 2405     self._driver.test_upload(test_path, self._conf)
   2407 elif self._scheme == 'file':
   2408     # Check path exists
   2409     Path(test_path).mkdir(parents=True, exist_ok=True)

File ~/miniconda3/envs/vfm38/lib/python3.8/site-packages/clearml/storage/helper.py:912, in _GoogleCloudStorageDriver.test_upload(self, test_path, config, **_)
    909         test_obj = blob
    911 permissions_to_test = ('storage.objects.get', 'storage.objects.update')
--> 912 return set(test_obj.test_iam_permissions(permissions_to_test)) == set(permissions_to_test)

File ~/miniconda3/envs/vfm38/lib/python3.8/site-packages/google/cloud/storage/bucket.py:2940, in Bucket.test_iam_permissions(self, permissions, client, timeout, retry)
   2937     query_params["userProject"] = self.user_project
   2939 path = f"{self.path}/iam/testPermissions"
-> 2940 resp = client._get_resource(
   2941     path,
   2942     query_params=query_params,
   2943     timeout=timeout,
   2944     retry=retry,
   2945     _target_object=None,
   2946 )
   2947 return resp.get("permissions", [])

File ~/miniconda3/envs/vfm38/lib/python3.8/site-packages/google/cloud/storage/client.py:387, in Client._get_resource(self, path, query_params, headers, timeout, retry, _target_object)
    331 def _get_resource(
    332     self,
    333     path,
   (...)
    338     _target_object=None,
    339 ):
    340     """Helper for bucket / blob methods making API 'GET' calls.
    341 
    342     Args:
   (...)
    385             If the bucket is not found.
    386     """
--> 387     return self._connection.api_request(
    388         method="GET",
    389         path=path,
    390         query_params=query_params,
    391         headers=headers,
    392         timeout=timeout,
    393         retry=retry,
    394         _target_object=_target_object,
    395     )

File ~/miniconda3/envs/vfm38/lib/python3.8/site-packages/google/cloud/storage/_http.py:72, in Connection.api_request(self, *args, **kwargs)
     70     if retry:
     71         call = retry(call)
---> 72 return call()

File ~/miniconda3/envs/vfm38/lib/python3.8/site-packages/google/api_core/retry.py:372, in Retry.__call__.<locals>.retry_wrapped_func(*args, **kwargs)
    368 target = functools.partial(func, *args, **kwargs)
    369 sleep_generator = exponential_sleep_generator(
    370     self._initial, self._maximum, multiplier=self._multiplier
    371 )
--> 372 return retry_target(
    373     target,
    374     self._predicate,
    375     sleep_generator,
    376     self._timeout,
    377     on_error=on_error,
    378 )

File ~/miniconda3/envs/vfm38/lib/python3.8/site-packages/google/api_core/retry.py:207, in retry_target(target, predicate, sleep_generator, timeout, on_error, **kwargs)
    205 for sleep in sleep_generator:
    206     try:
--> 207         result = target()
    208         if inspect.isawaitable(result):
    209             warnings.warn(_ASYNC_RETRY_WARNING)

File ~/miniconda3/envs/vfm38/lib/python3.8/site-packages/google/cloud/_http/__init__.py:494, in JSONConnection.api_request(self, method, path, query_params, data, content_type, headers, api_base_url, api_version, expect_json, _target_object, timeout, extra_api_info)
    482 response = self._make_request(
    483     method=method,
    484     url=url,
   (...)
    490     extra_api_info=extra_api_info,
    491 )
    493 if not 200 <= response.status_code < 300:
--> 494     raise exceptions.from_http_response(response)
    496 if expect_json and response.content:
    497     return response.json()

NotFound: 404 GET 
: Not Found
  
  
Posted 6 months ago

Oh. I just tested it in the new version, it really works now. Thank you very much!

  
  
Posted 6 months ago

Sorry for reporting a bug without testing on the newest version

  
  
Posted 6 months ago

It looks like opening a dataset that is stored in a GCP bucket and then trying to create another dataset in the same bucket causes issues.

Maybe there is a hotfix to reset the ClearML storage manager cache in the middle of the script?

  
  
Posted 6 months ago

Hi @<1523701137134325760:profile|CharmingStarfish14> , I think this was fixed in the last version ( PR ) - please try v1.13.2

  
  
Posted 6 months ago
338 Views
5 Answers
6 months ago
6 months ago
Tags