Answered
Hello! Does Someone Have A Huggingface Integration Example?

Posted 2 years ago

Answers 3


Hello! Integration in what sense? Training a model? Uploading a model to the hub? Something else?

  
  
Posted 2 years ago

Mainly logging. Hugging Face's Trainer has a `report_to` argument that is supported by TensorBoard, wandb, Comet, etc. This means that during training all of the metrics are automatically logged to the specified platform (which is very convenient). Has anyone made something similar for ClearML?
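For reference, this is roughly how that argument is used (a minimal sketch, assuming a recent `transformers` version; the accepted backend names may vary between releases):

```python
from transformers import TrainingArguments

# `report_to` selects which experiment tracker(s) receive the Trainer's logs.
training_args = TrainingArguments(
    output_dir="outputs",
    logging_steps=10,
    report_to=["wandb"],  # e.g. ["tensorboard"], ["comet_ml"], "all", or "none"
)
# These args are then passed to Trainer(model=..., args=training_args, ...),
# and metrics logged during training are forwarded to the selected backend(s).
```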

  
  
Posted 2 years ago

At the end of the day these integrations are based on Callback classes. For example, this is the wandb callback from the `transformers` source:
```python
class WandbCallback(TrainerCallback):
    """
    A :class:`~transformers.TrainerCallback` that sends the logs to Weights & Biases.
    """

    def __init__(self):
        has_wandb = is_wandb_available()
        assert has_wandb, "WandbCallback requires wandb to be installed. Run `pip install wandb`."
        if has_wandb:
            import wandb

            self._wandb = wandb
        self._initialized = False
        # log outputs
        self._log_model = os.getenv("WANDB_LOG_MODEL", "FALSE").upper() in ENV_VARS_TRUE_VALUES.union({"TRUE"})

    def setup(self, args, state, model, **kwargs):
        """
        Setup the optional Weights & Biases (`wandb`) integration.

        One can subclass and override this method to customize the setup if needed. You can also override the
        following environment variables:

        Environment:
            WANDB_LOG_MODEL (:obj:`bool`, `optional`, defaults to :obj:`False`):
                Whether or not to log model as artifact at the end of training. Use along with
                `TrainingArguments.load_best_model_at_end` to upload best model.
            WANDB_WATCH (:obj:`str`, `optional` defaults to :obj:`"gradients"`):
                Can be :obj:`"gradients"`, :obj:`"all"` or :obj:`"false"`. Set to :obj:`"false"` to disable gradient
                logging or :obj:`"all"` to log gradients and parameters.
            WANDB_PROJECT (:obj:`str`, `optional`, defaults to :obj:`"huggingface"`):
                Set this to a custom string to store results in a different project.
            WANDB_DISABLED (:obj:`bool`, `optional`, defaults to :obj:`False`):
                Whether or not to disable wandb entirely. Set `WANDB_DISABLED=true` to disable.
        """
        if self._wandb is None:
            return
        self._initialized = True
        if state.is_world_process_zero:
            logger.info(
                'Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"'
            )
            combined_dict = {**args.to_sanitized_dict()}

            if hasattr(model, "config") and model.config is not None:
                model_config = model.config.to_dict()
                combined_dict = {**model_config, **combined_dict}
            trial_name = state.trial_name
            init_args = {}
            if trial_name is not None:
                run_name = trial_name
                init_args["group"] = args.run_name
            else:
                run_name = args.run_name

            if self._wandb.run is None:
                self._wandb.init(
                    project=os.getenv("WANDB_PROJECT", "huggingface"),
                    name=run_name,
                    **init_args,
                )
            # add config parameters (run may have been created manually)
            self._wandb.config.update(combined_dict, allow_val_change=True)

            # define default x-axis (for latest wandb versions)
            if getattr(self._wandb, "define_metric", None):
                self._wandb.define_metric("train/global_step")
                self._wandb.define_metric("*", step_metric="train/global_step", step_sync=True)

            # keep track of model topology and gradients, unsupported on TPU
            if not is_torch_tpu_available() and os.getenv("WANDB_WATCH") != "false":
                self._wandb.watch(
                    model, log=os.getenv("WANDB_WATCH", "gradients"), log_freq=max(100, args.logging_steps)
                )

    def on_train_begin(self, args, state, control, model=None, **kwargs):
        if self._wandb is None:
            return
        hp_search = state.is_hyper_param_search
        if hp_search:
            self._wandb.finish()
            self._initialized = False
        if not self._initialized:
            self.setup(args, state, model, **kwargs)

    def on_train_end(self, args, state, control, model=None, tokenizer=None, **kwargs):
        if self._wandb is None:
            return
        if self._log_model and self._initialized and state.is_world_process_zero:
            from .trainer import Trainer

            fake_trainer = Trainer(args=args, model=model, tokenizer=tokenizer)
            with tempfile.TemporaryDirectory() as temp_dir:
                fake_trainer.save_model(temp_dir)
                metadata = (
                    {
                        k: v
                        for k, v in dict(self._wandb.summary).items()
                        if isinstance(v, numbers.Number) and not k.startswith("_")
                    }
                    if not args.load_best_model_at_end
                    else {
                        f"eval/{args.metric_for_best_model}": state.best_metric,
                        "train/total_floss": state.total_flos,
                    }
                )
                artifact = self._wandb.Artifact(name=f"model-{self._wandb.run.id}", type="model", metadata=metadata)
                for f in Path(temp_dir).glob("*"):
                    if f.is_file():
                        with artifact.new_file(f.name, mode="wb") as fa:
                            fa.write(f.read_bytes())
                self._wandb.run.log_artifact(artifact)

    def on_log(self, args, state, control, model=None, logs=None, **kwargs):
        if self._wandb is None:
            return
        if not self._initialized:
            self.setup(args, state, model)
        if state.is_world_process_zero:
            logs = rewrite_logs(logs)
            self._wandb.log({**logs, "train/global_step": state.global_step})
```

So I'm looking for something similar for ClearML.
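For illustration, here is a rough sketch of what an analogous ClearML callback could look like, built on the same `TrainerCallback` hooks shown above and ClearML's `Task` API. The class name `ClearMLCallback` is made up here for the example; this is not an existing integration:

```python
from clearml import Task
from transformers import TrainerCallback


class ClearMLCallback(TrainerCallback):
    """Rough sketch of a ClearML reporting callback (illustrative, not official)."""

    def __init__(self, project_name="huggingface", task_name="trainer-run"):
        self._task = None
        self._project_name = project_name
        self._task_name = task_name

    def on_train_begin(self, args, state, control, model=None, **kwargs):
        if self._task is None and state.is_world_process_zero:
            # Create (or reuse) a ClearML task and record the training arguments
            self._task = Task.init(project_name=self._project_name, task_name=self._task_name)
            self._task.connect(args.to_sanitized_dict())

    def on_log(self, args, state, control, logs=None, **kwargs):
        if self._task is None or not state.is_world_process_zero:
            return
        logger = self._task.get_logger()
        for key, value in (logs or {}).items():
            if isinstance(value, (int, float)):
                # Report each numeric metric against the global training step
                logger.report_scalar(title="train", series=key, value=value, iteration=state.global_step)
```

It would then be attached via `Trainer(..., callbacks=[ClearMLCallback()])`.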
  
  
Posted 2 years ago