AgitatedDove14
` import os
os.environ['LC_ALL'] = "C.UTF-8"
os.environ['LANG'] = "C.UTF-8"
from clearml import Task
CLEARML_PROJECT = 'Vodafone Sentiment full'
CLEARML_TASK = 'HPO_BASE_TASK'
os.environ["CLEARML_PROJECT"] = CLEARML_PROJECT
os.environ["CLEARML_TASK"] = CLEARML_TASK
os.environ['MPLBACKEND'] = "TkAg"
Task.set_credentials(
api_host=" ",
web_host=" ",
files_host=" ",
key='******************',
secret='*********************'
)
task = Task.init(project_name=CLEARML_PROJECT, task_name=CLEARML_TASK)
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from torch.utils.data import Dataset,DataLoader
from transformers import TrainingArguments, Trainer,get_scheduler
from datasets import load_metric, load_dataset, Value, Dataset, load_from_disk
import re
import torch.nn as nn
from clearml import Dataset as ds
from datasets import load_from_disk
import os
artifact_dir = ds.get(dataset_name="vodafone_dataset_preprocessed_train_dataloader", dataset_project="vodafone dataset_full").get_local_copy()
print(os.listdir(artifact_dir))
train_dataset = load_from_disk(os.path.join(artifact_dir, 'data'))
artifact_dir = ds.get(dataset_name="vodafone_dataset_preprocessed_test_dataloader", dataset_project="vodafone dataset_full").get_local_copy()
print(os.listdir(artifact_dir))
eval_dataset = load_from_disk(os.path.join(artifact_dir, 'data'))
print(input_train_data, input_test_data)
print(train_dataset.shape)
print(eval_dataset.shape)
example_configuration = {
'epochs': 5,
'lr': 0.00001,
'optimizer': 'adam'
}
task.connect(example_configuration)
model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-xlm-roberta-base-sentiment")
tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-xlm-roberta-base-sentiment")
metric = load_metric('f1')
def compute_metrics(eval_pred):
predictions, labels = eval_pred
predictions = np.argmax(predictions, axis=1)
return metric.compute(predictions=predictions, references=labels , average='micro')
torch.cuda.empty_cache()
training_args = TrainingArguments(
num_train_epochs=example_configuration['epochs'],
logging_steps=5,
per_device_train_batch_size=4,
per_device_eval_batch_size=4,
evaluation_strategy="epoch",
learning_rate=example_configuration['lr'],
save_steps = 250,
save_total_limit = 2,
output_dir = 'output/',
report_to = 'clearml',
)
trainer = Trainer(
model=model,
args=training_args,
train_dataset=train_dataset,
eval_dataset=eval_dataset,
tokenizer=tokenizer,
compute_metrics=compute_metrics
)
trainer.train() `