Hey AgitatedDove14 after playing around seems that if the callback filepath points to an hdf5 file it is not uploaded.
Basically one points to an hdf5 and the other one has no extension
ModelCheckpoint('best_model', save_best_only=True)
That worked for me now, what's the diff
If you try:ModelCheckpoint('best_model.hdf5', save_best_only=True)
does it work too?
I changed it to point to a folder and it shows up
GrievingTurkey78 are you able to reproduce it?
This is what I just used:
` import os
from argparse import ArgumentParser
from tensorflow.keras import utils as np_utils
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Activation, Dense, Softmax
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from clearml import Task
# Reproduction script: trains a small dense classifier on MNIST with a
# ModelCheckpoint callback, initialises a ClearML task, and saves the final
# model to <cwd>/tmp/model.h5.
parser = ArgumentParser()
parser.add_argument('--output-uri', type=str, required=False)
args = parser.parse_args()

# the data, shuffled and split between train and test sets
nb_classes = 10
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Flatten each 28x28 image to a 784-vector and scale pixel values to [0, 1].
X_train = X_train.reshape(60000, 784).astype('float32')/255.
X_test = X_test.reshape(10000, 784).astype('float32')/255.
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

model = Sequential()
model.add(Dense(10, input_shape=(784,)))
model.add(Activation('relu'))
model.add(Softmax())
model.summary()

output_folder = os.path.join(os.getcwd(), 'tmp')

model.compile(loss='categorical_crossentropy',
              optimizer=Adam(),
              metrics=['accuracy'])

# Checkpoint path deliberately has no file extension; the callback is created
# before Task.init() is called below.
model_checkpoint = ModelCheckpoint('best_model', save_best_only=True)

# Connecting ClearML
task = Task.init(project_name='examples',
                 task_name='Upload problem',
                 output_uri=args.output_uri)

history = model.fit(X_train, Y_train,
                    batch_size=128,
                    epochs=5,
                    callbacks=[model_checkpoint],
                    verbose=1,
                    validation_data=(X_test, Y_test))

# Explicit final save, in addition to whatever the checkpoint callback wrote.
os.makedirs(output_folder, exist_ok=True)
model.save(os.path.join(output_folder, 'model.h5'))
print('Number of output models: {}'.format(len(task.models["output"]))) `
Hi GrievingTurkey78
I think it is already fixed with 0.17.5, no?
Hi AgitatedDove14 ! Do you have any updates on this?
This works:filepath = self.log_dir + os.sep + "checkpoint" self.callbacks.append( ModelCheckpoint( filepath, monitor="val_loss", mode="min", save_best_only=True, save_weights_only=True, ) )
And this doesn't:filepath = self.log_dir + os.sep + "checkpoint.hdf5" self.callbacks.append( ModelCheckpoint( filepath, monitor="val_loss", mode="min", save_best_only=True, save_weights_only=True, ) )
AgitatedDove14 Thanks! I'm trying to figure out how to create a minimum working example! I am also working with Hydra so that may be a thing. The extension is what's causing it to fail (haven't figured out why).
GrievingTurkey78 I have to admit I can't see the difference, can you help me out 🙂
Hmm I think this was the fix (only with TF2.4), let me check a sec
Thanks AgitatedDove14 ! seems to be subclassed model + extension
GrievingTurkey78 please feel free to send me code snippets to test 🙂
Funny it's the extension "h5" , it is a different execution path inside keras...
Let me see what can be done 🙂
It works perfectly! AgitatedDove14 There is something weird on my side 😢
Hey AgitatedDove14 does this work for you?
` from argparse import ArgumentParser
from tensorflow.keras import utils as np_utils
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
import tensorflow as tf
from clearml import Task
class Linear(tf.keras.Model):
    """Subclassed Keras model consisting of a single softmax Dense layer."""

    def __init__(self, in_shape=(784,), num_classes=10):
        """Create the dense layer.

        Args:
            in_shape: Value passed to the Dense layer as ``input_shape``.
            num_classes: Number of output units of the Dense layer.
        """
        # Must be the real constructor (``__init__``): a plain ``init`` method
        # is never invoked, which would leave ``self.linear`` undefined.
        super().__init__()
        self.linear = Dense(num_classes, input_shape=in_shape, activation="softmax")

    def call(self, inputs, training=None, mask=None):
        """Apply the dense layer to ``inputs``; ``training``/``mask`` are unused."""
        return self.linear(inputs)
def main():
    """Train the subclassed ``Linear`` model on MNIST under a ClearML task.

    Reads an optional ``--output-uri`` command-line argument, prepares the
    MNIST data, builds a weights-only ModelCheckpoint writing to
    ``best_model.hdf5``, initialises the ClearML task and fits for 5 epochs.
    """
    cli = ArgumentParser()
    cli.add_argument("--output-uri", type=str, required=False)
    args = cli.parse_args()

    # Load MNIST, flatten every image to 784 values and scale to [0, 1].
    num_labels = 10
    (train_images, train_labels), (test_images, test_labels) = mnist.load_data()
    train_images = train_images.reshape(60000, 784).astype("float32") / 255.0
    test_images = test_images.reshape(10000, 784).astype("float32") / 255.0
    print(train_images.shape[0], "train samples")
    print(test_images.shape[0], "test samples")

    # One-hot encode the integer class labels.
    train_targets = np_utils.to_categorical(train_labels, num_labels)
    test_targets = np_utils.to_categorical(test_labels, num_labels)

    model = Linear()
    model.compile(
        loss="categorical_crossentropy",
        optimizer=Adam(),
        metrics=["accuracy"],
    )

    # The checkpoint callback is built before the ClearML task is initialised.
    checkpoint_cb = ModelCheckpoint(
        "best_model.hdf5",
        save_best_only=True,
        save_weights_only=True,
    )

    task = Task.init(
        project_name="examples",
        task_name="Upload problem",
        output_uri=args.output_uri,
    )

    fit_options = {
        "batch_size": 128,
        "epochs": 5,
        "callbacks": [checkpoint_cb],
        "verbose": 1,
        "validation_data": (test_images, test_targets),
    }
    history = model.fit(train_images, train_targets, **fit_options)
if __name__ == "__main__":
    main() `
Thanks Martin! I'll keep checking 🙂
Yey @ https://app.slack.com/team/U01CJ43KX2N this one does not work!
Give me a minute I'll
Oh my bad, post 0.17.5 🙂
RC will be out soon, in the meantime you can install directly from github:pip install git+