This works: filepath = self.log_dir + os.sep + "checkpoint" self.callbacks.append( ModelCheckpoint( filepath, monitor="val_loss", mode="min", save_best_only=True, save_weights_only=True, ) )
And this doesn't: filepath = self.log_dir + os.sep + "checkpoint.hdf5" self.callbacks.append( ModelCheckpoint( filepath, monitor="val_loss", mode="min", save_best_only=True, save_weights_only=True, ) )
ModelCheckpoint('best_model', save_best_only=True)
That worked for me now, what's the diff
GrievingTurkey78 please feel free to send me code snippets to test 🙂
Basically one points to an hdf5 file and the other one has no extension
I changed it to point to a folder and it shows up
This is what I just used:
` import os
from argparse import ArgumentParser
from tensorflow.keras import utils as np_utils
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Activation, Dense, Softmax
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from clearml import Task
# Reproduction script: trains a small dense MNIST classifier with a Keras
# ModelCheckpoint callback after initialising a ClearML task, then saves the
# final model to ./tmp/model.h5.

# Optional destination that is forwarded to Task.init(output_uri=...).
parser = ArgumentParser()
parser.add_argument('--output-uri', type=str, required=False)
args = parser.parse_args()

# the data, shuffled and split between train and test sets
nb_classes = 10
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Flatten each 28x28 image into a 784-vector and scale pixel values by 1/255.
X_train = X_train.reshape(60000, 784).astype('float32')/255.
X_test = X_test.reshape(10000, 784).astype('float32')/255.
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

model = Sequential()
model.add(Dense(10, input_shape=(784,)))
model.add(Activation('relu'))
model.add(Softmax())
model.summary()

output_folder = os.path.join(os.getcwd(), 'tmp')

model.compile(loss='categorical_crossentropy',
              optimizer=Adam(),
              metrics=['accuracy'])

# Checkpoint path deliberately has no file extension; the surrounding
# discussion compares this against paths ending in ".hdf5".
model_checkpoint = ModelCheckpoint('best_model', save_best_only=True)

# Connecting ClearML
task = Task.init(project_name='examples',
                 task_name='Upload problem',
                 output_uri=args.output_uri)

history = model.fit(X_train, Y_train,
                    batch_size=128,
                    epochs=5,
                    callbacks=[model_checkpoint],
                    verbose=1,
                    validation_data=(X_test, Y_test))

# The output folder is only created here, right before the final save.
os.makedirs(output_folder, exist_ok=True)
model.save(os.path.join(output_folder, 'model.h5'))
print('Number of output models: {}'.format(len(task.models["output"]))) `
AgitatedDove14 Thanks! I'm trying to figure out how to create a minimal working example! I am also working with Hydra, so that may be a factor. The extension is what's causing it to fail (haven't figured out why).
Oh my bad, post 0.17.5 🙂
RC will be out soon, in the meantime you can install directly from github:pip install git+
If you try: ModelCheckpoint('best_model.hdf5', save_best_only=True)
does it work too?
Yey @ https://app.slack.com/team/U01CJ43KX2N this one does not work!
Give me a minute I'll
Hi GrievingTurkey78
I think it is already fixed with 0.17.5, no?
Hi AgitatedDove14 ! Do you have any updates on this?
Thanks Martin! I'll keep checking 🙂
GrievingTurkey78 are you able to reproduce it?
Thanks AgitatedDove14 ! seems to be subclassed model + extension
GrievingTurkey78 I have to admit I can't see the difference, can you help me out 🙂
It works perfectly! AgitatedDove14 There is something weird on my side 😢
Hey AgitatedDove14 does this work for you?
` from argparse import ArgumentParser
from tensorflow.keras import utils as np_utils
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
import tensorflow as tf
from clearml import Task
class Linear(tf.keras.Model):
    """Subclassed Keras model consisting of a single softmax ``Dense`` layer."""

    def __init__(self, in_shape=(784,), num_classes=10):
        """Create the model's only layer.

        Args:
            in_shape: Passed to the ``Dense`` layer as ``input_shape``.
            num_classes: Number of units in the ``Dense`` layer.
        """
        # Must be the real constructor (``__init__``), otherwise ``Linear()``
        # never runs it and ``self.linear`` is missing when ``call`` executes.
        super().__init__()
        self.linear = Dense(num_classes, input_shape=in_shape, activation="softmax")

    def call(self, inputs, training=None, mask=None):
        """Apply the dense layer to ``inputs``; ``training`` and ``mask`` are unused."""
        return self.linear(inputs)
def main():
    """Train ``Linear`` on MNIST with a weights-only ``best_model.hdf5`` checkpoint.

    Parses an optional ``--output-uri`` command-line argument and forwards it
    to ``Task.init`` as ``output_uri`` before fitting the model for 5 epochs.

    NOTE(review): per the surrounding discussion, this subclassed-model plus
    ``.hdf5`` checkpoint setup is the case where the checkpoint was reported
    as not being uploaded - confirm against the ClearML version in use.
    """
    parser = ArgumentParser()
    parser.add_argument("--output-uri", type=str, required=False)
    args = parser.parse_args()

    # the data, shuffled and split between train and test sets
    nb_classes = 10
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    # Flatten each image into a 784-vector and scale pixel values by 1/255.
    X_train = X_train.reshape(60000, 784).astype("float32") / 255.0
    X_test = X_test.reshape(10000, 784).astype("float32") / 255.0
    print(X_train.shape[0], "train samples")
    print(X_test.shape[0], "test samples")

    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)

    model = Linear()
    model.compile(
        loss="categorical_crossentropy", optimizer=Adam(), metrics=["accuracy"]
    )

    model_checkpoint = ModelCheckpoint(
        "best_model.hdf5", save_best_only=True, save_weights_only=True
    )

    # Connecting ClearML
    task = Task.init(
        project_name="examples", task_name="Upload problem", output_uri=args.output_uri
    )

    # The fit history is not used afterwards, so the return value is discarded.
    model.fit(
        X_train,
        Y_train,
        batch_size=128,
        epochs=5,
        callbacks=[model_checkpoint],
        verbose=1,
        validation_data=(X_test, Y_test),
    )
if __name__ == "__main__":
    main() `
Hey AgitatedDove14 after playing around seems that if the callback filepath points to an hdf5 file it is not uploaded.
Hmm I think this was the fix (only with TF2.4), let me check a sec
Funny it's the extension "h5" , it is a different execution path inside keras...
Let me see what can be done 🙂