#try: # from jupyterthemes import jtplot # jtplot.style() #except: # pass from keras.layers import Dense, Dropout, Activation, Cropping2D, Flatten, Convolution1D, Convolution2D, MaxPooling1D,MaxPooling2D from keras.models import Sequential import keras import keras.backend as K import numpy as np from keras.preprocessing.image import ImageDataGenerator from keras.utils import np_utils import TRex import shutil from keras.layers import Dense, Dropout, Activation, Cropping2D, Flatten, Convolution1D, Convolution2D, MaxPooling1D,MaxPooling2D, Lambda from keras.layers import SpatialDropout2D #### NETWORK LAYOUT def binary_focal_loss(gamma=2., alpha=.25): """ Binary form of focal loss. FL(p_t) = -alpha * (1 - p_t)**gamma * log(p_t) where p = sigmoid(x), p_t = p or 1 - p depending on if the label is 1 or 0, respectively. References: https://arxiv.org/pdf/1708.02002.pdf Usage: model.compile(loss=[binary_focal_loss(alpha=.25, gamma=2)], metrics=["accuracy"], optimizer=adam) """ def binary_focal_loss_fixed(y_true, y_pred): from keras import backend as K import tensorflow as tf """ :param y_true: A tensor of the same shape as `y_pred` :param y_pred: A tensor resulting from a sigmoid :return: Output tensor. """ pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred)) pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred)) epsilon = K.epsilon() # clip to prevent NaN's and Inf's pt_1 = K.clip(pt_1, epsilon, 1. - epsilon) pt_0 = K.clip(pt_0, epsilon, 1. - epsilon) return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1)) \ -K.sum((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0)) return binary_focal_loss_fixed def categorical_focal_loss(gamma=2., alpha=.25): """ Softmax version of focal loss. m FL = ∑ -alpha * (1 - p_o,c)^gamma * y_o,c * log(p_o,c) c=1 where m = number of classes, c = class and o = observation Parameters: alpha -- the same as weighing factor in balanced cross entropy gamma -- focusing parameter for modulating factor (1-p) Default value: gamma -- 2.0 as mentioned in the paper alpha -- 0.25 as mentioned in the paper References: Official paper: https://arxiv.org/pdf/1708.02002.pdf https://www.tensorflow.org/api_docs/python/tf/keras/backend/categorical_crossentropy Usage: model.compile(loss=[categorical_focal_loss(alpha=.25, gamma=2)], metrics=["accuracy"], optimizer=adam) """ def categorical_focal_loss_fixed(y_true, y_pred): from keras import backend as K import tensorflow as tf """ :param y_true: A tensor of the same shape as `y_pred` :param y_pred: A tensor resulting from a softmax :return: Output tensor. """ # Scale predictions so that the class probas of each sample sum to 1 y_pred /= K.sum(y_pred, axis=-1, keepdims=True) # Clip the prediction value to prevent NaN's and Inf's epsilon = K.epsilon() y_pred = K.clip(y_pred, epsilon, 1. - epsilon) # Calculate Cross Entropy cross_entropy = -y_true * K.log(y_pred) # Calculate Focal Loss loss = alpha * K.pow(1 - y_pred, gamma) * cross_entropy # Sum the losses in mini_batch return K.sum(loss, axis=1) return categorical_focal_loss_fixed def reinitialize_network(): global model, image_width, image_height, classes, learning_rate model = Sequential() TRex.log("initializing network:"+str(image_width)+","+str(image_height)+" "+str(len(classes))+" classes") model.add(Lambda(lambda x: (x / 127.5 - 1.0), input_shape=(int(image_width),int(image_height),1))) model.add(Convolution2D(16, 5, activation='relu')) model.add(MaxPooling2D(pool_size=(2,2))) model.add(SpatialDropout2D(0.25)) #model.add(Dropout(0.5)) model.add(Convolution2D(64, 5, activation='relu')) model.add(MaxPooling2D(pool_size=(2,2))) model.add(SpatialDropout2D(0.25)) #model.add(Dropout(0.5)) model.add(Convolution2D(100, 5, activation='relu')) model.add(MaxPooling2D(pool_size=(2,2))) model.add(SpatialDropout2D(0.25)) #model.add(Dropout(0.5)) model.add(Flatten()) model.add(Dense(100, activation='relu')) model.add(Dropout(0.5)) model.add(Dense(len(classes), activation='softmax')) #### / NETWORK LAYOUT import platform import importlib found = True try: importlib.import_module('tensorflow') import tensorflow except ImportError: found = False if found: model.compile(loss=#'categorical_crossentropy', categorical_focal_loss(gamma=2., alpha=.25), optimizer=keras.optimizers.Adam(lr=learning_rate), metrics=['accuracy']) else: model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(lr=learning_rate), metrics=['accuracy']) model.summary(print_fn=TRex.log) class UserCancelException(Exception): """Raised when user clicks cancel""" pass class ValidationCallback(keras.callbacks.Callback): def __init__(self, model, classes, X_test, Y_test, epochs, filename, prefix, output_path, compare_acc, estimate_uniqueness, settings): import numpy as np self.classes = classes self.model = model self.prefix = prefix self.output_path = output_path self.compare_acc = compare_acc self.estimate_uniqueness = estimate_uniqueness X = [] Y = [] for c in classes: mask = np.argmax(Y_test, axis=1) == c a = X_test[mask] X.append(a) Y.append(Y_test[mask]) #TRex.log(len(a), c) self.X_test = X self.Y_test = Y self.epoch = 0 self.batches = 0 self.minimal_loss_allowed = 1 self.uniquenesses = [] self.worst_values = [] self.mean_values = [] self.better_values = [] self.much_better_values = [] self.per_class_accuracy = {} self.losses = [] self.loss_diffs = [] self.epochs = epochs self.best_result = {"weights":{}, "unique":-1} self.filename = filename self.settings = settings def plot_comparison_raw(self, do_plot = True, length = -1): X_test = self.X_test Y_test = self.Y_test classes = self.classes matrix = np.zeros_like(np.ndarray(shape=(len(classes),len(classes))), dtype=float) result = np.zeros_like(np.ndarray(shape=(len(classes), 4), dtype=float)) predictions = [] for i, c, images, zeros in zip(np.arange(len(classes)), classes, X_test, Y_test): if len(images) == 0: continue Y = self.model.predict(images) predictions.append(Y) distance = np.abs(Y - zeros).sum(axis=1) result[i, 0] = np.median(np.argmax(Y, axis=1)) result[i, 1] = distance.std() result[i, 2] = np.median(Y.sum(axis=1)) #distance.mean() result[i, 3] = (np.argmax(Y, axis=1) == i).sum() / len(Y) return result, predictions def update_status(self, print_out = False, logs = {}): description = "epoch "+str(min(self.epoch+1, self.epochs))+"/"+str(self.epochs) if np.shape(self.X_test)[-1] > 0 and len(self.worst_values) > 0: description += " -- worst acc/class: {0:.3f}".format(self.worst_values[-1]) if self.best_result["unique"] > -1: description = description + " current best: "+str(float(int(self.best_result["unique"] * 10000)) / 100) + "%" if self.compare_acc > 0: description += " compare_acc: " + str(float(int(self.compare_acc * 10000)) / 100)+"%" if len(self.losses) >= 5: description += " loss_diff: " + str(float(int(abs(np.mean(self.loss_diffs[-5:])) / self.minimal_loss_allowed * 10000)) / 100)+"% of minimum" update_work_description(description) description = "[TRAIN] "+description if print_out: TRex.log(description+" "+str(logs)) def evaluate(self, epoch, save = True, logs = {}): classes = self.classes model = self.model global update_work_percent, set_stop_reason, set_per_class_accuracy, set_uniqueness_history update_work_percent(min(epoch + 1, self.epochs) / self.epochs) self.epoch = min(epoch + 1, self.epochs) if np.shape(self.X_test)[-1] > 0: result, predictions = self.plot_comparison_raw(do_plot = False, length = -1) for i in range(0, len(result[:, 3])): if not i in self.per_class_accuracy: self.per_class_accuracy[i] = [] self.per_class_accuracy[i].append(result[:, 3][i]) self.mean_values.append(np.mean(result[:, 3])) self.worst_values.append(np.min(result[:, 3])) set_per_class_accuracy(result[:, 3].astype(np.float)) worst_acc_per_class = np.min(result[:, 3]) else: result = None worst_acc_per_class = -1 unique = self.estimate_uniqueness() if not save: return unique self.uniquenesses.append(unique) set_uniqueness_history(self.uniquenesses) # check whether our worst value improved, but only use it if it wasnt the first epoch if unique > self.best_result["unique"] and (self.compare_acc <= 0 or unique >= self.compare_acc**2): self.best_result["unique"] = unique self.better_values.append((epoch, unique)) TRex.log("\t(saving) new best-worst-value: "+str(unique)) if unique >= self.compare_acc: self.much_better_values.append((epoch, unique)) weights ={} for i, layer in zip(range(len(model.layers)), model.layers): h=layer.get_weights() #TRex.log(i, len(h), layer.get_config()["name"]) if len(h) > 0: weights[i] = h global output_path self.best_result["weights"] = weights try: TRex.log("\t(saving weights as "+output_path+"_progress.npz)") np.savez(output_path+"_progress.npz", weights = np.array(weights, dtype="object")) except Exception as e: TRex.warn(str(e)) else: TRex.log("\t(not saving) old best value is "+str(self.best_result["unique"])+" / "+str(unique)) # not reaching a good enough result in N epochs if self.compare_acc > 0 and len(self.uniquenesses) >= 5 and self.best_result["unique"] < self.compare_acc**2: set_stop_reason("uniqueness stayed below "+str(int((self.compare_acc**2) * 1000) / 100.0)+"% in the first epochs") TRex.log("[STOP] best result is below "+str(self.compare_acc**2)+" even after "+str(epoch)+" epochs.") self.model.stop_training = True if "val_loss" in logs: key = "val_loss" else: key = "loss" if "val_acc" in logs: akey = "val_acc" else: akey = "acc" assert key in logs current_loss = logs[key] previous_loss = np.finfo(np.float).max if len(self.losses) > 0: l = np.nanmean(self.losses[-5:]) if not np.isnan(l): previous_loss = l self.losses.append(current_loss) if len(self.losses) > 1: mu = np.mean(self.losses[-5:-1]) self.loss_diffs.append((current_loss - mu)) loss_diff = max(0.00000001, previous_loss - current_loss) self.minimal_loss_allowed = 0.05*10**(int(np.log10(current_loss))-1) change = np.diff(self.losses) if len(self.losses) > 1: TRex.log("\tminimal_loss_allowed: "+str(self.minimal_loss_allowed)+" -> mu: "+str(mu)+" current diffs: "+str(self.loss_diffs)+" average:"+str(np.mean(self.loss_diffs[-5:]))) if not self.model.stop_training: #if self.compare_acc > -1 and "val_acc" in logs and len(self.worst_values) >= 5 and logs["val_acc"] < self.compare_acc**3: # TRex.log("[STOP] val_acc is below "+str(self.compare_acc**3)+" even after "+str(len(self.worst_values))+" epochs.") # self.model.stop_training = True # check for accuracy plateau TRex.log("-- worst_value "+str(self.worst_values[-2:])) if not self.model.stop_training and len(self.worst_values) >= 2 and self.settings["accumulation_step"] >= -1: acc = np.array(self.worst_values[-2:]) #logs[akey][-2:] if (acc > 0.97).all() or worst_acc_per_class >= 0.99: TRex.log("[STOP] "+str(acc)+" in "+akey+" has been > 0.97 for consecutive epochs. terminating.") set_stop_reason(akey+" good enough ("+str(acc)+")") self.model.stop_training = True # check whether we are plateauing at a certain uniqueness level for a long time if not self.model.stop_training and len(self.uniquenesses) >= 10 and self.settings["accumulation_step"] > 0: acc = np.diff(self.uniquenesses[-10:]).mean() #logs[akey][-2:] TRex.log("Uniqueness plateau check:"+str(np.diff(self.uniquenesses[-10:]))+" -> "+str(acc)) if acc <= 0.01: set_stop_reason("uniqueness plateau") TRex.log("[STOP] Uniqueness has been plateauing for several epochs. terminating. "+str(acc)) self.model.stop_training = True if not self.model.stop_training and len(self.losses) > 1: #if len(self.losses) >= 5 and np.abs(loss_diff) < minimal_loss_allowed: # TRex.log("[STOP] Loss is very small (epoch "+str(len(self.losses))+"). stopping. loss was "+str(current_loss)+" - "+str(previous_loss)+" = "+str(loss_diff)) # self.model.stop_training = True # check for loss plateau if not self.model.stop_training: if len(self.losses) >= 5 and abs(np.mean(self.loss_diffs[-5:])) < self.minimal_loss_allowed: #if len(self.losses) >= 5 and (np.array(self.loss_diffs[-2:]) < self.minimal_loss_allowed).all(): if self.settings["accumulation_step"] > 0: self.model.stop_training = True set_stop_reason("small loss in consecutive epochs") TRex.log("[STOP] Loss is very small in consecutive epochs (epoch "+str(epoch)+"). stopping. loss was "+str(current_loss)+" vs. "+str(mu)+" "+str(self.loss_diffs[-2:])) else: TRex.log("(skipping small loss stopping criterion in first accumulation step)") elif len(change) >= 2: TRex.log("\t"+str(current_loss)+" => "+str(current_loss - mu)+" ("+str(current_loss)+" / "+str(mu)+")") count = 0 for i in range(-10,-1): v = change[i:i+1] if len(v) > 0 and v[0] >= 0: count += 1 elif len(v) > 0: count = 0 TRex.log("\tcounted "+str(count)+" increases in loss in consecutive epochs - "+str(change)) if count >= 4: # we seem to have started overfitting set_stop_reason("overfitting") TRex.log("[STOP] overfitting. stopping with loss diffs: "+str(change)) self.model.stop_training = True self.update_status(True, logs=logs) self.batches = 0 return unique def on_epoch_end(self, epoch, logs={}): worst_value = self.evaluate(epoch, True, logs) global gui_terminated if gui_terminated(): global UserCancelException self.model.stop_training = True #TRex.log("aborting because we have been asked to by main") raise UserCancelException() def on_batch_end(self, batch, logs={}): global gui_terminated if gui_terminated(): global UserCancelException self.model.stop_training = True raise UserCancelException() self.batches += 1 if batch % 50 == 0: self.update_status(logs=logs) global update_work_percent epoch = self.epoch epoch += self.batches / self.settings["per_epoch"] update_work_percent((epoch) / self.epochs) #logs = logs or {} #batch_size = logs.get('size', 0) # In case of distribution strategy we can potentially run multiple steps # at the same time, we should account for that in the `seen` calculation. #num_steps = logs.get('num_steps', 1) #if self.use_steps: # self.seen += num_steps #else: # self.seen += batch_size * num_steps #self.display_step += 1 # Skip progbar update for the last batch; # will be handled by on_epoch_end. #if self.verbose and self.seen < self.target and self.display_step % #self.display_per_batches == 0: # self.progbar.update(self.seen, self.log_values) def predict(): global receive, images, model train_X = np.array(images, copy=False) if len(train_X.shape) != 4: print("error with the shape") indexes = np.array(np.arange(len(train_X)), dtype=np.float32) output = np.array(model.predict(train_X), dtype=np.float32) receive(output, indexes) del output del train_X del indexes del images def start_learning(): global best_accuracy_worst_class, max_epochs, image_width, image_height global output_path, classes, learning_rate, accumulation_step, global_segment global batch_size, X_val, Y_val, X, Y, run_training, save_weights_after, do_save_training_images epochs = max_epochs #batch_size = 32 move_range = min(0.05, 2 / min(image_width, image_height)) TRex.log("setting move_range as "+str(move_range)) settings = { "epochs": max_epochs, "batch_size": batch_size, "move_range": move_range, "output_path": output_path, "image_width": image_width, "image_height": image_height, "classes": np.array(classes,dtype=int), "learning_rate": learning_rate, "accumulation_step": accumulation_step, "global_segment": np.array(global_segment, dtype=int), "per_epoch" : -1, "min_iterations": 2 #"min_acceptable_value": 0.98 } X_test = np.array(X_val, copy=False) #True, dtype = float) / 255.0 Y_test = np.array(Y_val, dtype=float) original_classes_test = np.copy(Y_test) Y_test = np_utils.to_categorical(Y_test, len(classes)) X_train = np.array(X, copy=False) Y_train = np.array(Y, dtype=float) original_classes = np.copy(Y_train) #test_original_classes = np.copy(test_Y) Y_train = np_utils.to_categorical(Y_train, len(classes)) #Y_test = np_utils.to_categorical(test_Y, len(classes)) TRex.log("Python received "+str(X_train.shape)+" training images ("+str(Y_train.shape)+") and "+str(X_test.shape)+" validation images ("+str(Y_test.shape)+")") mi = 0 for i, c in zip(np.arange(len(classes)), classes): mi = max(mi, len(Y_train[np.argmax(Y_train, axis=1) == c])) per_epoch = max(settings["min_iterations"], int(len(X_train) // batch_size ) * 0.5 ) #* 2.0) # i am using augmentation settings["per_epoch"] = per_epoch TRex.log(str(settings)) per_class = {} m = 0 cvalues = [] for i, c in zip(np.arange(len(classes)), classes): per_class[i] = len(Y_train[np.argmax(Y_train, axis=1) == c]) cvalues.append(per_class[i]) if per_class[i] > m: m = per_class[i] TRex.log("# [init] samples per class "+str(per_class)) def preprocess(images): dtype = images.dtype assert images.max() <= 255 and images.max() > 1 #TRex.log(images.shape, images.dtype) alpha = 1.0 if len(images.shape) == 3: alpha = np.random.uniform(0.85, 1.15) else: alpha = [] for i in range(len(images)): alpha.append(np.random.uniform(0.85, 1.15)) alpha = np.array(alpha) images = images.astype(np.float) * alpha return np.clip(images, 0, 255).astype(dtype) datagen = ImageDataGenerator(#rescale = 1.0/255.0, #rotation_range = 360, #brightness_range=(0.5,1.5), width_shift_range=move_range, height_shift_range=move_range, cval = 0, fill_mode = "constant", #preprocessing_function=preprocess, #use_multiprocessing=True ) cvalues = np.array(cvalues) TRex.log("# [init] weights per class "+str(per_class)) TRex.log("# [training] data shapes: train "+str(X_train.shape)+" "+str(Y_train.shape)+" test "+str(Y_test.shape)+" "+str(classes)) if do_save_training_images(): try: np.savez(output_path+"_training_images.npz", X_train=X_train, Y_train=Y_train, classes=classes) except: pass abort_with_error = False try: if run_training: update_work_percent(0.0) callback = ValidationCallback(model, classes, X_test, Y_test, epochs, filename, output_prefix+"_"+str(accumulation_step), output_path, best_accuracy_worst_class, estimate_uniqueness, settings) validation_data = (X_test, Y_test) if len(X_test) == 0: validation_data = None history = model.fit_generator(datagen.flow(X_train, Y_train, batch_size=batch_size), validation_data=validation_data, steps_per_epoch=per_epoch, epochs=epochs, callbacks = [ callback ], #class_weight = per_class ) model_json = model.to_json() with open(output_path+".json", "w") as f: f.write(model_json) if callback.best_result["unique"] != -1: weights = callback.best_result["weights"] history_path = output_path+"_"+str(accumulation_step)+"_history.npz" TRex.log("saving histories at '"+history_path+"'") np.savez(history_path, history=np.array(history.history, dtype="object"), uniquenesses=callback.uniquenesses, better_values=callback.better_values, much_better_values=callback.much_better_values, worst_values=callback.worst_values, mean_values=callback.mean_values, settings=np.array(settings, dtype="object"), per_class_accuracy=np.array(callback.per_class_accuracy), samples_per_class=np.array(per_class, dtype="object")) if save_weights_after: np.savez(output_path+".npz", weights = np.array(weights, dtype="object")) TRex.log("saved model to "+output_path+" with accuracy of "+str(callback.best_result["unique"])) try: for i, layer in zip(range(len(model.layers)), model.layers): if i in weights: layer.set_weights(weights[i]) if len(X_train) > 0: callback.evaluate(epochs, False) best_accuracy_worst_class = callback.best_result["unique"] except: TRex.warn("loading weights failed") else: TRex.warn("could not improve upon previous steps.") abort_with_error = True else: # just run the test update_work_percent(1.0) callback = ValidationCallback(model, classes, X_test, Y_test, epochs, filename, output_prefix, output_path, best_accuracy_worst_class, estimate_uniqueness, settings) if len(X_train) > 0: callback.evaluate(0, False) except UserCancelException as e: #TRex.warn("exception during training: "+str(e)) abort_with_error = True TRex.log("Ending training because user cancelled the action.") del X_train del Y_train del X_test del Y_test del X del Y del callback del X_val del Y_val if "validation_data" in locals(): del validation_data if "datagen" in locals(): del datagen if "weights" in locals(): del weights import gc gc.collect() if abort_with_error: raise Exception("aborting with error")