Module monk.tf_keras_1.finetune.level_3_training_base

Expand source code
from tf_keras_1.finetune.imports import *
from system.imports import *
from tf_keras_1.finetune.level_2_model_base import finetune_model



class finetune_training(finetune_model):
    '''
    Base class for training and associated functions

    Args:
        verbose (int): Set verbosity levels
                        0 - Print Nothing
                        1 - Print desired details
    '''


    def __init__(self, verbose=1):
        super().__init__(verbose=verbose);



    ###############################################################################################################################################
    def get_training_estimate(self):
        '''
        Get estimated time for training a single epoch based on all set parameters

        Runs one pass over the small "estimate" data loader, measures wall time
        per image, and scales that to the full train + validation image counts.

        Args:
            None

        Returns:
            float: Total time per epoch in seconds
        '''
        total_time_per_epoch = 0;

        # Materialize scheduler, optimizer and loss from the stored configuration.
        self.system_dict = load_scheduler(self.system_dict);
        self.system_dict = load_optimizer(self.system_dict);
        self.system_dict = load_loss(self.system_dict);

        self.system_dict["local"]["model"].compile(optimizer=self.system_dict["local"]["optimizer"], 
                loss=self.system_dict["local"]["criterion"], metrics=['accuracy']);

        time_callback = TimeHistory();
        initial_epoch = 0;

        # Number of whole batches in one pass over the estimation loader.
        step_size_estimate = self.system_dict["local"]["data_loaders"]["estimate"].n//self.system_dict["local"]["data_loaders"]["estimate"].batch_size;

        # Single silent epoch, timed by TimeHistory.
        self.system_dict["local"]["model"].fit_generator(generator=self.system_dict["local"]["data_loaders"]["estimate"],
                           steps_per_epoch=step_size_estimate,
                           epochs=1,
                           callbacks=[time_callback],
                           workers=psutil.cpu_count(),
                           initial_epoch = initial_epoch,
                           verbose=0)

        # Scale the measured per-image cost to the full dataset size.
        time_taken = time_callback.times[0];
        num_images = len(self.system_dict["local"]["data_loaders"]["estimate"].labels);
        time_taken_per_image = time_taken/num_images;

        total_time_per_epoch = time_taken_per_image*(self.system_dict["dataset"]["params"]["num_train_images"] + 
            self.system_dict["dataset"]["params"]["num_val_images"]);

        return total_time_per_epoch;

    ###############################################################################################################################################
        


    ###############################################################################################################################################
    def set_training_final(self):
        '''
        Main training function

        Behaves according to the experiment state:
            - resume_train: restores completed-epoch count and per-epoch wall
              times from the log directory, then continues training.
            - eval_infer:   training is not allowed; raises ConstraintError.
            - otherwise:    starts a fresh training run.

        On completion saves the final model, marks training as done, and (when
        log saving is enabled) writes accuracy/loss history arrays and plots.

        Args:
            None

        Returns:
            None

        Raises:
            ConstraintError: If called while in eval_infer (testing) mode.
        '''
        if(self.system_dict["states"]["resume_train"]):
            self.custom_print("Training Resume");
            total_time_per_epoch = 0;

            self.system_dict = load_scheduler(self.system_dict);
            self.system_dict = load_optimizer(self.system_dict);
            self.system_dict = load_loss(self.system_dict);

            self.system_dict["training"]["status"] = False;

            pid = os.getpid();


            log_dir = self.system_dict["log_dir_relative"];
            model_dir = self.system_dict["model_dir_relative"];
            intermediate_model_prefix = self.system_dict["training"]["settings"]["intermediate_model_prefix"];
            save_intermediate_models = self.system_dict["training"]["settings"]["save_intermediate_models"];
            display_progress_realtime = self.system_dict["training"]["settings"]["display_progress_realtime"];
            display_progress = self.system_dict["training"]["settings"]["display_progress"];
            num_epochs = self.system_dict["hyper-parameters"]["num_epochs"];

            # Epochs already completed = CSV rows minus the header line.
            # (Fix: file handle is now closed via context manager.)
            with open(self.system_dict["log_dir"] + "/model_history_log.csv", 'r') as f:
                lines = f.readlines();
            epochs_completed = len(lines)-1;

            if(self.system_dict["training"]["settings"]["save_training_logs"]):
                # NOTE(review): these loaded histories are unconditionally
                # overwritten below with the resumed run's history only, so the
                # pre-resume portion is discarded — confirm whether it should
                # instead be appended to.
                history_df = pd.read_csv(self.system_dict["log_dir"] + "/model_history_log.csv");
                val_acc_history = history_df['val_acc'].tolist();
                train_acc_history = history_df['acc'].tolist();
                val_loss_history = history_df['val_loss'].tolist();
                train_loss_history = history_df['loss'].tolist();

            # Per-epoch wall times recorded by TimeHistory in earlier runs.
            # (Fix: file is now closed, and strip() is robust to a missing
            # trailing newline on the last line.)
            with open(self.system_dict["log_dir"] + "/times.txt", 'r') as f:
                times_history = [float(line.strip()) for line in f.readlines()];


            # append=True so the resumed epochs extend the existing CSV log.
            csv_logger = krc.CSVLogger(log_dir + "model_history_log.csv", append=True);

            # Map user settings onto Keras verbosity:
            # 0 = silent, 1 = live progress bar, 2 = one line per epoch.
            if(not self.system_dict["verbose"]):
                verbose=0;
            elif(display_progress_realtime):
                verbose=1;
            elif(display_progress):
                verbose=2;
            else:
                verbose=0;
            

            ckpt_all = krc.ModelCheckpoint(model_dir + intermediate_model_prefix + '{epoch:02d}.h5', monitor='val_loss', verbose=verbose, 
                save_best_only=False, save_weights_only=False, mode='auto', period=1);

            ckpt_best = krc.ModelCheckpoint(model_dir + 'best_model.h5', monitor='val_loss', verbose=verbose, 
                save_best_only=True, save_weights_only=False, mode='auto', period=1);

            # Full-state checkpoint written every epoch so training can be
            # resumed again after an interruption.
            resume = krc.ModelCheckpoint(model_dir + 'resume_state.h5', monitor='val_loss', verbose=verbose, 
                save_best_only=False, save_weights_only=False, mode='auto', period=1);
            
            time_callback = TimeHistory(log_dir)
            memory_callback = MemoryHistory();


            callbacks = [krc.History(), memory_callback, time_callback, csv_logger, resume, ckpt_best];

            if(save_intermediate_models):
                callbacks.append(ckpt_all);

            if(self.system_dict["local"]["learning_rate_scheduler"]):
                # ReduceLROnPlateau is already a callback object; any other
                # scheduler is a schedule function that must be wrapped.
                if(self.system_dict["hyper-parameters"]["learning_rate_scheduler"]["name"] == "reduceonplateaulr"):
                    callbacks.append(self.system_dict["local"]["learning_rate_scheduler"]);
                else:
                    callbacks.append(krc.LearningRateScheduler(self.system_dict["local"]["learning_rate_scheduler"]));


            step_size_train = self.system_dict["local"]["data_loaders"]["train"].n//self.system_dict["local"]["data_loaders"]["train"].batch_size;
            step_size_val = self.system_dict["local"]["data_loaders"]["val"].n//self.system_dict["local"]["data_loaders"]["val"].batch_size;


            # Continue from where the previous run stopped.
            initial_epoch = epochs_completed;

            


            self.system_dict["local"]["model"].compile(optimizer=self.system_dict["local"]["optimizer"], 
                loss=self.system_dict["local"]["criterion"], metrics=['accuracy']);


            history = self.system_dict["local"]["model"].fit_generator(generator=self.system_dict["local"]["data_loaders"]["train"],
                               steps_per_epoch=step_size_train,
                               epochs=num_epochs,
                               callbacks=callbacks,
                               validation_data=self.system_dict["local"]["data_loaders"]["val"],
                               validation_steps=step_size_val,
                               initial_epoch = initial_epoch,
                               verbose=verbose);

            # Total elapsed time = previous runs + the resumed run.
            times_history += time_callback.times
            time_elapsed_since = sum(times_history);


            self.system_dict["training"]["outputs"]["training_time"] = "{:.0f}m {:.0f}s".format(time_elapsed_since // 60, time_elapsed_since % 60);

            # Keras renamed the accuracy metric keys ('acc' -> 'accuracy')
            # between versions; pick the right keys for this install.
            # NOTE(review): comparing only the minor version to "3" is fragile
            # (e.g. 2.10) — confirm against the supported keras versions.
            if(keras.__version__.split(".")[1] == "3"):
                val_acc_history = history.history['val_accuracy'];
                val_loss_history = history.history['val_loss'];
                train_acc_history = history.history['accuracy'];
                train_loss_history = history.history['loss'];
            else:
                val_acc_history = history.history['val_acc'];
                val_loss_history = history.history['val_loss'];
                train_acc_history = history.history['acc'];
                train_loss_history = history.history['loss'];


            self.system_dict["training"]["outputs"]["best_val_acc"] = max(val_acc_history);
            self.system_dict["training"]["outputs"]["best_val_acc_epoch_num"] = val_acc_history.index(self.system_dict["training"]["outputs"]["best_val_acc"]);
            self.system_dict["training"]["outputs"]["max_gpu_memory_usage"] = memory_callback.max_gpu_usage;


            if(self.system_dict["training"]["settings"]["save_training_logs"]):
                np.save(self.system_dict["log_dir"] + "val_acc_history.npy", np.array(val_acc_history));
                np.save(self.system_dict["log_dir"] + "val_loss_history.npy", np.array(val_loss_history));
                np.save(self.system_dict["log_dir"] + "train_acc_history.npy", np.array(train_acc_history));
                np.save(self.system_dict["log_dir"] + "train_loss_history.npy", np.array(train_loss_history));

                

            if(self.system_dict["training"]["settings"]["display_progress"]):
                self.custom_print('    Training completed in: {:.0f}m {:.0f}s'.format(time_elapsed_since // 60, time_elapsed_since % 60))
                self.custom_print('    Best val Acc:          {:4f}'.format(self.system_dict["training"]["outputs"]["best_val_acc"]))
                self.custom_print("");



        elif(self.system_dict["states"]["eval_infer"]):
            msg = "Cannot train in testing (eval_infer) mode.\n";
            msg += "Tip - use new_experiment function with a copy_from argument.\n";
            raise ConstraintError(msg);



        else:
            self.custom_print("Training Start");

            self.system_dict = load_scheduler(self.system_dict);
            self.system_dict = load_optimizer(self.system_dict);
            self.system_dict = load_loss(self.system_dict);

            self.system_dict["training"]["status"] = False;

            pid = os.getpid();


            log_dir = self.system_dict["log_dir_relative"];
            model_dir = self.system_dict["model_dir_relative"];
            intermediate_model_prefix = self.system_dict["training"]["settings"]["intermediate_model_prefix"];
            save_intermediate_models = self.system_dict["training"]["settings"]["save_intermediate_models"];
            display_progress_realtime = self.system_dict["training"]["settings"]["display_progress_realtime"];
            display_progress = self.system_dict["training"]["settings"]["display_progress"];
            num_epochs = self.system_dict["hyper-parameters"]["num_epochs"];


            # Map user settings onto Keras verbosity:
            # 0 = silent, 1 = live progress bar, 2 = one line per epoch.
            # (Fix: this was previously computed a second time, identically,
            # just before compile() — the duplicate has been removed.)
            if(not self.system_dict["verbose"]):
                verbose=0;
            elif(display_progress_realtime):
                verbose=1;
            elif(display_progress):
                verbose=2;
            else:
                verbose=0;


            # append=False: a fresh run starts a new CSV log.
            csv_logger = krc.CSVLogger(log_dir + "model_history_log.csv", append=False);
            ckpt_all = krc.ModelCheckpoint(model_dir + intermediate_model_prefix + '{epoch:02d}.h5', monitor='val_loss', verbose=verbose, 
                save_best_only=False, save_weights_only=False, mode='auto', period=1);

            ckpt_best = krc.ModelCheckpoint(model_dir + 'best_model.h5', monitor='val_loss', verbose=verbose, 
                save_best_only=True, save_weights_only=False, mode='auto', period=1);

            # Full-state checkpoint written every epoch so training can be
            # resumed after an interruption.
            resume = krc.ModelCheckpoint(model_dir + 'resume_state.h5', monitor='val_loss', verbose=verbose, 
                save_best_only=False, save_weights_only=False, mode='auto', period=1);
            
            time_callback = TimeHistory(log_dir)
            memory_callback = MemoryHistory();


            callbacks = [krc.History(), memory_callback, time_callback, csv_logger, resume, ckpt_best];

            if(save_intermediate_models):
                callbacks.append(ckpt_all);

            if(self.system_dict["local"]["learning_rate_scheduler"]):
                # ReduceLROnPlateau is already a callback object; any other
                # scheduler is a schedule function that must be wrapped.
                if(self.system_dict["hyper-parameters"]["learning_rate_scheduler"]["name"] == "reduceonplateaulr"):
                    callbacks.append(self.system_dict["local"]["learning_rate_scheduler"]);
                else:
                    callbacks.append(krc.LearningRateScheduler(self.system_dict["local"]["learning_rate_scheduler"]));


            step_size_train = self.system_dict["local"]["data_loaders"]["train"].n//self.system_dict["local"]["data_loaders"]["train"].batch_size;
            step_size_val = self.system_dict["local"]["data_loaders"]["val"].n//self.system_dict["local"]["data_loaders"]["val"].batch_size;


            initial_epoch = 0;


            self.system_dict["local"]["model"].compile(optimizer=self.system_dict["local"]["optimizer"], 
                loss=self.system_dict["local"]["criterion"], metrics=['accuracy']);


            history = self.system_dict["local"]["model"].fit_generator(generator=self.system_dict["local"]["data_loaders"]["train"],
                               steps_per_epoch=step_size_train,
                               epochs=num_epochs,
                               callbacks=callbacks,
                               validation_data=self.system_dict["local"]["data_loaders"]["val"],
                               validation_steps=step_size_val,
                               initial_epoch = initial_epoch,
                               verbose=verbose);



            time_elapsed_since = sum(time_callback.times);


            self.system_dict["training"]["outputs"]["training_time"] = "{:.0f}m {:.0f}s".format(time_elapsed_since // 60, time_elapsed_since % 60);

            # Keras renamed the accuracy metric keys ('acc' -> 'accuracy')
            # between versions; pick the right keys for this install.
            # NOTE(review): comparing only the minor version to "3" is fragile
            # (e.g. 2.10) — confirm against the supported keras versions.
            if(keras.__version__.split(".")[1] == "3"):
                val_acc_history = history.history['val_accuracy'];
                val_loss_history = history.history['val_loss'];
                train_acc_history = history.history['accuracy'];
                train_loss_history = history.history['loss'];
            else:
                val_acc_history = history.history['val_acc'];
                val_loss_history = history.history['val_loss'];
                train_acc_history = history.history['acc'];
                train_loss_history = history.history['loss'];

            self.system_dict["training"]["outputs"]["best_val_acc"] = max(val_acc_history);
            self.system_dict["training"]["outputs"]["best_val_acc_epoch_num"] = val_acc_history.index(self.system_dict["training"]["outputs"]["best_val_acc"]);
            self.system_dict["training"]["outputs"]["max_gpu_memory_usage"] = memory_callback.max_gpu_usage;


            if(self.system_dict["training"]["settings"]["save_training_logs"]):
                np.save(self.system_dict["log_dir"] + "val_acc_history.npy", np.array(val_acc_history));
                np.save(self.system_dict["log_dir"] + "val_loss_history.npy", np.array(val_loss_history));
                np.save(self.system_dict["log_dir"] + "train_acc_history.npy", np.array(train_acc_history));
                np.save(self.system_dict["log_dir"] + "train_loss_history.npy", np.array(train_loss_history));

               


            if(self.system_dict["training"]["settings"]["display_progress"]):
                self.custom_print('    Training completed in: {:.0f}m {:.0f}s'.format(time_elapsed_since // 60, time_elapsed_since % 60))
                self.custom_print('    Best val Acc:          {:4f}'.format(self.system_dict["training"]["outputs"]["best_val_acc"]))
                self.custom_print("");

            

        # Persist the final model and the updated experiment state.
        self.system_dict["local"]["model"].save(self.system_dict["model_dir_relative"] + "final.h5");
        self.system_dict["training"]["status"] = True;
        save(self.system_dict);



        if(not self.system_dict["states"]["eval_infer"]):
            self.custom_print("Training End");
            self.custom_print("");
            self.system_dict["training"]["outputs"]["best_val_acc"] = "{:4f}".format(self.system_dict["training"]["outputs"]["best_val_acc"]);
            self.system_dict["training"]["outputs"]["best_val_acc_epoch_num"] = self.system_dict["training"]["outputs"]["best_val_acc_epoch_num"];
            self.system_dict["training"]["outputs"]["training_time"] = "{:.0f}m {:.0f}s".format(time_elapsed_since // 60, time_elapsed_since % 60);
            self.system_dict["training"]["outputs"]["max_gpu_usage"] = str(self.system_dict["training"]["outputs"]["max_gpu_memory_usage"]) + " Mb";


            if(self.system_dict["training"]["settings"]["save_training_logs"]): 
                self.custom_print("Training Outputs");
                self.custom_print("    Model Dir:   {}".format(self.system_dict["model_dir"]));
                self.custom_print("    Log Dir:     {}".format(self.system_dict["log_dir"]));
                self.custom_print("    Final model: {}".format("final"));
                self.custom_print("    Best model:  {}".format("best_model"));
                self.custom_print("    Log 1 - Validation accuracy history log: {}".format("val_acc_history.npy"));
                self.custom_print("    Log 2 - Validation loss history log:     {}".format("val_loss_history.npy"));
                self.custom_print("    Log 3 - Training accuracy history log:   {}".format("train_acc_history.npy"));
                self.custom_print("    Log 4 - Training loss history log:       {}".format("train_loss_history.npy"));
                self.custom_print("    Log 5 - Training curve:                  {}".format("train_loss_history.npy"));
                self.custom_print("    Log 6 - Validation curve:                {}".format("train_loss_history.npy"));
                self.custom_print("");

                np.save(self.system_dict["log_dir"] + "val_acc_history.npy", np.array(val_acc_history));
                np.save(self.system_dict["log_dir"] + "val_loss_history.npy", np.array(val_loss_history));
                np.save(self.system_dict["log_dir"] + "train_acc_history.npy", np.array(train_acc_history));
                np.save(self.system_dict["log_dir"] + "train_loss_history.npy", np.array(train_loss_history));
                
                self.system_dict["training"]["outputs"]["log_val_acc_history"] = self.system_dict["log_dir"] + "val_acc_history.npy";
                self.system_dict["training"]["outputs"]["log_val_loss_history"] = self.system_dict["log_dir"] + "val_loss_history.npy";
                self.system_dict["training"]["outputs"]["log_train_acc_history"] = self.system_dict["log_dir"] + "train_acc_history.npy";
                self.system_dict["training"]["outputs"]["log_train_loss_history"] = self.system_dict["log_dir"] + "train_loss_history.npy";

                self.system_dict["training"]["outputs"]["log_val_acc_history_relative"] = self.system_dict["log_dir_relative"] + "val_acc_history.npy";
                self.system_dict["training"]["outputs"]["log_val_loss_history_relative"] = self.system_dict["log_dir_relative"] + "val_loss_history.npy";
                self.system_dict["training"]["outputs"]["log_train_acc_history_relative"] = self.system_dict["log_dir_relative"] + "train_acc_history.npy";
                self.system_dict["training"]["outputs"]["log_train_loss_history_relative"] = self.system_dict["log_dir_relative"] + "train_loss_history.npy";


                create_train_test_plots_accuracy([train_acc_history, val_acc_history], ["Epoch Num", "Accuracy"], self.system_dict["log_dir"], show_img=False, save_img=True);
                create_train_test_plots_loss([train_loss_history, val_loss_history], ["Epoch Num", "Loss"], self.system_dict["log_dir"], show_img=False, save_img=True);

            self.system_dict["training"]["status"] = True;

        ###############################################################################################################################################

Classes

class finetune_training (verbose=1)

Base class for training and associated functions

Args

verbose : int
Set verbosity level: 0 - print nothing; 1 - print desired details.
Expand source code
class finetune_training(finetune_model):
    '''
    Base class for training and associated functions

    Args:
        verbose (int): Set verbosity levels
                        0 - Print Nothing
                        1 - Print desired details
    '''


    def __init__(self, verbose=1):
        super().__init__(verbose=verbose);



    ###############################################################################################################################################
    def get_training_estimate(self):
        '''
        Get estimated time for training a single epoch based on all set parameters

        Args:
            None

        Returns:
            float: Total time per epoch in seconds
        '''
        total_time_per_epoch = 0;


        self.system_dict = load_scheduler(self.system_dict);
        self.system_dict = load_optimizer(self.system_dict);
        self.system_dict = load_loss(self.system_dict);

        self.system_dict["local"]["model"].compile(optimizer=self.system_dict["local"]["optimizer"], 
                loss=self.system_dict["local"]["criterion"], metrics=['accuracy']);

        time_callback = TimeHistory();
        initial_epoch = 0;

        step_size_estimate = self.system_dict["local"]["data_loaders"]["estimate"].n//self.system_dict["local"]["data_loaders"]["estimate"].batch_size;

        self.system_dict["local"]["model"].fit_generator(generator=self.system_dict["local"]["data_loaders"]["estimate"],
                           steps_per_epoch=step_size_estimate,
                           epochs=1,
                           callbacks=[time_callback],
                           workers=psutil.cpu_count(),
                           initial_epoch = initial_epoch,
                           verbose=0)

        time_taken = time_callback.times[0];
        num_images = len(self.system_dict["local"]["data_loaders"]["estimate"].labels);
        time_taken_per_image = time_taken/num_images;

        total_time_per_epoch = time_taken_per_image*(self.system_dict["dataset"]["params"]["num_train_images"] + 
            self.system_dict["dataset"]["params"]["num_val_images"]);

        return total_time_per_epoch;

    ###############################################################################################################################################
        


    ###############################################################################################################################################
    def set_training_final(self):
        '''
        Main training function

        Args:
            None

        Returns:
            None
        '''
        if(self.system_dict["states"]["resume_train"]):
            self.custom_print("Training Resume");
            total_time_per_epoch = 0;

            self.system_dict = load_scheduler(self.system_dict);
            self.system_dict = load_optimizer(self.system_dict);
            self.system_dict = load_loss(self.system_dict);

            self.system_dict["training"]["status"] = False;

            pid = os.getpid();


            log_dir = self.system_dict["log_dir_relative"];
            model_dir = self.system_dict["model_dir_relative"];
            intermediate_model_prefix = self.system_dict["training"]["settings"]["intermediate_model_prefix"];
            save_intermediate_models = self.system_dict["training"]["settings"]["save_intermediate_models"];
            display_progress_realtime = self.system_dict["training"]["settings"]["display_progress_realtime"];
            display_progress = self.system_dict["training"]["settings"]["display_progress"];
            num_epochs = self.system_dict["hyper-parameters"]["num_epochs"];

            f = open(self.system_dict["log_dir"] + "/model_history_log.csv", 'r');
            lines = f.readlines();
            f.close();
            epochs_completed = len(lines)-1;

            if(self.system_dict["training"]["settings"]["save_training_logs"]):
                history_df = pd.read_csv(self.system_dict["log_dir"] + "/model_history_log.csv");
                val_acc_history = history_df['val_acc'].tolist();
                train_acc_history = history_df['acc'].tolist();
                val_loss_history = history_df['val_loss'].tolist();
                train_loss_history = history_df['loss'].tolist();

            f = open(self.system_dict["log_dir"] + "/times.txt", 'r');
            lines = f.readlines();
            times_history = [];
            for i in range(len(lines)):
                times_history.append(float(lines[i][:len(lines[i])-1]));


            csv_logger = krc.CSVLogger(log_dir + "model_history_log.csv", append=True);

            if(not self.system_dict["verbose"]):
                verbose=0;
            elif(display_progress_realtime):
                verbose=1;
            elif(display_progress):
                verbose=2;
            else:
                verbose=0;
            

            ckpt_all = krc.ModelCheckpoint(model_dir + intermediate_model_prefix + '{epoch:02d}.h5', monitor='val_loss', verbose=verbose, 
                save_best_only=False, save_weights_only=False, mode='auto', period=1);

            ckpt_best = krc.ModelCheckpoint(model_dir + 'best_model.h5', monitor='val_loss', verbose=verbose, 
                save_best_only=True, save_weights_only=False, mode='auto', period=1);

            resume = krc.ModelCheckpoint(model_dir + 'resume_state.h5', monitor='val_loss', verbose=verbose, 
                save_best_only=False, save_weights_only=False, mode='auto', period=1);
            
            time_callback = TimeHistory(log_dir)
            memory_callback = MemoryHistory();


            callbacks = [krc.History(), memory_callback, time_callback, csv_logger, resume, ckpt_best];

            if(save_intermediate_models):
                callbacks.append(ckpt_all);

            if(self.system_dict["local"]["learning_rate_scheduler"]):
                if(self.system_dict["hyper-parameters"]["learning_rate_scheduler"]["name"] == "reduceonplateaulr"):
                    callbacks.append(self.system_dict["local"]["learning_rate_scheduler"]);
                else:
                    callbacks.append(krc.LearningRateScheduler(self.system_dict["local"]["learning_rate_scheduler"]));


            step_size_train = self.system_dict["local"]["data_loaders"]["train"].n//self.system_dict["local"]["data_loaders"]["train"].batch_size;
            step_size_val = self.system_dict["local"]["data_loaders"]["val"].n//self.system_dict["local"]["data_loaders"]["val"].batch_size;


            initial_epoch = epochs_completed;

            


            self.system_dict["local"]["model"].compile(optimizer=self.system_dict["local"]["optimizer"], 
                loss=self.system_dict["local"]["criterion"], metrics=['accuracy']);


            history = self.system_dict["local"]["model"].fit_generator(generator=self.system_dict["local"]["data_loaders"]["train"],
                               steps_per_epoch=step_size_train,
                               epochs=num_epochs,
                               callbacks=callbacks,
                               validation_data=self.system_dict["local"]["data_loaders"]["val"],
                               validation_steps=step_size_val,
                               initial_epoch = initial_epoch,
                               verbose=verbose);

            time_elapsed_since = 0;
            times_history += time_callback.times
            for i in range(len(times_history)):
                time_elapsed_since += times_history[i];


            self.system_dict["training"]["outputs"]["training_time"] = "{:.0f}m {:.0f}s".format(time_elapsed_since // 60, time_elapsed_since % 60);

            if(keras.__version__.split(".")[1] == "3"):
                val_acc_history = history.history['val_accuracy'];
                val_loss_history = history.history['val_loss'];
                train_acc_history = history.history['accuracy'];
                train_loss_history = history.history['loss'];
            else:
                val_acc_history = history.history['val_acc'];
                val_loss_history = history.history['val_loss'];
                train_acc_history = history.history['acc'];
                train_loss_history = history.history['loss'];


            self.system_dict["training"]["outputs"]["best_val_acc"] = max(val_acc_history);
            self.system_dict["training"]["outputs"]["best_val_acc_epoch_num"] = val_acc_history.index(self.system_dict["training"]["outputs"]["best_val_acc"]);
            self.system_dict["training"]["outputs"]["max_gpu_memory_usage"] = memory_callback.max_gpu_usage;


            if(self.system_dict["training"]["settings"]["save_training_logs"]):
                np.save(self.system_dict["log_dir"] + "val_acc_history.npy", np.array(val_acc_history));
                np.save(self.system_dict["log_dir"] + "val_loss_history.npy", np.array(val_loss_history));
                np.save(self.system_dict["log_dir"] + "train_acc_history.npy", np.array(train_acc_history));
                np.save(self.system_dict["log_dir"] + "train_loss_history.npy", np.array(train_loss_history));

                

            if(self.system_dict["training"]["settings"]["display_progress"]):
                self.custom_print('    Training completed in: {:.0f}m {:.0f}s'.format(time_elapsed_since // 60, time_elapsed_since % 60))
                self.custom_print('    Best val Acc:          {:4f}'.format(self.system_dict["training"]["outputs"]["best_val_acc"]))
                self.custom_print("");



        elif(self.system_dict["states"]["eval_infer"]):
            msg = "Cannot train in testing (eval_infer) mode.\n";
            msg += "Tip - use new_experiment function with a copy_from argument.\n";
            raise ConstraintError(msg);



        else:
            self.custom_print("Training Start");

            self.system_dict = load_scheduler(self.system_dict);
            self.system_dict = load_optimizer(self.system_dict);
            self.system_dict = load_loss(self.system_dict);

            self.system_dict["training"]["status"] = False;

            pid = os.getpid();


            log_dir = self.system_dict["log_dir_relative"];
            model_dir = self.system_dict["model_dir_relative"];
            intermediate_model_prefix = self.system_dict["training"]["settings"]["intermediate_model_prefix"];
            save_intermediate_models = self.system_dict["training"]["settings"]["save_intermediate_models"];
            display_progress_realtime = self.system_dict["training"]["settings"]["display_progress_realtime"];
            display_progress = self.system_dict["training"]["settings"]["display_progress"];
            num_epochs = self.system_dict["hyper-parameters"]["num_epochs"];


            if(not self.system_dict["verbose"]):
                verbose=0;
            elif(display_progress_realtime):
                verbose=1;
            elif(display_progress):
                verbose=2;
            else:
                verbose=0;


            csv_logger = krc.CSVLogger(log_dir + "model_history_log.csv", append=False);
            ckpt_all = krc.ModelCheckpoint(model_dir + intermediate_model_prefix + '{epoch:02d}.h5', monitor='val_loss', verbose=verbose, 
                save_best_only=False, save_weights_only=False, mode='auto', period=1);

            ckpt_best = krc.ModelCheckpoint(model_dir + 'best_model.h5', monitor='val_loss', verbose=verbose, 
                save_best_only=True, save_weights_only=False, mode='auto', period=1);

            resume = krc.ModelCheckpoint(model_dir + 'resume_state.h5', monitor='val_loss', verbose=verbose, 
                save_best_only=False, save_weights_only=False, mode='auto', period=1);
            
            time_callback = TimeHistory(log_dir)
            memory_callback = MemoryHistory();


            callbacks = [krc.History(), memory_callback, time_callback, csv_logger, resume, ckpt_best];

            if(save_intermediate_models):
                callbacks.append(ckpt_all);

            if(self.system_dict["local"]["learning_rate_scheduler"]):
                if(self.system_dict["hyper-parameters"]["learning_rate_scheduler"]["name"] == "reduceonplateaulr"):
                    callbacks.append(self.system_dict["local"]["learning_rate_scheduler"]);
                else:
                    callbacks.append(krc.LearningRateScheduler(self.system_dict["local"]["learning_rate_scheduler"]));


            step_size_train = self.system_dict["local"]["data_loaders"]["train"].n//self.system_dict["local"]["data_loaders"]["train"].batch_size;
            step_size_val = self.system_dict["local"]["data_loaders"]["val"].n//self.system_dict["local"]["data_loaders"]["val"].batch_size;


            initial_epoch = 0;

            if(not self.system_dict["verbose"]):
                verbose=0;
            elif(display_progress_realtime):
                verbose=1;
            elif(display_progress):
                verbose=2;
            else:
                verbose=0;


            self.system_dict["local"]["model"].compile(optimizer=self.system_dict["local"]["optimizer"], 
                loss=self.system_dict["local"]["criterion"], metrics=['accuracy']);


            history = self.system_dict["local"]["model"].fit_generator(generator=self.system_dict["local"]["data_loaders"]["train"],
                               steps_per_epoch=step_size_train,
                               epochs=num_epochs,
                               callbacks=callbacks,
                               validation_data=self.system_dict["local"]["data_loaders"]["val"],
                               validation_steps=step_size_val,
                               initial_epoch = initial_epoch,
                               verbose=verbose);



            time_elapsed_since = 0;
            for i in range(len(time_callback.times)):
                time_elapsed_since += time_callback.times[i];


            self.system_dict["training"]["outputs"]["training_time"] = "{:.0f}m {:.0f}s".format(time_elapsed_since // 60, time_elapsed_since % 60);

            if(keras.__version__.split(".")[1] == "3"):
                val_acc_history = history.history['val_accuracy'];
                val_loss_history = history.history['val_loss'];
                train_acc_history = history.history['accuracy'];
                train_loss_history = history.history['loss'];
            else:
                val_acc_history = history.history['val_acc'];
                val_loss_history = history.history['val_loss'];
                train_acc_history = history.history['acc'];
                train_loss_history = history.history['loss'];

            self.system_dict["training"]["outputs"]["best_val_acc"] = max(val_acc_history);
            self.system_dict["training"]["outputs"]["best_val_acc_epoch_num"] = val_acc_history.index(self.system_dict["training"]["outputs"]["best_val_acc"]);
            self.system_dict["training"]["outputs"]["max_gpu_memory_usage"] = memory_callback.max_gpu_usage;


            if(self.system_dict["training"]["settings"]["save_training_logs"]):
                np.save(self.system_dict["log_dir"] + "val_acc_history.npy", np.array(val_acc_history));
                np.save(self.system_dict["log_dir"] + "val_loss_history.npy", np.array(val_loss_history));
                np.save(self.system_dict["log_dir"] + "train_acc_history.npy", np.array(train_acc_history));
                np.save(self.system_dict["log_dir"] + "train_loss_history.npy", np.array(train_loss_history));

               


            if(self.system_dict["training"]["settings"]["display_progress"]):
                self.custom_print('    Training completed in: {:.0f}m {:.0f}s'.format(time_elapsed_since // 60, time_elapsed_since % 60))
                self.custom_print('    Best val Acc:          {:4f}'.format(self.system_dict["training"]["outputs"]["best_val_acc"]))
                self.custom_print("");

            

        self.system_dict["local"]["model"].save(self.system_dict["model_dir_relative"] + "final.h5");
        self.system_dict["training"]["status"] = True;
        save(self.system_dict);



        if(not self.system_dict["states"]["eval_infer"]):
            self.custom_print("Training End");
            self.custom_print("");
            self.system_dict["training"]["outputs"]["best_val_acc"] = "{:4f}".format(self.system_dict["training"]["outputs"]["best_val_acc"]);
            self.system_dict["training"]["outputs"]["best_val_acc_epoch_num"] = self.system_dict["training"]["outputs"]["best_val_acc_epoch_num"];
            self.system_dict["training"]["outputs"]["training_time"] = "{:.0f}m {:.0f}s".format(time_elapsed_since // 60, time_elapsed_since % 60);
            self.system_dict["training"]["outputs"]["max_gpu_usage"] = str(self.system_dict["training"]["outputs"]["max_gpu_memory_usage"]) + " Mb";


            if(self.system_dict["training"]["settings"]["save_training_logs"]): 
                self.custom_print("Training Outputs");
                self.custom_print("    Model Dir:   {}".format(self.system_dict["model_dir"]));
                self.custom_print("    Log Dir:     {}".format(self.system_dict["log_dir"]));
                self.custom_print("    Final model: {}".format("final"));
                self.custom_print("    Best model:  {}".format("best_model"));
                self.custom_print("    Log 1 - Validation accuracy history log: {}".format("val_acc_history.npy"));
                self.custom_print("    Log 2 - Validation loss history log:     {}".format("val_loss_history.npy"));
                self.custom_print("    Log 3 - Training accuracy history log:   {}".format("train_acc_history.npy"));
                self.custom_print("    Log 4 - Training loss history log:       {}".format("train_loss_history.npy"));
                self.custom_print("    Log 5 - Training curve:                  {}".format("train_loss_history.npy"));
                self.custom_print("    Log 6 - Validation curve:                {}".format("train_loss_history.npy"));
                self.custom_print("");

                np.save(self.system_dict["log_dir"] + "val_acc_history.npy", np.array(val_acc_history));
                np.save(self.system_dict["log_dir"] + "val_loss_history.npy", np.array(val_loss_history));
                np.save(self.system_dict["log_dir"] + "train_acc_history.npy", np.array(train_acc_history));
                np.save(self.system_dict["log_dir"] + "train_loss_history.npy", np.array(train_loss_history));
                
                self.system_dict["training"]["outputs"]["log_val_acc_history"] = self.system_dict["log_dir"] + "val_acc_history.npy";
                self.system_dict["training"]["outputs"]["log_val_loss_history"] = self.system_dict["log_dir"] + "val_loss_history.npy";
                self.system_dict["training"]["outputs"]["log_train_acc_history"] = self.system_dict["log_dir"] + "train_acc_history.npy";
                self.system_dict["training"]["outputs"]["log_train_loss_history"] = self.system_dict["log_dir"] + "train_loss_history.npy";

                self.system_dict["training"]["outputs"]["log_val_acc_history_relative"] = self.system_dict["log_dir_relative"] + "val_acc_history.npy";
                self.system_dict["training"]["outputs"]["log_val_loss_history_relative"] = self.system_dict["log_dir_relative"] + "val_loss_history.npy";
                self.system_dict["training"]["outputs"]["log_train_acc_history_relative"] = self.system_dict["log_dir_relative"] + "train_acc_history.npy";
                self.system_dict["training"]["outputs"]["log_train_loss_history_relative"] = self.system_dict["log_dir_relative"] + "train_loss_history.npy";


                create_train_test_plots_accuracy([train_acc_history, val_acc_history], ["Epoch Num", "Accuracy"], self.system_dict["log_dir"], show_img=False, save_img=True);
                create_train_test_plots_loss([train_loss_history, val_loss_history], ["Epoch Num", "Loss"], self.system_dict["log_dir"], show_img=False, save_img=True);

            self.system_dict["training"]["status"] = True;

Ancestors

  • tf_keras_1.finetune.level_2_model_base.finetune_model
  • tf_keras_1.finetune.level_1_dataset_base.finetune_dataset
  • system.base_class.system

Methods

def get_training_estimate(self)

Get estimated time for training a single epoch based on all set parameters

Args

None
 

Returns

float
Total time per epoch in seconds
Expand source code
def get_training_estimate(self):
    '''
    Get estimated time for training a single epoch based on all set parameters

    Args:
        None

    Returns:
        float: Total time per epoch in seconds
    '''
    # Prepare scheduler, optimizer and loss exactly as a real run would.
    for loader_fn in (load_scheduler, load_optimizer, load_loss):
        self.system_dict = loader_fn(self.system_dict);

    model = self.system_dict["local"]["model"];
    model.compile(optimizer=self.system_dict["local"]["optimizer"],
            loss=self.system_dict["local"]["criterion"], metrics=['accuracy']);

    timer = TimeHistory();
    estimator = self.system_dict["local"]["data_loaders"]["estimate"];
    steps = estimator.n//estimator.batch_size;

    # Run a single epoch over the small "estimate" loader to measure speed.
    model.fit_generator(generator=estimator,
                       steps_per_epoch=steps,
                       epochs=1,
                       callbacks=[timer],
                       workers=psutil.cpu_count(),
                       initial_epoch=0,
                       verbose=0)

    # Scale the measured per-image time up to the full train + val sets.
    per_image_time = timer.times[0]/len(estimator.labels);
    total_time_per_epoch = per_image_time*(self.system_dict["dataset"]["params"]["num_train_images"] +
        self.system_dict["dataset"]["params"]["num_val_images"]);

    return total_time_per_epoch;
def set_training_final(self)

Main training function

Args

None
 

Returns

None
 
Expand source code
def set_training_final(self):
    '''
    Main training function

    Runs one of three paths depending on the experiment state:
        - resume_train: restores epoch count, metric histories and timing
          logs from disk, then continues training from the last completed
          epoch.
        - eval_infer:   training is not permitted; raises ConstraintError.
        - default:      trains from scratch starting at epoch 0.

    In both training paths the final model is saved as "final.h5", the
    best model (lowest val_loss) as "best_model.h5", and — when log saving
    is enabled — metric histories (.npy) and training curves are written
    to the log directory.

    Args:
        None

    Returns:
        None

    Raises:
        ConstraintError: if called while in eval_infer mode.
    '''
    if(self.system_dict["states"]["resume_train"]):
        self.custom_print("Training Resume");

        self.system_dict = load_scheduler(self.system_dict);
        self.system_dict = load_optimizer(self.system_dict);
        self.system_dict = load_loss(self.system_dict);

        self.system_dict["training"]["status"] = False;

        log_dir = self.system_dict["log_dir_relative"];
        model_dir = self.system_dict["model_dir_relative"];
        intermediate_model_prefix = self.system_dict["training"]["settings"]["intermediate_model_prefix"];
        save_intermediate_models = self.system_dict["training"]["settings"]["save_intermediate_models"];
        display_progress_realtime = self.system_dict["training"]["settings"]["display_progress_realtime"];
        display_progress = self.system_dict["training"]["settings"]["display_progress"];
        num_epochs = self.system_dict["hyper-parameters"]["num_epochs"];

        # Epochs already completed = CSV rows minus the header line.
        # (with-blocks fix the unclosed file handles of the old code.)
        with open(self.system_dict["log_dir"] + "/model_history_log.csv", 'r') as f:
            epochs_completed = len(f.readlines()) - 1;

        if(self.system_dict["training"]["settings"]["save_training_logs"]):
            history_df = pd.read_csv(self.system_dict["log_dir"] + "/model_history_log.csv");
            # Keras >= 2.3 logs 'accuracy'/'val_accuracy'; older versions
            # log 'acc'/'val_acc'. Probe the columns instead of assuming
            # the old names, which KeyError on newer Keras.
            if('val_accuracy' in history_df.columns):
                val_acc_history = history_df['val_accuracy'].tolist();
                train_acc_history = history_df['accuracy'].tolist();
            else:
                val_acc_history = history_df['val_acc'].tolist();
                train_acc_history = history_df['acc'].tolist();
            val_loss_history = history_df['val_loss'].tolist();
            train_loss_history = history_df['loss'].tolist();

        # Per-epoch wall-clock times of the previous run, one float per line.
        # strip() replaces the old "chop the last character" parsing, which
        # corrupted the final value when the file lacked a trailing newline.
        with open(self.system_dict["log_dir"] + "/times.txt", 'r') as f:
            times_history = [float(line.strip()) for line in f if line.strip()];

        # Append to the existing CSV so the log spans all runs.
        csv_logger = krc.CSVLogger(log_dir + "model_history_log.csv", append=True);

        # Map the user-facing progress settings onto Keras verbose levels.
        if(not self.system_dict["verbose"]):
            verbose = 0;
        elif(display_progress_realtime):
            verbose = 1;    # per-batch progress bar
        elif(display_progress):
            verbose = 2;    # one line per epoch
        else:
            verbose = 0;

        # Checkpoints: every epoch (optional), best-so-far, and resume state.
        ckpt_all = krc.ModelCheckpoint(model_dir + intermediate_model_prefix + '{epoch:02d}.h5', monitor='val_loss', verbose=verbose, 
            save_best_only=False, save_weights_only=False, mode='auto', period=1);

        ckpt_best = krc.ModelCheckpoint(model_dir + 'best_model.h5', monitor='val_loss', verbose=verbose, 
            save_best_only=True, save_weights_only=False, mode='auto', period=1);

        resume = krc.ModelCheckpoint(model_dir + 'resume_state.h5', monitor='val_loss', verbose=verbose, 
            save_best_only=False, save_weights_only=False, mode='auto', period=1);

        time_callback = TimeHistory(log_dir)
        memory_callback = MemoryHistory();

        callbacks = [krc.History(), memory_callback, time_callback, csv_logger, resume, ckpt_best];

        if(save_intermediate_models):
            callbacks.append(ckpt_all);

        if(self.system_dict["local"]["learning_rate_scheduler"]):
            # ReduceLROnPlateau is already a Keras callback; every other
            # scheduler is a schedule function that must be wrapped.
            if(self.system_dict["hyper-parameters"]["learning_rate_scheduler"]["name"] == "reduceonplateaulr"):
                callbacks.append(self.system_dict["local"]["learning_rate_scheduler"]);
            else:
                callbacks.append(krc.LearningRateScheduler(self.system_dict["local"]["learning_rate_scheduler"]));

        step_size_train = self.system_dict["local"]["data_loaders"]["train"].n//self.system_dict["local"]["data_loaders"]["train"].batch_size;
        step_size_val = self.system_dict["local"]["data_loaders"]["val"].n//self.system_dict["local"]["data_loaders"]["val"].batch_size;

        # Continue numbering from where the interrupted run stopped.
        initial_epoch = epochs_completed;

        self.system_dict["local"]["model"].compile(optimizer=self.system_dict["local"]["optimizer"], 
            loss=self.system_dict["local"]["criterion"], metrics=['accuracy']);

        history = self.system_dict["local"]["model"].fit_generator(generator=self.system_dict["local"]["data_loaders"]["train"],
                           steps_per_epoch=step_size_train,
                           epochs=num_epochs,
                           callbacks=callbacks,
                           validation_data=self.system_dict["local"]["data_loaders"]["val"],
                           validation_steps=step_size_val,
                           initial_epoch = initial_epoch,
                           verbose=verbose);

        # Total training time = previous runs + this run.
        times_history += time_callback.times;
        time_elapsed_since = sum(times_history);

        self.system_dict["training"]["outputs"]["training_time"] = "{:.0f}m {:.0f}s".format(time_elapsed_since // 60, time_elapsed_since % 60);

        # Probe history keys instead of parsing keras.__version__ — the old
        # minor-version check ("3") silently picked wrong keys on other
        # Keras releases that also use 'accuracy'/'val_accuracy'.
        if('val_accuracy' in history.history):
            new_val_acc = history.history['val_accuracy'];
            new_train_acc = history.history['accuracy'];
        else:
            new_val_acc = history.history['val_acc'];
            new_train_acc = history.history['acc'];
        new_val_loss = history.history['val_loss'];
        new_train_loss = history.history['loss'];

        if(self.system_dict["training"]["settings"]["save_training_logs"]):
            # Extend the histories recovered from the interrupted run so the
            # saved logs and best-accuracy search cover every epoch; this
            # mirrors how times_history accumulates above. The old code
            # loaded the prior histories and then discarded them.
            val_acc_history += new_val_acc;
            train_acc_history += new_train_acc;
            val_loss_history += new_val_loss;
            train_loss_history += new_train_loss;
        else:
            val_acc_history = new_val_acc;
            train_acc_history = new_train_acc;
            val_loss_history = new_val_loss;
            train_loss_history = new_train_loss;

        self.system_dict["training"]["outputs"]["best_val_acc"] = max(val_acc_history);
        self.system_dict["training"]["outputs"]["best_val_acc_epoch_num"] = val_acc_history.index(self.system_dict["training"]["outputs"]["best_val_acc"]);
        self.system_dict["training"]["outputs"]["max_gpu_memory_usage"] = memory_callback.max_gpu_usage;

        if(self.system_dict["training"]["settings"]["save_training_logs"]):
            np.save(self.system_dict["log_dir"] + "val_acc_history.npy", np.array(val_acc_history));
            np.save(self.system_dict["log_dir"] + "val_loss_history.npy", np.array(val_loss_history));
            np.save(self.system_dict["log_dir"] + "train_acc_history.npy", np.array(train_acc_history));
            np.save(self.system_dict["log_dir"] + "train_loss_history.npy", np.array(train_loss_history));

        if(self.system_dict["training"]["settings"]["display_progress"]):
            self.custom_print('    Training completed in: {:.0f}m {:.0f}s'.format(time_elapsed_since // 60, time_elapsed_since % 60))
            self.custom_print('    Best val Acc:          {:4f}'.format(self.system_dict["training"]["outputs"]["best_val_acc"]))
            self.custom_print("");


    elif(self.system_dict["states"]["eval_infer"]):
        msg = "Cannot train in testing (eval_infer) mode.\n";
        msg += "Tip - use new_experiment function with a copy_from argument.\n";
        raise ConstraintError(msg);


    else:
        self.custom_print("Training Start");

        self.system_dict = load_scheduler(self.system_dict);
        self.system_dict = load_optimizer(self.system_dict);
        self.system_dict = load_loss(self.system_dict);

        self.system_dict["training"]["status"] = False;

        log_dir = self.system_dict["log_dir_relative"];
        model_dir = self.system_dict["model_dir_relative"];
        intermediate_model_prefix = self.system_dict["training"]["settings"]["intermediate_model_prefix"];
        save_intermediate_models = self.system_dict["training"]["settings"]["save_intermediate_models"];
        display_progress_realtime = self.system_dict["training"]["settings"]["display_progress_realtime"];
        display_progress = self.system_dict["training"]["settings"]["display_progress"];
        num_epochs = self.system_dict["hyper-parameters"]["num_epochs"];

        # Map the user-facing progress settings onto Keras verbose levels.
        # (The old code computed this twice; once is enough.)
        if(not self.system_dict["verbose"]):
            verbose = 0;
        elif(display_progress_realtime):
            verbose = 1;    # per-batch progress bar
        elif(display_progress):
            verbose = 2;    # one line per epoch
        else:
            verbose = 0;

        # Fresh run: start a new CSV log.
        csv_logger = krc.CSVLogger(log_dir + "model_history_log.csv", append=False);
        ckpt_all = krc.ModelCheckpoint(model_dir + intermediate_model_prefix + '{epoch:02d}.h5', monitor='val_loss', verbose=verbose, 
            save_best_only=False, save_weights_only=False, mode='auto', period=1);

        ckpt_best = krc.ModelCheckpoint(model_dir + 'best_model.h5', monitor='val_loss', verbose=verbose, 
            save_best_only=True, save_weights_only=False, mode='auto', period=1);

        resume = krc.ModelCheckpoint(model_dir + 'resume_state.h5', monitor='val_loss', verbose=verbose, 
            save_best_only=False, save_weights_only=False, mode='auto', period=1);

        time_callback = TimeHistory(log_dir)
        memory_callback = MemoryHistory();

        callbacks = [krc.History(), memory_callback, time_callback, csv_logger, resume, ckpt_best];

        if(save_intermediate_models):
            callbacks.append(ckpt_all);

        if(self.system_dict["local"]["learning_rate_scheduler"]):
            # ReduceLROnPlateau is already a Keras callback; every other
            # scheduler is a schedule function that must be wrapped.
            if(self.system_dict["hyper-parameters"]["learning_rate_scheduler"]["name"] == "reduceonplateaulr"):
                callbacks.append(self.system_dict["local"]["learning_rate_scheduler"]);
            else:
                callbacks.append(krc.LearningRateScheduler(self.system_dict["local"]["learning_rate_scheduler"]));

        step_size_train = self.system_dict["local"]["data_loaders"]["train"].n//self.system_dict["local"]["data_loaders"]["train"].batch_size;
        step_size_val = self.system_dict["local"]["data_loaders"]["val"].n//self.system_dict["local"]["data_loaders"]["val"].batch_size;

        initial_epoch = 0;

        self.system_dict["local"]["model"].compile(optimizer=self.system_dict["local"]["optimizer"], 
            loss=self.system_dict["local"]["criterion"], metrics=['accuracy']);

        history = self.system_dict["local"]["model"].fit_generator(generator=self.system_dict["local"]["data_loaders"]["train"],
                           steps_per_epoch=step_size_train,
                           epochs=num_epochs,
                           callbacks=callbacks,
                           validation_data=self.system_dict["local"]["data_loaders"]["val"],
                           validation_steps=step_size_val,
                           initial_epoch = initial_epoch,
                           verbose=verbose);

        time_elapsed_since = sum(time_callback.times);

        self.system_dict["training"]["outputs"]["training_time"] = "{:.0f}m {:.0f}s".format(time_elapsed_since // 60, time_elapsed_since % 60);

        # Probe history keys instead of parsing keras.__version__ (see note
        # in the resume branch).
        if('val_accuracy' in history.history):
            val_acc_history = history.history['val_accuracy'];
            train_acc_history = history.history['accuracy'];
        else:
            val_acc_history = history.history['val_acc'];
            train_acc_history = history.history['acc'];
        val_loss_history = history.history['val_loss'];
        train_loss_history = history.history['loss'];

        self.system_dict["training"]["outputs"]["best_val_acc"] = max(val_acc_history);
        self.system_dict["training"]["outputs"]["best_val_acc_epoch_num"] = val_acc_history.index(self.system_dict["training"]["outputs"]["best_val_acc"]);
        self.system_dict["training"]["outputs"]["max_gpu_memory_usage"] = memory_callback.max_gpu_usage;

        if(self.system_dict["training"]["settings"]["save_training_logs"]):
            np.save(self.system_dict["log_dir"] + "val_acc_history.npy", np.array(val_acc_history));
            np.save(self.system_dict["log_dir"] + "val_loss_history.npy", np.array(val_loss_history));
            np.save(self.system_dict["log_dir"] + "train_acc_history.npy", np.array(train_acc_history));
            np.save(self.system_dict["log_dir"] + "train_loss_history.npy", np.array(train_loss_history));

        if(self.system_dict["training"]["settings"]["display_progress"]):
            self.custom_print('    Training completed in: {:.0f}m {:.0f}s'.format(time_elapsed_since // 60, time_elapsed_since % 60))
            self.custom_print('    Best val Acc:          {:4f}'.format(self.system_dict["training"]["outputs"]["best_val_acc"]))
            self.custom_print("");


    # Common tail for both training paths: persist the final model and the
    # experiment state.
    self.system_dict["local"]["model"].save(self.system_dict["model_dir_relative"] + "final.h5");
    self.system_dict["training"]["status"] = True;
    save(self.system_dict);

    if(not self.system_dict["states"]["eval_infer"]):
        self.custom_print("Training End");
        self.custom_print("");
        # best_val_acc is converted to a formatted string for display/storage.
        self.system_dict["training"]["outputs"]["best_val_acc"] = "{:4f}".format(self.system_dict["training"]["outputs"]["best_val_acc"]);
        self.system_dict["training"]["outputs"]["training_time"] = "{:.0f}m {:.0f}s".format(time_elapsed_since // 60, time_elapsed_since % 60);
        self.system_dict["training"]["outputs"]["max_gpu_usage"] = str(self.system_dict["training"]["outputs"]["max_gpu_memory_usage"]) + " Mb";

        if(self.system_dict["training"]["settings"]["save_training_logs"]): 
            self.custom_print("Training Outputs");
            self.custom_print("    Model Dir:   {}".format(self.system_dict["model_dir"]));
            self.custom_print("    Log Dir:     {}".format(self.system_dict["log_dir"]));
            self.custom_print("    Final model: {}".format("final"));
            self.custom_print("    Best model:  {}".format("best_model"));
            self.custom_print("    Log 1 - Validation accuracy history log: {}".format("val_acc_history.npy"));
            self.custom_print("    Log 2 - Validation loss history log:     {}".format("val_loss_history.npy"));
            self.custom_print("    Log 3 - Training accuracy history log:   {}".format("train_acc_history.npy"));
            self.custom_print("    Log 4 - Training loss history log:       {}".format("train_loss_history.npy"));
            # NOTE(review): Logs 5 and 6 repeat the Log 4 filename; the curve
            # images saved by create_train_test_plots_* presumably have their
            # own names — confirm and update these two strings.
            self.custom_print("    Log 5 - Training curve:                  {}".format("train_loss_history.npy"));
            self.custom_print("    Log 6 - Validation curve:                {}".format("train_loss_history.npy"));
            self.custom_print("");

            np.save(self.system_dict["log_dir"] + "val_acc_history.npy", np.array(val_acc_history));
            np.save(self.system_dict["log_dir"] + "val_loss_history.npy", np.array(val_loss_history));
            np.save(self.system_dict["log_dir"] + "train_acc_history.npy", np.array(train_acc_history));
            np.save(self.system_dict["log_dir"] + "train_loss_history.npy", np.array(train_loss_history));

            self.system_dict["training"]["outputs"]["log_val_acc_history"] = self.system_dict["log_dir"] + "val_acc_history.npy";
            self.system_dict["training"]["outputs"]["log_val_loss_history"] = self.system_dict["log_dir"] + "val_loss_history.npy";
            self.system_dict["training"]["outputs"]["log_train_acc_history"] = self.system_dict["log_dir"] + "train_acc_history.npy";
            self.system_dict["training"]["outputs"]["log_train_loss_history"] = self.system_dict["log_dir"] + "train_loss_history.npy";

            self.system_dict["training"]["outputs"]["log_val_acc_history_relative"] = self.system_dict["log_dir_relative"] + "val_acc_history.npy";
            self.system_dict["training"]["outputs"]["log_val_loss_history_relative"] = self.system_dict["log_dir_relative"] + "val_loss_history.npy";
            self.system_dict["training"]["outputs"]["log_train_acc_history_relative"] = self.system_dict["log_dir_relative"] + "train_acc_history.npy";
            self.system_dict["training"]["outputs"]["log_train_loss_history_relative"] = self.system_dict["log_dir_relative"] + "train_loss_history.npy";

            create_train_test_plots_accuracy([train_acc_history, val_acc_history], ["Epoch Num", "Accuracy"], self.system_dict["log_dir"], show_img=False, save_img=True);
            create_train_test_plots_loss([train_loss_history, val_loss_history], ["Epoch Num", "Loss"], self.system_dict["log_dir"], show_img=False, save_img=True);

        self.system_dict["training"]["status"] = True;