Module monk.gluon_prototype
from gluon.finetune.imports import *
from system.imports import *
from gluon.finetune.level_14_master_main import prototype_master
class prototype(prototype_master):
'''
Main class for the MXNet backend
Args:
verbose (int): Set verbosity levels
0 - Print Nothing
1 - Print desired details
'''
def __init__(self, verbose=1):
super().__init__(verbose=verbose);
self.system_dict["library"] = "Mxnet";
self.custom_print("Mxnet Version: {}".format(mx.__version__));
self.custom_print("");
###############################################################################################################################################
def Prototype(self, project_name, experiment_name, eval_infer=False, resume_train=False, copy_from=False, pseudo_copy_from=False, summary=False):
'''
Create project and experiment for instantiation and running the experiments
Args:
project_name (str): Project Name
experiment_name (str): Experiment Name
eval_infer (bool): If set as True, model is loaded in evaluation mode
resume_train (bool): If set as True, model is loaded from last checkpoint
copy_from (list): [project, experiment] to copy from
pseudo_copy_from (list): For creating sub-experiments while in hyper-parametric analysis state
summary (bool): Dummy variable (placeholder argument)
Returns:
None
'''
self.set_system_project(project_name);
self.set_system_experiment(experiment_name, eval_infer=eval_infer, resume_train=resume_train, copy_from=copy_from,
pseudo_copy_from=pseudo_copy_from, summary=summary);
self.custom_print("Experiment Details");
self.custom_print(" Project: {}".format(self.system_dict["project_name"]));
self.custom_print(" Experiment: {}".format(self.system_dict["experiment_name"]));
self.custom_print(" Dir: {}".format(self.system_dict["experiment_dir"]));
self.custom_print("");
###############################################################################################################################################
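# Usage sketch (illustrative only; project and experiment names below are placeholders):
#
#   from gluon_prototype import prototype
#   gtf = prototype(verbose=1)
#   gtf.Prototype("sample-project", "exp-1")
#   # Resume a previous run instead of starting fresh:
#   # gtf.Prototype("sample-project", "exp-1", resume_train=True)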
###############################################################################################################################################
def Default(self, dataset_path=False, path_to_csv=False, delimiter=",", model_name="resnet18_v1", freeze_base_network=True, num_epochs=10):
'''
Use monk in default (quick prototyping) mode
Args:
dataset_path (str, list): Path to Dataset folder
1) Single string if validation data does not exist
2) List [train_path, val_path] in case of separate train and val data
path_to_csv (str, list): Path to csv file pointing towards images
1) Single string if validation data does not exist
2) List [train_csv_path, val_csv_path] in case of separate train and val csv files
delimiter (str): Delimiter for csv file
model_name (str): Base model name
freeze_base_network (bool): If True, the base network is frozen
num_epochs (int): Number of epochs to train the data
Returns:
None
'''
if(self.system_dict["states"]["eval_infer"]):
self.Dataset_Params(dataset_path=dataset_path, path_to_csv=path_to_csv, delimiter=delimiter);
self.Dataset();
else:
input_size=224;
self.Dataset_Params(dataset_path=dataset_path, path_to_csv=path_to_csv, delimiter=delimiter,
split=0.7, input_size=input_size, batch_size=4, shuffle_data=True, num_processors=psutil.cpu_count());
#train-val
self.apply_random_horizontal_flip(probability=0.8, train=True, val=True);
self.apply_normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], train=True, val=True, test=True);
self.Dataset();
self.Model_Params(model_name=model_name, freeze_base_network=freeze_base_network, use_gpu=True, use_pretrained=True);
self.Model();
model_name = self.system_dict["model"]["params"]["model_name"];
if("resnet" in model_name or "alexnet" in model_name or "darknet" in model_name or "xception" in model_name):
self.optimizer_sgd(0.01);
if(num_epochs>10):
self.lr_step_decrease(max(min(num_epochs//3, 8), 1), gamma=0.1);
else:
self.lr_step_decrease(1, gamma=0.98);
self.loss_softmax_crossentropy();
elif("vgg" in model_name):
self.optimizer_sgd(0.001);
if(num_epochs>10):
self.lr_step_decrease(max(min(num_epochs//3, 8), 1), gamma=0.1);
else:
self.lr_step_decrease(1, gamma=0.98);
self.loss_softmax_crossentropy();
elif("squeezenet1.0" in model_name):
self.optimizer_sgd(0.04, weight_decay=0.0002);
self.lr_step_decrease(1, gamma=0.98);
self.loss_softmax_crossentropy();
elif("squeezenet1.1" in model_name):
self.optimizer_sgd(0.001, weight_decay=0.0002);
self.lr_step_decrease(1, gamma=0.98);
self.loss_softmax_crossentropy();
elif("dense" in model_name):
self.optimizer_sgd(0.01, weight_decay=0.0001);
if(num_epochs>10):
self.lr_multistep_decrease([max(num_epochs//2, 1), max(3*num_epochs//4, 2)]);
else:
self.lr_step_decrease(1, gamma=0.98);
self.loss_softmax_crossentropy();
elif("resnext" in model_name or "senet" in model_name):
self.optimizer_sgd(0.01, weight_decay=0.0001);
if(num_epochs>10):
self.lr_step_decrease(max(num_epochs//3, 1), gamma=0.1);
else:
self.lr_step_decrease(1, gamma=0.98);
self.loss_softmax_crossentropy();
elif("mobile" in model_name):
self.optimizer_sgd(0.01, weight_decay=0.00004, momentum=0.9);
self.lr_step_decrease(1, gamma=0.97);
self.loss_softmax_crossentropy();
elif("inception" in model_name):
self.optimizer_sgd(0.045, weight_decay=0.0001, momentum=0.9);
self.lr_step_decrease(1, gamma=0.9);
self.loss_softmax_crossentropy();
self.Training_Params(num_epochs=num_epochs, display_progress=True, display_progress_realtime=True,
save_intermediate_models=True, intermediate_model_prefix="intermediate_model_", save_training_logs=True);
self.system_dict["hyper-parameters"]["status"] = True;
save(self.system_dict);
###############################################################################################################################################
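# Usage sketch for Default mode (illustrative; the dataset path and epoch count are placeholders):
#
#   gtf = prototype(verbose=1)
#   gtf.Prototype("sample-project", "quick-run")
#   gtf.Default(dataset_path="./data/train", model_name="resnet18_v1",
#               freeze_base_network=True, num_epochs=5)
#   gtf.Train()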
###############################################################################################################################################
def Summary(self):
'''
Print summary of entire project
Args:
None
Returns:
None
'''
print_summary(self.system_dict["fname_relative"]);
###############################################################################################################################################
###############################################################################################################################################
def List_Models(self):
'''
List all base models supported.
Args:
None
Returns:
None
'''
self.print_list_models();
###############################################################################################################################################
## Will be deprecated in v2.0
###############################################################################################################################################
def List_Layers(self):
'''
List all layers available for appending the base model.
Args:
None
Returns:
None
'''
self.print_list_layers_transfer_learning();
###############################################################################################################################################
###############################################################################################################################################
def List_Layers_Transfer_Learning(self):
'''
List all layers available for appending the base model.
Args:
None
Returns:
None
'''
self.print_list_layers_transfer_learning();
###############################################################################################################################################
###############################################################################################################################################
def List_Layers_Custom_Model(self):
'''
List all layers available for building a custom model.
Args:
None
Returns:
None
'''
self.print_list_layers_custom_model();
###############################################################################################################################################
## Will be deprecated in v2.0
###############################################################################################################################################
def List_Activations(self):
'''
List all activations available for appending the base model.
Args:
None
Returns:
None
'''
self.print_list_activations_transfer_learning();
###############################################################################################################################################
###############################################################################################################################################
def List_Activations_Transfer_Learning(self):
'''
List all activations available for appending the base model.
Args:
None
Returns:
None
'''
self.print_list_activations_transfer_learning();
###############################################################################################################################################
###############################################################################################################################################
def List_Activations_Custom_Model(self):
'''
List all activations available for building a custom model.
Args:
None
Returns:
None
'''
self.print_list_activations_custom_model();
###############################################################################################################################################
###############################################################################################################################################
def List_Losses(self):
'''
List all loss functions available.
Args:
None
Returns:
None
'''
self.print_list_losses();
###############################################################################################################################################
###############################################################################################################################################
def List_Optimizers(self):
'''
List all optimizer functions available.
Args:
None
Returns:
None
'''
self.print_list_optimizers();
###############################################################################################################################################
###############################################################################################################################################
def List_Schedulers(self):
'''
List all learning rate scheduler functions available.
Args:
None
Returns:
None
'''
self.print_list_schedulers();
###############################################################################################################################################
###############################################################################################################################################
def List_Transforms(self):
'''
List all data transformation functions available.
Args:
None
Returns:
None
'''
self.print_list_transforms();
###############################################################################################################################################
###############################################################################################################################################
def List_Blocks(self):
'''
List all blocks available for building a custom model.
Args:
None
Returns:
None
'''
self.print_list_blocks();
###############################################################################################################################################
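# Usage sketch: the List_* helpers only print to stdout and return None.
#
#   gtf = prototype(verbose=1)
#   gtf.List_Models()
#   gtf.List_Optimizers()
#   gtf.List_Schedulers()
#   gtf.List_Transforms()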
###############################################################################################################################################
def Analyse_Learning_Rates(self, analysis_name, lr_list, percent_data, num_epochs=2, state="keep_all"):
'''
Hyperparameter Tuner - Analyse learning rate
Takes in a list of learning rates and trains on a part of dataset
Provides summaries and graphs on every sub-experiment created
Args:
analysis_name (str): A suitable name for analysis
lr_list (list): List of learning rates.
percent_data (int): Percentage of complete dataset to run experiments on.
num_epochs (int): Number of epochs for each sub-experiment
state ("str"): If set as "keep_all", keeps every file in the sub-experiment
If set as "keep_none", keeps only comparison files for each experiment
Returns:
dict: Tabular data on training_accuracy, validation_accuracy, training_loss, validation_loss and training_time for each experiment.
'''
from gluon_prototype import prototype
project = analysis_name;
self.custom_print("");
self.custom_print("Running Learning rate analysis"); #Change 1
self.custom_print("Analysis Name : {}".format(project));
self.custom_print("");
for i in range(len(lr_list)): #Change 2
gtf_ = prototype(verbose=0);
self.custom_print("Running experiment : {}/{}".format(i+1, len(lr_list))); #Change 3
experiment = "Learning_Rate_" + str(lr_list[i]); #Change 4
self.custom_print("Experiment name : {}".format(experiment))
gtf_.Prototype(project, experiment, pseudo_copy_from=[self.system_dict["project_name"], self.system_dict["experiment_name"]]);
gtf_.Dataset_Percent(percent_data);
dataset_type = gtf_.system_dict["dataset"]["dataset_type"];
dataset_train_path = gtf_.system_dict["dataset"]["train_path"];
dataset_val_path = gtf_.system_dict["dataset"]["val_path"];
csv_train = gtf_.system_dict["dataset"]["csv_train"];
csv_val = gtf_.system_dict["dataset"]["csv_val"];
if(dataset_type=="train"):
gtf_.update_dataset(dataset_path=dataset_train_path, path_to_csv="sampled_dataset_train.csv");
elif(dataset_type=="train-val"):
gtf_.update_dataset(dataset_path=[dataset_train_path, dataset_val_path],
path_to_csv=["sampled_dataset_train.csv", "sampled_dataset_val.csv"]);
elif(dataset_type=="csv_train"):
gtf_.update_dataset(dataset_path=dataset_train_path, path_to_csv="sampled_dataset_train.csv");
elif(dataset_type=="csv_train-val"):
gtf_.update_dataset(dataset_path=[dataset_train_path, dataset_val_path],
path_to_csv=["sampled_dataset_train.csv", "sampled_dataset_val.csv"]);
gtf_.update_learning_rate(lr_list[i]) #Change 5
gtf_.Reload(); #Change 6
gtf_.update_num_epochs(num_epochs);
gtf_.update_display_progress_realtime(False)
gtf_.update_save_intermediate_models(False);
total_time_per_epoch = gtf_.get_training_estimate();
total_time = total_time_per_epoch*num_epochs;
if(int(total_time//60) == 0):
self.custom_print("Estimated time : {} sec".format(total_time));
else:
self.custom_print("Estimated time : {} min".format(int(total_time//60)+1));
gtf_.Train();
self.custom_print("Experiment Complete");
self.custom_print("\n");
self.custom_print("Comparing Experiments");
from compare_prototype import compare
ctf_ = compare(verbose=0);
ctf_.Comparison("Comparison_" + analysis_name);
self.custom_print("Comparison ID: {}".format("Comparison_" + analysis_name));
training_accuracies = [];
validation_accuracies = [];
training_losses = [];
validation_losses = [];
tabular_data = [];
for i in range(len(lr_list)): #Change 7
project = analysis_name;
experiment = "Learning_Rate_" + str(lr_list[i]); #Change 8
ctf_.Add_Experiment(project, experiment)
tmp = [];
tmp.append(experiment);
training_accuracy_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/train_acc_history.npy";
tmp.append(np.load(training_accuracy_file)[-1]);
validation_accuracy_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/val_acc_history.npy";
tmp.append(np.load(validation_accuracy_file)[-1]);
training_loss_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/train_loss_history.npy";
tmp.append(np.load(training_loss_file)[-1]);
validation_loss_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/val_loss_history.npy";
tmp.append(np.load(validation_loss_file)[-1]);
tabular_data.append(tmp)
ctf_.Generate_Statistics();
self.custom_print("Generated statistics post all epochs");
self.custom_print(tabulate(tabular_data, headers=['Experiment Name', 'Train Acc', 'Val Acc', 'Train Loss', 'Val Loss'], tablefmt='orgtbl'));
self.custom_print("");
return_dict = {};
for i in range(len(tabular_data)):
return_dict[tabular_data[i][0]] = {};
return_dict[tabular_data[i][0]]["training_accuracy"] = tabular_data[i][1];
return_dict[tabular_data[i][0]]["validation_accuracy"] = tabular_data[i][2];
return_dict[tabular_data[i][0]]["training_loss"] = tabular_data[i][3];
return_dict[tabular_data[i][0]]["validation_loss"] = tabular_data[i][4];
fname = self.system_dict["master_systems_dir_relative"] + analysis_name + "/" + tabular_data[i][0] + "/experiment_state.json";
system_dict = read_json(fname);
return_dict[tabular_data[i][0]]["training_time"] = system_dict["training"]["outputs"]["training_time"];
if(state=="keep_none"):
shutil.rmtree(self.system_dict["master_systems_dir_relative"] + analysis_name);
return return_dict
###############################################################################################################################################
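# Usage sketch (placeholder values): train each learning rate on 10% of the
# data for 2 epochs, then read the returned summary dict.
#
#   analysis = gtf.Analyse_Learning_Rates("lr_analysis", [0.1, 0.01, 0.001],
#                                         10, num_epochs=2, state="keep_none")
#   val_acc = analysis["Learning_Rate_0.01"]["validation_accuracy"]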
###############################################################################################################################################
def Analyse_Input_Sizes(self, analysis_name, inp_size_list, percent_data, num_epochs=2, state="keep_all"):
'''
Hyperparameter Tuner - Analyse input sizes
Takes in a list of input sizes and trains on a part of dataset
Provides summaries and graphs on every sub-experiment created
Args:
analysis_name (str): A suitable name for analysis
inp_size_list (list): List of input_sizes.
percent_data (int): Percentage of complete dataset to run experiments on.
num_epochs (int): Number of epochs for each sub-experiment
state ("str"): If set as "keep_all", keeps every file in the sub-experiment
If set as "keep_none", keeps only comparison files for each experiment
Returns:
dict: Tabular data on training_accuracy, validation_accuracy, training_loss, validation_loss and training_time for each experiment.
'''
from gluon_prototype import prototype
project = analysis_name;
self.custom_print("");
self.custom_print("Running Input Size analysis"); #Change 1
self.custom_print("Analysis Name : {}".format(project));
self.custom_print("");
for i in range(len(inp_size_list)): #Change 2
gtf_ = prototype(verbose=0);
self.custom_print("Running experiment : {}/{}".format(i+1, len(inp_size_list))); #Change 3
experiment = "Input_Size_" + str(inp_size_list[i]); #Change 4
self.custom_print("Experiment name : {}".format(experiment))
gtf_.Prototype(project, experiment, pseudo_copy_from=[self.system_dict["project_name"], self.system_dict["experiment_name"]]);
gtf_.Dataset_Percent(percent_data);
dataset_type = gtf_.system_dict["dataset"]["dataset_type"];
dataset_train_path = gtf_.system_dict["dataset"]["train_path"];
dataset_val_path = gtf_.system_dict["dataset"]["val_path"];
csv_train = gtf_.system_dict["dataset"]["csv_train"];
csv_val = gtf_.system_dict["dataset"]["csv_val"];
if(dataset_type=="train"):
gtf_.update_dataset(dataset_path=dataset_train_path, path_to_csv="sampled_dataset_train.csv");
elif(dataset_type=="train-val"):
gtf_.update_dataset(dataset_path=[dataset_train_path, dataset_val_path],
path_to_csv=["sampled_dataset_train.csv", "sampled_dataset_val.csv"]);
elif(dataset_type=="csv_train"):
gtf_.update_dataset(dataset_path=dataset_train_path, path_to_csv="sampled_dataset_train.csv");
elif(dataset_type=="csv_train-val"):
gtf_.update_dataset(dataset_path=[dataset_train_path, dataset_val_path],
path_to_csv=["sampled_dataset_train.csv", "sampled_dataset_val.csv"]);
gtf_.update_input_size(inp_size_list[i]) #Change 5
gtf_.Reload(); #Change 6
gtf_.update_num_epochs(num_epochs);
gtf_.update_display_progress_realtime(False)
gtf_.update_save_intermediate_models(False);
total_time_per_epoch = gtf_.get_training_estimate();
total_time = total_time_per_epoch*num_epochs;
if(int(total_time//60) == 0):
self.custom_print("Estimated time : {} sec".format(total_time));
else:
self.custom_print("Estimated time : {} min".format(int(total_time//60)+1));
gtf_.Train();
self.custom_print("Experiment Complete");
self.custom_print("\n");
self.custom_print("Comparing Experiments");
from compare_prototype import compare
ctf_ = compare(verbose=0);
ctf_.Comparison("Comparison_" + analysis_name);
self.custom_print("Comparison ID: {}".format("Comparison_" + analysis_name));
training_accuracies = [];
validation_accuracies = [];
training_losses = [];
validation_losses = [];
tabular_data = [];
for i in range(len(inp_size_list)): #Change 7
project = analysis_name;
experiment = "Input_Size_" + str(inp_size_list[i]); #Change 8
ctf_.Add_Experiment(project, experiment)
tmp = [];
tmp.append(experiment);
training_accuracy_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/train_acc_history.npy";
tmp.append(np.load(training_accuracy_file)[-1]);
validation_accuracy_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/val_acc_history.npy";
tmp.append(np.load(validation_accuracy_file)[-1]);
training_loss_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/train_loss_history.npy";
tmp.append(np.load(training_loss_file)[-1]);
validation_loss_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/val_loss_history.npy";
tmp.append(np.load(validation_loss_file)[-1]);
tabular_data.append(tmp)
ctf_.Generate_Statistics();
self.custom_print("Generated statistics post all epochs");
self.custom_print(tabulate(tabular_data, headers=['Experiment Name', 'Train Acc', 'Val Acc', 'Train Loss', 'Val Loss'], tablefmt='orgtbl'));
self.custom_print("");
return_dict = {};
for i in range(len(tabular_data)):
return_dict[tabular_data[i][0]] = {};
return_dict[tabular_data[i][0]]["training_accuracy"] = tabular_data[i][1];
return_dict[tabular_data[i][0]]["validation_accuracy"] = tabular_data[i][2];
return_dict[tabular_data[i][0]]["training_loss"] = tabular_data[i][3];
return_dict[tabular_data[i][0]]["validation_loss"] = tabular_data[i][4];
fname = self.system_dict["master_systems_dir_relative"] + analysis_name + "/" + tabular_data[i][0] + "/experiment_state.json";
system_dict = read_json(fname);
return_dict[tabular_data[i][0]]["training_time"] = system_dict["training"]["outputs"]["training_time"];
if(state=="keep_none"):
shutil.rmtree(self.system_dict["master_systems_dir_relative"] + analysis_name);
return return_dict
###############################################################################################################################################
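# Usage sketch (placeholder values):
#
#   analysis = gtf.Analyse_Input_Sizes("input_size_analysis", [224, 256, 320],
#                                      10, num_epochs=2, state="keep_all")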
###############################################################################################################################################
def Analyse_Batch_Sizes(self, analysis_name, batch_size_list, percent_data, num_epochs=2, state="keep_all"):
'''
Hyperparameter Tuner - Analyse batch sizes
Takes in a list of batch sizes and trains on a part of dataset
Provides summaries and graphs on every sub-experiment created
Args:
analysis_name (str): A suitable name for analysis
batch_size_list (list): List of batch sizes.
percent_data (int): Percentage of complete dataset to run experiments on.
num_epochs (int): Number of epochs for each sub-experiment
state ("str"): If set as "keep_all", keeps every file in the sub-experiment
If set as "keep_none", keeps only comparison files for each experiment
Returns:
dict: Tabular data on training_accuracy, validation_accuracy, training_loss, validation_loss and training_time for each experiment.
'''
from gluon_prototype import prototype
project = analysis_name;
self.custom_print("");
self.custom_print("Running Batch Size analysis"); #Change 1
self.custom_print("Analysis Name : {}".format(project));
self.custom_print("");
for i in range(len(batch_size_list)): #Change 2
gtf_ = prototype(verbose=0);
self.custom_print("Running experiment : {}/{}".format(i+1, len(batch_size_list))); #Change 3
experiment = "Batch_Size_" + str(batch_size_list[i]); #Change 4, 5
self.custom_print("Experiment name : {}".format(experiment))
gtf_.Prototype(project, experiment, pseudo_copy_from=[self.system_dict["project_name"], self.system_dict["experiment_name"]]);
gtf_.Dataset_Percent(percent_data);
dataset_type = gtf_.system_dict["dataset"]["dataset_type"];
dataset_train_path = gtf_.system_dict["dataset"]["train_path"];
dataset_val_path = gtf_.system_dict["dataset"]["val_path"];
csv_train = gtf_.system_dict["dataset"]["csv_train"];
csv_val = gtf_.system_dict["dataset"]["csv_val"];
if(dataset_type=="train"):
gtf_.update_dataset(dataset_path=dataset_train_path, path_to_csv="sampled_dataset_train.csv");
elif(dataset_type=="train-val"):
gtf_.update_dataset(dataset_path=[dataset_train_path, dataset_val_path],
path_to_csv=["sampled_dataset_train.csv", "sampled_dataset_val.csv"]);
elif(dataset_type=="csv_train"):
gtf_.update_dataset(dataset_path=dataset_train_path, path_to_csv="sampled_dataset_train.csv");
elif(dataset_type=="csv_train-val"):
gtf_.update_dataset(dataset_path=[dataset_train_path, dataset_val_path],
path_to_csv=["sampled_dataset_train.csv", "sampled_dataset_val.csv"]);
gtf_.update_batch_size(batch_size_list[i]) #Change 6
gtf_.Reload(); #Change 7
gtf_.update_num_epochs(num_epochs);
gtf_.update_display_progress_realtime(False)
gtf_.update_save_intermediate_models(False);
total_time_per_epoch = gtf_.get_training_estimate();
total_time = total_time_per_epoch*num_epochs;
if(int(total_time//60) == 0):
self.custom_print("Estimated time : {} sec".format(total_time));
else:
self.custom_print("Estimated time : {} min".format(int(total_time//60)+1));
gtf_.Train();
self.custom_print("Experiment Complete");
self.custom_print("\n");
self.custom_print("Comparing Experiments");
from compare_prototype import compare
ctf_ = compare(verbose=0);
ctf_.Comparison("Comparison_" + analysis_name);
self.custom_print("Comparison ID: {}".format("Comparison_" + analysis_name));
training_accuracies = [];
validation_accuracies = [];
training_losses = [];
validation_losses = [];
tabular_data = [];
for i in range(len(batch_size_list)): #Change 8
project = analysis_name;
experiment = "Batch_Size_" + str(batch_size_list[i]); #Change 9, 10
ctf_.Add_Experiment(project, experiment)
tmp = [];
tmp.append(experiment);
training_accuracy_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/train_acc_history.npy";
tmp.append(np.load(training_accuracy_file)[-1]);
validation_accuracy_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/val_acc_history.npy";
tmp.append(np.load(validation_accuracy_file)[-1]);
training_loss_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/train_loss_history.npy";
tmp.append(np.load(training_loss_file)[-1]);
validation_loss_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/val_loss_history.npy";
tmp.append(np.load(validation_loss_file)[-1]);
tabular_data.append(tmp)
ctf_.Generate_Statistics();
self.custom_print("Generated statistics post all epochs");
self.custom_print(tabulate(tabular_data, headers=['Experiment Name', 'Train Acc', 'Val Acc', 'Train Loss', 'Val Loss'], tablefmt='orgtbl'));
self.custom_print("");
return_dict = {};
for i in range(len(tabular_data)):
return_dict[tabular_data[i][0]] = {};
return_dict[tabular_data[i][0]]["training_accuracy"] = tabular_data[i][1];
return_dict[tabular_data[i][0]]["validation_accuracy"] = tabular_data[i][2];
return_dict[tabular_data[i][0]]["training_loss"] = tabular_data[i][3];
return_dict[tabular_data[i][0]]["validation_loss"] = tabular_data[i][4];
fname = self.system_dict["master_systems_dir_relative"] + analysis_name + "/" + tabular_data[i][0] + "/experiment_state.json";
system_dict = read_json(fname);
return_dict[tabular_data[i][0]]["training_time"] = system_dict["training"]["outputs"]["training_time"];
if(state=="keep_none"):
shutil.rmtree(self.system_dict["master_systems_dir_relative"] + analysis_name);
return return_dict
###############################################################################################################################################
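# Usage sketch (placeholder values):
#
#   analysis = gtf.Analyse_Batch_Sizes("batch_size_analysis", [2, 4, 8, 16],
#                                      10, num_epochs=2, state="keep_none")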
###############################################################################################################################################
def Analyse_Models(self, analysis_name, model_list, percent_data, num_epochs=2, state="keep_all"):
'''
Hyperparameter Tuner - Analyse base models
Takes in a list of base models and trains on a part of dataset
Provides summaries and graphs on every sub-experiment created
Args:
analysis_name (str): A suitable name for analysis
model_list (list of lists): List of base models.
Each entry has the format [model_name_string, freeze_base_model_bool, use_pretrained_model_bool]
1) First arg - Model name as a string
2) Second arg - Whether to freeze the base model or not
3) Third arg - Whether to use a pretrained model or randomly initialized weights
percent_data (int): Percentage of complete dataset to run experiments on.
num_epochs (int): Number of epochs for each sub-experiment
state ("str"): If set as "keep_all", keeps every file in the sub-experiment
If set as "keep_none", keeps only comparison files for each experiment
Returns:
dict: Tabular data on training_accuracy, validation_accuracy, training_loss, validation_loss and training_time for each experiment.
'''
from gluon_prototype import prototype
project = analysis_name;
self.custom_print("");
self.custom_print("Running Model analysis"); #Change 1
self.custom_print("Analysis Name : {}".format(project));
self.custom_print("");
for i in range(len(model_list)): #Change 2
gtf_ = prototype(verbose=0);
self.custom_print("Running experiment : {}/{}".format(i+1, len(model_list))); #Change 3
if(model_list[i][1]):
experiment = "Model_" + str(model_list[i][0]) + "_freeze_base"; #Change 4, 5
else:
experiment = "Model_" + str(model_list[i][0]) + "_unfreeze_base";
if(model_list[i][2]):
experiment += "_pretrained";
else:
experiment += "_uninitialized";
self.custom_print("Experiment name : {}".format(experiment))
gtf_.Prototype(project, experiment, pseudo_copy_from=[self.system_dict["project_name"], self.system_dict["experiment_name"]]);
gtf_.Dataset_Percent(percent_data);
dataset_type = gtf_.system_dict["dataset"]["dataset_type"];
dataset_train_path = gtf_.system_dict["dataset"]["train_path"];
dataset_val_path = gtf_.system_dict["dataset"]["val_path"];
csv_train = gtf_.system_dict["dataset"]["csv_train"];
csv_val = gtf_.system_dict["dataset"]["csv_val"];
if(dataset_type=="train"):
gtf_.update_dataset(dataset_path=dataset_train_path, path_to_csv="sampled_dataset_train.csv");
elif(dataset_type=="train-val"):
gtf_.update_dataset(dataset_path=[dataset_train_path, dataset_val_path],
path_to_csv=["sampled_dataset_train.csv", "sampled_dataset_val.csv"]);
elif(dataset_type=="csv_train"):
gtf_.update_dataset(dataset_path=dataset_train_path, path_to_csv="sampled_dataset_train.csv");
elif(dataset_type=="csv_train-val"):
gtf_.update_dataset(dataset_path=[dataset_train_path, dataset_val_path],
path_to_csv=["sampled_dataset_train.csv", "sampled_dataset_val.csv"]);
gtf_.update_model_name(model_list[i][0]) #Change 6
gtf_.update_freeze_base_network(model_list[i][1])
gtf_.update_use_pretrained(model_list[i][2])
gtf_.Reload(); #Change 7
gtf_.update_num_epochs(num_epochs);
gtf_.update_display_progress_realtime(False)
gtf_.update_save_intermediate_models(False);
total_time_per_epoch = gtf_.get_training_estimate();
total_time = total_time_per_epoch*num_epochs;
if(int(total_time//60) == 0):
self.custom_print("Estimated time : {} sec".format(total_time));
else:
self.custom_print("Estimated time : {} min".format(int(total_time//60)+1));
gtf_.Train();
self.custom_print("Experiment Complete");
self.custom_print("\n");
self.custom_print("Comparing Experiments");
from compare_prototype import compare
ctf_ = compare(verbose=0);
ctf_.Comparison("Comparison_" + analysis_name);
self.custom_print("Comparison ID: {}".format("Comparison_" + analysis_name));
training_accuracies = [];
validation_accuracies = [];
training_losses = [];
validation_losses = [];
tabular_data = [];
for i in range(len(model_list)): #Change 8
project = analysis_name;
if(model_list[i][1]):
experiment = "Model_" + str(model_list[i][0]) + "_freeze_base"; #Change 9, 10
else:
experiment = "Model_" + str(model_list[i][0]) + "_unfreeze_base";
if(model_list[i][2]):
experiment += "_pretrained";
else:
experiment += "_uninitialized";
ctf_.Add_Experiment(project, experiment)
tmp = [];
tmp.append(experiment);
training_accuracy_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/train_acc_history.npy";
tmp.append(np.load(training_accuracy_file)[-1]);
validation_accuracy_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/val_acc_history.npy";
tmp.append(np.load(validation_accuracy_file)[-1]);
training_loss_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/train_loss_history.npy";
tmp.append(np.load(training_loss_file)[-1]);
validation_loss_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/val_loss_history.npy";
tmp.append(np.load(validation_loss_file)[-1]);
tabular_data.append(tmp)
ctf_.Generate_Statistics();
self.custom_print("Generated statistics post all epochs");
self.custom_print(tabulate(tabular_data, headers=['Experiment Name', 'Train Acc', 'Val Acc', 'Train Loss', 'Val Loss'], tablefmt='orgtbl'));
self.custom_print("");
return_dict = {};
for i in range(len(tabular_data)):
return_dict[tabular_data[i][0]] = {};
return_dict[tabular_data[i][0]]["training_accuracy"] = tabular_data[i][1];
return_dict[tabular_data[i][0]]["validation_accuracy"] = tabular_data[i][2];
return_dict[tabular_data[i][0]]["training_loss"] = tabular_data[i][3];
return_dict[tabular_data[i][0]]["validation_loss"] = tabular_data[i][4];
fname = self.system_dict["master_systems_dir_relative"] + analysis_name + "/" + tabular_data[i][0] + "/experiment_state.json";
system_dict = read_json(fname);
return_dict[tabular_data[i][0]]["training_time"] = system_dict["training"]["outputs"]["training_time"];
if(state=="keep_none"):
shutil.rmtree(self.system_dict["master_systems_dir_relative"] + analysis_name);
return return_dict
###############################################################################################################################################
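# Usage sketch: each model_list entry is
# [model_name, freeze_base_network, use_pretrained] (model choices are placeholders):
#
#   analysis = gtf.Analyse_Models("model_analysis",
#                                 [["resnet18_v1", True, True],
#                                  ["mobilenetv2_1.0", False, True]],
#                                 10, num_epochs=2, state="keep_none")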
###############################################################################################################################################
def Analyse_Optimizers(self, analysis_name, optimizer_list, percent_data, num_epochs=2, state="keep_all"):
'''
Hyperparameter Tuner - Analyse optimizers
Takes in a list of optimizers and trains on a part of dataset
Provides summaries and graphs on every sub-experiment created
Args:
analysis_name (str): A suitable name for analysis
optimizer_list (list): List of optimizer names.
percent_data (int): Percentage of complete dataset to run experiments on.
num_epochs (int): Number of epochs for each sub-experiment
state ("str"): If set as "keep_all", keeps every file in the sub-experiment
If set as "keep_none", keeps only comparison files for each experiment
Returns:
dict: Tabular data on training_accuracy, validation_accuracy, training_loss, validation_loss and training_time for each experiment.
'''
from gluon_prototype import prototype
project = analysis_name;
self.custom_print("");
self.custom_print("Running Optimizer analysis"); #Change 1
self.custom_print("Analysis Name : {}".format(project));
self.custom_print("");
for i in range(len(optimizer_list)): #Change 2
gtf_ = prototype(verbose=0);
self.custom_print("Running experiment : {}/{}".format(i+1, len(optimizer_list))); #Change 3
experiment = "Optimizer_" + str(optimizer_list[i]); #Change 4, 5
self.custom_print("Experiment name : {}".format(experiment))
gtf_.Prototype(project, experiment, pseudo_copy_from=[self.system_dict["project_name"], self.system_dict["experiment_name"]]);
gtf_.Dataset_Percent(percent_data);
dataset_type = gtf_.system_dict["dataset"]["dataset_type"];
dataset_train_path = gtf_.system_dict["dataset"]["train_path"];
dataset_val_path = gtf_.system_dict["dataset"]["val_path"];
csv_train = gtf_.system_dict["dataset"]["csv_train"];
csv_val = gtf_.system_dict["dataset"]["csv_val"];
if(dataset_type=="train"):
gtf_.update_dataset(dataset_path=dataset_train_path, path_to_csv="sampled_dataset_train.csv");
elif(dataset_type=="train-val"):
gtf_.update_dataset(dataset_path=[dataset_train_path, dataset_val_path],
path_to_csv=["sampled_dataset_train.csv", "sampled_dataset_val.csv"]);
elif(dataset_type=="csv_train"):
gtf_.update_dataset(dataset_path=dataset_train_path, path_to_csv="sampled_dataset_train.csv");
elif(dataset_type=="csv_train-val"):
gtf_.update_dataset(dataset_path=[dataset_train_path, dataset_val_path],
path_to_csv=["sampled_dataset_train.csv", "sampled_dataset_val.csv"]);
lr = gtf_.system_dict["hyper-parameters"]["learning_rate"] #Change 6
if(optimizer_list[i] == "sgd"):
gtf_.optimizer_sgd(lr);
elif(optimizer_list[i] == "nesterov_sgd"):
gtf_.optimizer_nesterov_sgd(lr);
elif(optimizer_list[i] == "rmsprop"):
gtf_.optimizer_rmsprop(lr);
elif(optimizer_list[i] == "momentum_rmsprop"):
gtf_.optimizer_momentum_rmsprop(lr);
elif(optimizer_list[i] == "adam"):
gtf_.optimizer_adam(lr);
elif(optimizer_list[i] == "adagrad"):
gtf_.optimizer_adagrad(lr);
elif(optimizer_list[i] == "adadelta"):
gtf_.optimizer_adadelta(lr);
elif(optimizer_list[i] == "adamax"):
gtf_.optimizer_adamax(lr);
elif(optimizer_list[i] == "nesterov_adam"):
gtf_.optimizer_nesterov_adam(lr);
elif(optimizer_list[i] == "signum"):
gtf_.optimizer_signum(lr);
gtf_.Reload(); #Change 7
gtf_.update_num_epochs(num_epochs);
gtf_.update_display_progress_realtime(False)
gtf_.update_save_intermediate_models(False);
total_time_per_epoch = gtf_.get_training_estimate();
total_time = total_time_per_epoch*num_epochs;
if(int(total_time//60) == 0):
self.custom_print("Estimated time : {} sec".format(total_time));
else:
self.custom_print("Estimated time : {} min".format(int(total_time//60)+1));
gtf_.Train();
self.custom_print("Experiment Complete");
self.custom_print("\n");
self.custom_print("Comparing Experiments");
from compare_prototype import compare
ctf_ = compare(verbose=0);
ctf_.Comparison("Comparison_" + analysis_name);
self.custom_print("Comparison ID: {}".format("Comparison_" + analysis_name));
training_accuracies = [];
validation_accuracies = [];
training_losses = [];
validation_losses = [];
tabular_data = [];
for i in range(len(optimizer_list)): #Change 8
project = analysis_name;
experiment = "Optimizer_" + str(optimizer_list[i]); #Change 9, 10
ctf_.Add_Experiment(project, experiment)
tmp = [];
tmp.append(experiment);
training_accuracy_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/train_acc_history.npy";
tmp.append(np.load(training_accuracy_file)[-1]);
validation_accuracy_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/val_acc_history.npy";
tmp.append(np.load(validation_accuracy_file)[-1]);
training_loss_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/train_loss_history.npy";
tmp.append(np.load(training_loss_file)[-1]);
validation_loss_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/val_loss_history.npy";
tmp.append(np.load(validation_loss_file)[-1]);
tabular_data.append(tmp)
ctf_.Generate_Statistics();
self.custom_print("Generated statistics post all epochs");
self.custom_print(tabulate(tabular_data, headers=['Experiment Name', 'Train Acc', 'Val Acc', 'Train Loss', 'Val Loss'], tablefmt='orgtbl'));
self.custom_print("");
return_dict = {};
for i in range(len(tabular_data)):
return_dict[tabular_data[i][0]] = {};
return_dict[tabular_data[i][0]]["training_accuracy"] = tabular_data[i][1];
return_dict[tabular_data[i][0]]["validation_accuracy"] = tabular_data[i][2];
return_dict[tabular_data[i][0]]["training_loss"] = tabular_data[i][3];
return_dict[tabular_data[i][0]]["validation_loss"] = tabular_data[i][4];
fname = self.system_dict["master_systems_dir_relative"] + analysis_name + "/" + tabular_data[i][0] + "/experiment_state.json";
system_dict = read_json(fname);
return_dict[tabular_data[i][0]]["training_time"] = system_dict["training"]["outputs"]["training_time"];
if(state=="keep_none"):
shutil.rmtree(self.system_dict["master_systems_dir_relative"] + analysis_name);
return return_dict
###############################################################################################################################################
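# Usage sketch for Analyse_Optimizers (placeholder values); valid names are the
# strings handled above, e.g. "sgd", "adam", "rmsprop":
#
#   analysis = gtf.Analyse_Optimizers("optimizer_analysis",
#                                     ["sgd", "adam", "rmsprop"],
#                                     10, num_epochs=2, state="keep_none")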
Classes
class prototype(verbose=1)

Main class for the MXNet backend.

Args
verbose : int
Set verbosity levels
0 - Print Nothing
1 - Print desired details
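Example

A minimal end-to-end sketch, assuming monk and its MXNet backend are installed and that "./data/train" (a placeholder path) points at an image-folder dataset:

from gluon_prototype import prototype

gtf = prototype(verbose=1)
gtf.Prototype("sample-project", "sample-experiment")
gtf.Default(dataset_path="./data/train", model_name="resnet18_v1",
            freeze_base_network=True, num_epochs=5)
gtf.Train()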
self.custom_print("Generated statistics post all epochs"); self.custom_print(tabulate(tabular_data, headers=['Experiment Name', 'Train Acc', 'Val Acc', 'Train Loss', 'Val Loss'], tablefmt='orgtbl')); self.custom_print(""); return_dict = {}; for i in range(len(tabular_data)): return_dict[tabular_data[i][0]] = {}; return_dict[tabular_data[i][0]]["training_accuracy"] = tabular_data[i][1]; return_dict[tabular_data[i][0]]["validation_accuracy"] = tabular_data[i][2]; return_dict[tabular_data[i][0]]["training_loss"] = tabular_data[i][3]; return_dict[tabular_data[i][0]]["validation_loss"] = tabular_data[i][4]; fname = self.system_dict["master_systems_dir_relative"] + analysis_name + "/" + tabular_data[i][0] + "/experiment_state.json"; system_dict = read_json(fname); return_dict[tabular_data[i][0]]["training_time"] = system_dict["training"]["outputs"]["training_time"]; if(state=="keep_none"): shutil.rmtree(self.system_dict["master_systems_dir_relative"] + analysis_name); return return_dict ############################################################################################################################################### ############################################################################################################################################### def Analyse_Batch_Sizes(self, analysis_name, batch_size_list, percent_data, num_epochs=2, state="keep_all"): ''' Hyperparameter Tuner - Analyse batch sizes Takes in a list of batch sizes and trains on a part of dataset Provides summaries and graphs on every sub-experiment created Args: analysis_name (str): A suitable name for analysis inp_size_list (list): List of batch sizes. percent_data (int): Percentage of complete dataset to run experiments on. num_epochs (int): Number of epochs for each sub-experiment state ("str"): If set as "keep_all", keeps every file in the sub-experiment If set as "keep_none", keeps only comparison files for each experiment Returns: dict: Tabular data on training_accuracy, validation_accuracy, training_loss, validation_loss and training_time for each experiment. 
''' from gluon_prototype import prototype project = analysis_name; self.custom_print(""); self.custom_print("Running Batch Size analysis"); #Change 1 self.custom_print("Analysis Name : {}".format(project)); self.custom_print(""); for i in range(len(batch_size_list)): #Change 2 gtf_ = prototype(verbose=0); self.custom_print("Running experiment : {}/{}".format(i+1, len(batch_size_list))); #Change 3 experiment = "Batch_Size_" + str(batch_size_list[i]); #Change 4, 5 self.custom_print("Experiment name : {}".format(experiment)) gtf_.Prototype(project, experiment, pseudo_copy_from=[self.system_dict["project_name"], self.system_dict["experiment_name"]]); gtf_.Dataset_Percent(percent_data); dataset_type = gtf_.system_dict["dataset"]["dataset_type"]; dataset_train_path = gtf_.system_dict["dataset"]["train_path"]; dataset_val_path = gtf_.system_dict["dataset"]["val_path"]; csv_train = gtf_.system_dict["dataset"]["csv_train"]; csv_val = gtf_.system_dict["dataset"]["csv_val"]; if(dataset_type=="train"): gtf_.update_dataset(dataset_path=dataset_train_path, path_to_csv="sampled_dataset_train.csv"); elif(dataset_type=="train-val"): gtf_.update_dataset(dataset_path=[dataset_train_path, dataset_val_path], path_to_csv=["sampled_dataset_train.csv", "sampled_dataset_val.csv"]); elif(dataset_type=="csv_train"): gtf_.update_dataset(dataset_path=dataset_train_path, path_to_csv="sampled_dataset_train.csv"); elif(dataset_type=="csv_train-val"): gtf_.update_dataset(dataset_path=[dataset_train_path, dataset_val_path], path_to_csv=["sampled_dataset_train.csv", "sampled_dataset_val.csv"]); gtf_.update_batch_size(batch_size_list[i]) #Change 6 gtf_.Reload(); #Change 7 gtf_.update_num_epochs(num_epochs); gtf_.update_display_progress_realtime(False) gtf_.update_save_intermediate_models(False); total_time_per_epoch = gtf_.get_training_estimate(); total_time = total_time_per_epoch*num_epochs; if(int(total_time//60) == 0): self.custom_print("Estimated time : {} sec".format(total_time)); else: self.custom_print("Estimated time : {} min".format(int(total_time//60)+1)); gtf_.Train(); self.custom_print("Experiment Complete"); self.custom_print("\n"); self.custom_print("Comparing Experiments"); from compare_prototype import compare ctf_ = compare(verbose=0); ctf_.Comparison("Comparison_" + analysis_name); self.custom_print("Comparison ID: {}".format("Comparison_" + analysis_name)); training_accuracies = []; validation_accuracies = []; training_losses = []; validation_losses = []; tabular_data = []; for i in range(len(batch_size_list)): #Change 8 project = analysis_name; experiment = "Batch_Size_" + str(batch_size_list[i]); #Change 9, 10 ctf_.Add_Experiment(project, experiment) tmp = []; tmp.append(experiment); training_accuracy_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/train_acc_history.npy"; tmp.append(np.load(training_accuracy_file)[-1]); validation_accuracy_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/val_acc_history.npy"; tmp.append(np.load(validation_accuracy_file)[-1]); training_loss_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/train_loss_history.npy"; tmp.append(np.load(training_loss_file)[-1]); validation_loss_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/val_loss_history.npy"; tmp.append(np.load(validation_loss_file)[-1]); tabular_data.append(tmp) ctf_.Generate_Statistics(); 
self.custom_print("Generated statistics post all epochs"); self.custom_print(tabulate(tabular_data, headers=['Experiment Name', 'Train Acc', 'Val Acc', 'Train Loss', 'Val Loss'], tablefmt='orgtbl')); self.custom_print(""); return_dict = {}; for i in range(len(tabular_data)): return_dict[tabular_data[i][0]] = {}; return_dict[tabular_data[i][0]]["training_accuracy"] = tabular_data[i][1]; return_dict[tabular_data[i][0]]["validation_accuracy"] = tabular_data[i][2]; return_dict[tabular_data[i][0]]["training_loss"] = tabular_data[i][3]; return_dict[tabular_data[i][0]]["validation_loss"] = tabular_data[i][4]; fname = self.system_dict["master_systems_dir_relative"] + analysis_name + "/" + tabular_data[i][0] + "/experiment_state.json"; system_dict = read_json(fname); return_dict[tabular_data[i][0]]["training_time"] = system_dict["training"]["outputs"]["training_time"]; if(state=="keep_none"): shutil.rmtree(self.system_dict["master_systems_dir_relative"] + analysis_name); return return_dict ############################################################################################################################################### ############################################################################################################################################### def Analyse_Models(self, analysis_name, model_list, percent_data, num_epochs=2, state="keep_all"): ''' Hyperparameter Tuner - Analyse base models Takes in a list of base models and trains on a part of dataset Provides summaries and graphs on every sub-experiment created Args: analysis_name (str): A suitable name for analysis inp_size_list (list of list): List of base models. The format is [model_name_string, freeze_base_model_bool, use_pretrained_model_bool] 1) First arg - Model name in string 2) Second arg - Whether to freeze base model or not 3) Thrid arg - Whether to use pretrained model or use randomly initialized weights percent_data (int): Percentage of complete dataset to run experiments on. num_epochs (int): Number of epochs for each sub-experiment state ("str"): If set as "keep_all", keeps every file in the sub-experiment If set as "keep_none", keeps only comparison files for each experiment Returns: dict: Tabular data on training_accuracy, validation_accuracy, training_loss, validation_loss and training_time for each experiment. 
''' from gluon_prototype import prototype project = analysis_name; self.custom_print(""); self.custom_print("Running Model analysis"); #Change 1 self.custom_print("Analysis Name : {}".format(project)); self.custom_print(""); for i in range(len(model_list)): #Change 2 gtf_ = prototype(verbose=0); self.custom_print("Running experiment : {}/{}".format(i+1, len(model_list))); #Change 3 if(model_list[i][1]): experiment = "Model_" + str(model_list[i][0]) + "_freeze_base"; #Change 4, 5 else: experiment = "Model_" + str(model_list[i][0]) + "_unfreeze_base"; if(model_list[i][2]): experiment += "_pretrained"; else: experiment += "_uninitialized"; self.custom_print("Experiment name : {}".format(experiment)) gtf_.Prototype(project, experiment, pseudo_copy_from=[self.system_dict["project_name"], self.system_dict["experiment_name"]]); gtf_.Dataset_Percent(percent_data); dataset_type = gtf_.system_dict["dataset"]["dataset_type"]; dataset_train_path = gtf_.system_dict["dataset"]["train_path"]; dataset_val_path = gtf_.system_dict["dataset"]["val_path"]; csv_train = gtf_.system_dict["dataset"]["csv_train"]; csv_val = gtf_.system_dict["dataset"]["csv_val"]; if(dataset_type=="train"): gtf_.update_dataset(dataset_path=dataset_train_path, path_to_csv="sampled_dataset_train.csv"); elif(dataset_type=="train-val"): gtf_.update_dataset(dataset_path=[dataset_train_path, dataset_val_path], path_to_csv=["sampled_dataset_train.csv", "sampled_dataset_val.csv"]); elif(dataset_type=="csv_train"): gtf_.update_dataset(dataset_path=dataset_train_path, path_to_csv="sampled_dataset_train.csv"); elif(dataset_type=="csv_train-val"): gtf_.update_dataset(dataset_path=[dataset_train_path, dataset_val_path], path_to_csv=["sampled_dataset_train.csv", "sampled_dataset_val.csv"]); gtf_.update_model_name(model_list[i][0]) #Change 6 gtf_.update_freeze_base_network(model_list[i][1]) gtf_.update_use_pretrained(model_list[i][2]) gtf_.Reload(); #Change 7 gtf_.update_num_epochs(num_epochs); gtf_.update_display_progress_realtime(False) gtf_.update_save_intermediate_models(False); total_time_per_epoch = gtf_.get_training_estimate(); total_time = total_time_per_epoch*num_epochs; if(int(total_time//60) == 0): self.custom_print("Estimated time : {} sec".format(total_time)); else: self.custom_print("Estimated time : {} min".format(int(total_time//60)+1)); gtf_.Train(); self.custom_print("Experiment Complete"); self.custom_print("\n"); self.custom_print("Comparing Experiments"); from compare_prototype import compare ctf_ = compare(verbose=0); ctf_.Comparison("Comparison_" + analysis_name); self.custom_print("Comparison ID: {}".format("Comparison_" + analysis_name)); training_accuracies = []; validation_accuracies = []; training_losses = []; validation_losses = []; tabular_data = []; for i in range(len(model_list)): #Change 8 project = analysis_name; if(model_list[i][1]): experiment = "Model_" + str(model_list[i][0]) + "_freeze_base"; #Change 9, 10 else: experiment = "Model_" + str(model_list[i][0]) + "_unfreeze_base"; if(model_list[i][2]): experiment += "_pretrained"; else: experiment += "_uninitialized"; ctf_.Add_Experiment(project, experiment) tmp = []; tmp.append(experiment); training_accuracy_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/train_acc_history.npy"; tmp.append(np.load(training_accuracy_file)[-1]); validation_accuracy_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/val_acc_history.npy"; 
tmp.append(np.load(validation_accuracy_file)[-1]); training_loss_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/train_loss_history.npy"; tmp.append(np.load(training_loss_file)[-1]); validation_loss_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/val_loss_history.npy"; tmp.append(np.load(validation_loss_file)[-1]); tabular_data.append(tmp) ctf_.Generate_Statistics(); self.custom_print("Generated statistics post all epochs"); self.custom_print(tabulate(tabular_data, headers=['Experiment Name', 'Train Acc', 'Val Acc', 'Train Loss', 'Val Loss'], tablefmt='orgtbl')); self.custom_print(""); return_dict = {}; for i in range(len(tabular_data)): return_dict[tabular_data[i][0]] = {}; return_dict[tabular_data[i][0]]["training_accuracy"] = tabular_data[i][1]; return_dict[tabular_data[i][0]]["validation_accuracy"] = tabular_data[i][2]; return_dict[tabular_data[i][0]]["training_loss"] = tabular_data[i][3]; return_dict[tabular_data[i][0]]["validation_loss"] = tabular_data[i][4]; fname = self.system_dict["master_systems_dir_relative"] + analysis_name + "/" + tabular_data[i][0] + "/experiment_state.json"; system_dict = read_json(fname); return_dict[tabular_data[i][0]]["training_time"] = system_dict["training"]["outputs"]["training_time"]; if(state=="keep_none"): shutil.rmtree(self.system_dict["master_systems_dir_relative"] + analysis_name); return return_dict ############################################################################################################################################### ############################################################################################################################################### def Analyse_Optimizers(self, analysis_name, optimizer_list, percent_data, num_epochs=2, state="keep_all"): ''' Hyperparameter Tuner - Analyse optimizers Takes in a list of optimizers and trains on a part of dataset Provides summaries and graphs on every sub-experiment created Args: analysis_name (str): A suitable name for analysis inp_size_list (list): List of optimizers. percent_data (int): Percentage of complete dataset to run experiments on. num_epochs (int): Number of epochs for each sub-experiment state ("str"): If set as "keep_all", keeps every file in the sub-experiment If set as "keep_none", keeps only comparison files for each experiment Returns: dict: Tabular data on training_accuracy, validation_accuracy, training_loss, validation_loss and training_time for each experiment. 
''' from gluon_prototype import prototype project = analysis_name; self.custom_print(""); self.custom_print("Running Optimizer analysis"); #Change 1 self.custom_print("Analysis Name : {}".format(project)); self.custom_print(""); for i in range(len(optimizer_list)): #Change 2 gtf_ = prototype(verbose=0); self.custom_print("Running experiment : {}/{}".format(i+1, len(optimizer_list))); #Change 3 experiment = "Optimizer_" + str(optimizer_list[i]); #Change 4, 5 self.custom_print("Experiment name : {}".format(experiment)) gtf_.Prototype(project, experiment, pseudo_copy_from=[self.system_dict["project_name"], self.system_dict["experiment_name"]]); gtf_.Dataset_Percent(percent_data); dataset_type = gtf_.system_dict["dataset"]["dataset_type"]; dataset_train_path = gtf_.system_dict["dataset"]["train_path"]; dataset_val_path = gtf_.system_dict["dataset"]["val_path"]; csv_train = gtf_.system_dict["dataset"]["csv_train"]; csv_val = gtf_.system_dict["dataset"]["csv_val"]; if(dataset_type=="train"): gtf_.update_dataset(dataset_path=dataset_train_path, path_to_csv="sampled_dataset_train.csv"); elif(dataset_type=="train-val"): gtf_.update_dataset(dataset_path=[dataset_train_path, dataset_val_path], path_to_csv=["sampled_dataset_train.csv", "sampled_dataset_val.csv"]); elif(dataset_type=="csv_train"): gtf_.update_dataset(dataset_path=dataset_train_path, path_to_csv="sampled_dataset_train.csv"); elif(dataset_type=="csv_train-val"): gtf_.update_dataset(dataset_path=[dataset_train_path, dataset_val_path], path_to_csv=["sampled_dataset_train.csv", "sampled_dataset_val.csv"]); lr = gtf_.system_dict["hyper-parameters"]["learning_rate"] #Change 6 if(optimizer_list[i] == "sgd"): gtf_.optimizer_sgd(lr); elif(optimizer_list[i] == "nesterov_sgd"): gtf_.optimizer_nesterov_sgd(lr); elif(optimizer_list[i] == "rmsprop"): gtf_.optimizer_rmsprop(lr); elif(optimizer_list[i] == "momentum_rmsprop"): gtf_.optimizer_momentum_rmsprop(lr); elif(optimizer_list[i] == "adam"): gtf_.optimizer_adam(lr); elif(optimizer_list[i] == "adagrad"): gtf_.optimizer_adagrad(lr); elif(optimizer_list[i] == "adadelta"): gtf_.optimizer_adadelta(lr); elif(optimizer_list[i] == "adamax"): gtf_.optimizer_adamax(lr); elif(optimizer_list[i] == "nesterov_adam"): gtf_.optimizer_nesterov_adam(lr); elif(optimizer_list[i] == "signum"): gtf_.optimizer_signum(lr); gtf_.Reload(); #Change 7 gtf_.update_num_epochs(num_epochs); gtf_.update_display_progress_realtime(False) gtf_.update_save_intermediate_models(False); total_time_per_epoch = gtf_.get_training_estimate(); total_time = total_time_per_epoch*num_epochs; if(int(total_time//60) == 0): self.custom_print("Estimated time : {} sec".format(total_time)); else: self.custom_print("Estimated time : {} min".format(int(total_time//60)+1)); gtf_.Train(); self.custom_print("Experiment Complete"); self.custom_print("\n"); self.custom_print("Comparing Experiments"); from compare_prototype import compare ctf_ = compare(verbose=0); ctf_.Comparison("Comparison_" + analysis_name); self.custom_print("Comparison ID: {}".format("Comparison_" + analysis_name)); training_accuracies = []; validation_accuracies = []; training_losses = []; validation_losses = []; tabular_data = []; for i in range(len(optimizer_list)): #Change 8 project = analysis_name; experiment = "Optimizer_" + str(optimizer_list[i]); #Change 9, 10 ctf_.Add_Experiment(project, experiment) tmp = []; tmp.append(experiment); training_accuracy_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + 
"/output/logs/train_acc_history.npy"; tmp.append(np.load(training_accuracy_file)[-1]); validation_accuracy_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/val_acc_history.npy"; tmp.append(np.load(validation_accuracy_file)[-1]); training_loss_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/train_loss_history.npy"; tmp.append(np.load(training_loss_file)[-1]); validation_loss_file = self.system_dict["master_systems_dir_relative"] + "/" + project + "/" + experiment + "/output/logs/val_loss_history.npy"; tmp.append(np.load(validation_loss_file)[-1]); tabular_data.append(tmp) ctf_.Generate_Statistics(); self.custom_print("Generated statistics post all epochs"); self.custom_print(tabulate(tabular_data, headers=['Experiment Name', 'Train Acc', 'Val Acc', 'Train Loss', 'Val Loss'], tablefmt='orgtbl')); self.custom_print(""); return_dict = {}; for i in range(len(tabular_data)): return_dict[tabular_data[i][0]] = {}; return_dict[tabular_data[i][0]]["training_accuracy"] = tabular_data[i][1]; return_dict[tabular_data[i][0]]["validation_accuracy"] = tabular_data[i][2]; return_dict[tabular_data[i][0]]["training_loss"] = tabular_data[i][3]; return_dict[tabular_data[i][0]]["validation_loss"] = tabular_data[i][4]; fname = self.system_dict["master_systems_dir_relative"] + analysis_name + "/" + tabular_data[i][0] + "/experiment_state.json"; system_dict = read_json(fname); return_dict[tabular_data[i][0]]["training_time"] = system_dict["training"]["outputs"]["training_time"]; if(state=="keep_none"): shutil.rmtree(self.system_dict["master_systems_dir_relative"] + analysis_name); return return_dict
Ancestors
- gluon.finetune.level_14_master_main.prototype_master
- gluon.finetune.level_13_updates_main.prototype_updates
- gluon.finetune.level_12_losses_main.prototype_losses
- gluon.finetune.level_11_optimizers_main.prototype_optimizers
- gluon.finetune.level_10_schedulers_main.prototype_schedulers
- gluon.finetune.level_9_transforms_main.prototype_transforms
- gluon.finetune.level_8_layers_main.prototype_layers
- gluon.finetune.level_7_aux_main.prototype_aux
- gluon.finetune.level_6_params_main.prototype_params
- gluon.finetune.level_5_state_base.finetune_state
- gluon.finetune.level_4_evaluation_base.finetune_evaluation
- gluon.finetune.level_3_training_base.finetune_training
- gluon.finetune.level_2_model_base.finetune_model
- gluon.finetune.level_1_dataset_base.finetune_dataset
- system.base_class.system
Methods
def Analyse_Batch_Sizes(self, analysis_name, batch_size_list, percent_data, num_epochs=2, state='keep_all')
-
Hyperparameter Tuner - Analyse batch sizes. Takes in a list of batch sizes and trains on a part of the dataset. Provides summaries and graphs for every sub-experiment created.
Args
analysis_name
:str
- A suitable name for analysis
batch_size_list
:list
- List of batch sizes.
percent_data
:int
- Percentage of complete dataset to run experiments on.
num_epochs
:int
- Number of epochs for each sub-experiment
state ("str"): If set as "keep_all", keeps every file in the sub-experiment If set as "keep_none", keeps only comparison files for each experiment
Returns
dict
- Tabular data on training_accuracy, validation_accuracy, training_loss, validation_loss and training_time for each experiment.
def Analyse_Input_Sizes(self, analysis_name, inp_size_list, percent_data, num_epochs=2, state='keep_all')
-
Hyperparameter Tuner - Analyse input sizes. Takes in a list of input sizes and trains on a part of the dataset. Provides summaries and graphs for every sub-experiment created.
Args
analysis_name
:str
- A suitable name for analysis
inp_size_list
:list
- List of input sizes.
percent_data
:int
- Percentage of complete dataset to run experiments on.
num_epochs
:int
- Number of epochs for each sub-experiment
state ("str"): If set as "keep_all", keeps every file in the sub-experiment If set as "keep_none", keeps only comparison files for each experiment
Returns
dict
- Tabular data on training_accuracy, validation_accuracy, training_loss, validation_loss and training_time for each experiment.
def Analyse_Learning_Rates(self, analysis_name, lr_list, percent_data, num_epochs=2, state='keep_all')
-
Hyperparameter Tuner - Analyse learning rates. Takes in a list of learning rates and trains on a part of the dataset. Provides summaries and graphs for every sub-experiment created.
Args
analysis_name
:str
- A suitable name for analysis
lr_list
:list
- List of learning rates.
percent_data
:int
- Percentage of complete dataset to run experiments on.
num_epochs
:int
- Number of epochs for each sub-experiment
state ("str"): If set as "keep_all", keeps every file in the sub-experiment If set as "keep_none", keeps only comparison files for each experiment
Returns
dict
- Tabular data on training_accuracy, validation_accuracy, training_loss, validation_loss and training_time for each experiment.
def Analyse_Models(self, analysis_name, model_list, percent_data, num_epochs=2, state='keep_all')
-
Hyperparameter Tuner - Analyse base models. Takes in a list of base models and trains on a part of the dataset. Provides summaries and graphs for every sub-experiment created.
Args
analysis_name
:str
- A suitable name for analysis
model_list
:list of list
- List of base models. Each entry has the format [model_name_string, freeze_base_model_bool, use_pretrained_model_bool]: 1) First arg - Model name as a string 2) Second arg - Whether to freeze the base model or not 3) Third arg - Whether to use a pretrained model or randomly initialized weights
percent_data
:int
- Percentage of complete dataset to run experiments on.
num_epochs
:int
- Number of epochs for each sub-experiment
state ("str"): If set as "keep_all", keeps every file in the sub-experiment If set as "keep_none", keeps only comparison files for each experiment
Returns
dict
- Tabular data on training_accuracy, validation_accuracy, training_loss, validation_loss and training_time for each experiment.
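A hedged usage sketch, assuming an already-configured gtf prototype (analysis name and model choices are placeholders). Each entry is [model_name, freeze_base_network, use_pretrained], and the generated keys combine the model name with "_freeze_base"/"_unfreeze_base" and "_pretrained"/"_uninitialized":

    analysis = gtf.Analyse_Models("Model_Analysis",
                                  [["resnet18_v1", True, True],
                                   ["densenet121", False, True]],
                                  10, num_epochs=2, state="keep_none");
    print(analysis["Model_resnet18_v1_freeze_base_pretrained"]["training_time"]);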
def Analyse_Optimizers(self, analysis_name, optimizer_list, percent_data, num_epochs=2, state='keep_all')
-
Hyperparameter Tuner - Analyse optimizers. Takes in a list of optimizers and trains on a part of the dataset. Provides summaries and graphs for every sub-experiment created.
Args
analysis_name
:str
- A suitable name for analysis
optimizer_list
:list
- List of optimizer names. Supported values (as handled in the source): "sgd", "nesterov_sgd", "rmsprop", "momentum_rmsprop", "adam", "adagrad", "adadelta", "adamax", "nesterov_adam", "signum".
percent_data
:int
- Percentage of complete dataset to run experiments on.
num_epochs
:int
- Number of epochs for each sub-experiment
state ("str"): If set as "keep_all", keeps every file in the sub-experiment If set as "keep_none", keeps only comparison files for each experiment
Returns
dict
- Tabular data on training_accuracy, validation_accuracy, training_loss, validation_loss and training_time for each experiment.
def Default(self, dataset_path=False, path_to_csv=False, delimiter=',', model_name='resnet18_v1', freeze_base_network=True, num_epochs=10)
-
Use monk in default (quick prototyping) mode
Args
dataset_path : str, list
- Path to Dataset folder
1) Single string if validation data does not exist
2) List [train_path, val_path] in case of separate train and val data
path_to_csv : str, list
- Path to csv file pointing towards images
1) Single string if validation data does not exist
2) List [train_csv, val_csv] in case of separate train and val csv files
delimiter : str
- Delimiter for csv file
model_name : str
- Base model name
freeze_base_network : bool
- If True, the base network is frozen
num_epochs : int
- Number of epochs to train the data
Returns
None
Expand source code
def Default(self, dataset_path=False, path_to_csv=False, delimiter=",", model_name="resnet18_v1", freeze_base_network=True, num_epochs=10):
    '''
    Use monk in default (quick prototyping) mode

    Args:
        dataset_path (str, list): Path to Dataset folder
                                  1) Single string if validation data does not exist
                                  2) List [train_path, val_path] in case of separate train and val data
        path_to_csv (str, list): Path to csv file pointing towards images
                                 1) Single string if validation data does not exist
                                 2) List [train_csv, val_csv] in case of separate train and val csv files
        delimiter (str): Delimiter for csv file
        model_name (str): Base model name
        freeze_base_network (bool): If True, the base network is frozen
        num_epochs (int): Number of epochs to train the data

    Returns:
        None
    '''
    if(self.system_dict["states"]["eval_infer"]):
        self.Dataset_Params(dataset_path=dataset_path, path_to_csv=path_to_csv, delimiter=delimiter);
        self.Dataset();
    else:
        input_size = 224;
        self.Dataset_Params(dataset_path=dataset_path, path_to_csv=path_to_csv, delimiter=delimiter,
                            split=0.7, input_size=input_size, batch_size=4, shuffle_data=True,
                            num_processors=psutil.cpu_count());

        # train-val transforms
        self.apply_random_horizontal_flip(probability=0.8, train=True, val=True);
        self.apply_normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225],
                             train=True, val=True, test=True);
        self.Dataset();

        self.Model_Params(model_name=model_name, freeze_base_network=freeze_base_network,
                          use_gpu=True, use_pretrained=True);
        self.Model();

        # Pick an optimizer, scheduler and loss suited to the chosen architecture
        model_name = self.system_dict["model"]["params"]["model_name"];
        if("resnet" in model_name or "alexnet" in model_name or "darknet" in model_name or "xception" in model_name):
            self.optimizer_sgd(0.01);
            if(num_epochs > 10):
                self.lr_step_decrease(max(min(num_epochs//3, 8), 1), gamma=0.1);
            else:
                self.lr_step_decrease(1, gamma=0.98);
            self.loss_softmax_crossentropy();
        elif("vgg" in model_name):
            self.optimizer_sgd(0.001);
            if(num_epochs > 10):
                self.lr_step_decrease(max(min(num_epochs//3, 8), 1), gamma=0.1);
            else:
                self.lr_step_decrease(1, gamma=0.98);
            self.loss_softmax_crossentropy();
        elif("squeezenet1.0" in model_name):
            self.optimizer_sgd(0.04, weight_decay=0.0002);
            self.lr_step_decrease(1, gamma=0.98);
            self.loss_softmax_crossentropy();
        elif("squeezenet1.1" in model_name):
            self.optimizer_sgd(0.001, weight_decay=0.0002);
            self.lr_step_decrease(1, gamma=0.98);
            self.loss_softmax_crossentropy();
        elif("dense" in model_name):
            self.optimizer_sgd(0.01, weight_decay=0.0001);
            if(num_epochs > 10):
                self.lr_multistep_decrease([max(num_epochs//2, 1), max(3*num_epochs//4, 2)]);
            else:
                self.lr_step_decrease(1, gamma=0.98);
            self.loss_softmax_crossentropy();
        elif("resnext" in model_name or "senet" in model_name):
            self.optimizer_sgd(0.01, weight_decay=0.0001);
            if(num_epochs > 10):
                self.lr_step_decrease(max(num_epochs//3, 1), gamma=0.1);
            else:
                self.lr_step_decrease(1, gamma=0.98);
            self.loss_softmax_crossentropy();
        elif("mobile" in model_name):
            self.optimizer_sgd(0.01, weight_decay=0.00004, momentum=0.9);
            self.lr_step_decrease(1, gamma=0.97);
            self.loss_softmax_crossentropy();
        elif("inception" in model_name):
            self.optimizer_sgd(0.045, weight_decay=0.0001, momentum=0.9);
            self.lr_step_decrease(1, gamma=0.9);
            self.loss_softmax_crossentropy();

        self.Training_Params(num_epochs=num_epochs, display_progress=True, display_progress_realtime=True,
                             save_intermediate_models=True, intermediate_model_prefix="intermediate_model_",
                             save_training_logs=True);

        self.system_dict["hyper-parameters"]["status"] = True;
        save(self.system_dict);
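A typical quick-prototyping flow built on Default (the dataset path and names are illustrative):

from gluon_prototype import prototype

gtf = prototype(verbose=1)
gtf.Prototype("sample-project", "exp-1")

# One folder with a sub-folder per class; Default picks the 70/30 split,
# transforms, optimizer, scheduler and loss for the chosen architecture
gtf.Default(dataset_path="./dataset/train",
            model_name="resnet18_v1",
            freeze_base_network=True,
            num_epochs=5)
gtf.Train()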
def List_Activations(self)
-
List all activations available for appending the base model.
Args
None
Returns
None
Expand source code
def List_Activations(self):
    '''
    List all activations available for appending the base model.

    Args:
        None

    Returns:
        None
    '''
    self.print_list_activations_transfer_learning();
def List_Activations_Custom_Model(self)
-
List all activations available for building a custom model.
Args
None
Returns
None
Expand source code
def List_Activations_Custom_Model(self):
    '''
    List all activations available for building a custom model.

    Args:
        None

    Returns:
        None
    '''
    self.print_list_activations_custom_model();
def List_Activations_Transfer_Learning(self)
-
List all activations available for appending the base model.
Args
None
Returns
None
Expand source code
def List_Activations_Transfer_Learning(self):
    '''
    List all activations available for appending the base model.

    Args:
        None

    Returns:
        None
    '''
    self.print_list_activations_transfer_learning();
def List_Blocks(self)
-
List all blocks available for building a custom model.
Args
None
Returns
None
Expand source code
def List_Blocks(self):
    '''
    List all blocks available for building a custom model.

    Args:
        None

    Returns:
        None
    '''
    self.print_list_blocks();
def List_Layers(self)
-
List all layers available for appending the base model.
Args
None
Returns
None
Expand source code
def List_Layers(self):
    '''
    List all layers available for appending the base model.

    Args:
        None

    Returns:
        None
    '''
    self.print_list_layers_transfer_learning();
def List_Layers_Custom_Model(self)
-
List all layers available for building a custom model.
Args
None
Returns
None
Expand source code
def List_Layers_Custom_Model(self):
    '''
    List all layers available for building a custom model.

    Args:
        None

    Returns:
        None
    '''
    self.print_list_layers_custom_model();
def List_Layers_Transfer_Learning(self)
-
List all layers available for appending the base model.
Args
None
Returns
None
Expand source code
def List_Layers_Transfer_Learning(self):
    '''
    List all layers available for appending the base model.

    Args:
        None

    Returns:
        None
    '''
    self.print_list_layers_transfer_learning();
def List_Losses(self)
-
List all loss functions available.
Args
None
Returns
None
Expand source code
def List_Losses(self):
    '''
    List all loss functions available.

    Args:
        None

    Returns:
        None
    '''
    self.print_list_losses();
def List_Models(self)
-
List all base models supported.
Args
None
Returns
None
Expand source code
def List_Models(self):
    '''
    List all base models supported.

    Args:
        None

    Returns:
        None
    '''
    self.print_list_models();
def List_Optimizers(self)
-
List all optimizer functions available.
Args
None
Returns
None
Expand source code
def List_Optimizers(self):
    '''
    List all optimizer functions available.

    Args:
        None

    Returns:
        None
    '''
    self.print_list_optimizers();
def List_Schedulers(self)
-
List all learning rate scheduler functions available.
Args
None
Returns
None
Expand source code
def List_Schedulers(self):
    '''
    List all learning rate scheduler functions available.

    Args:
        None

    Returns:
        None
    '''
    self.print_list_schedulers();
def List_Transforms(self)
-
List all data transformation functions available.
Args
None
Returns
None
Expand source code
def List_Transforms(self):
    '''
    List all data transformation functions available.

    Args:
        None

    Returns:
        None
    '''
    self.print_list_transforms();
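The List_* helpers above are read-only discovery utilities; a short sketch of calling them before configuring an experiment:

from gluon_prototype import prototype

gtf = prototype(verbose=1)

gtf.List_Models()                      # base architectures for transfer learning
gtf.List_Optimizers()                  # available optimizer functions
gtf.List_Schedulers()                  # learning rate schedulers
gtf.List_Losses()                      # loss functions
gtf.List_Transforms()                  # data transformation functions
gtf.List_Layers_Transfer_Learning()    # layers appendable to a base model
gtf.List_Activations_Custom_Model()    # activations usable in a custom network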
def Prototype(self, project_name, experiment_name, eval_infer=False, resume_train=False, copy_from=False, pseudo_copy_from=False, summary=False)
-
Create project and experiment for instantiation and running the experiments
Args
project_name : str
- Project Name
experiment_name : str
- Experiment Name
eval_infer : bool
- If set as True, model is loaded in evaluation mode
resume_train : bool
- If set as True, model is loaded from last checkpoint
copy_from : list
- [project, experiment] to copy from
pseudo_copy_from : list
- For creating sub-experiments while in hyper-parametric analysis state
summary : list
- Dummy variable
Returns
None
Expand source code
def Prototype(self, project_name, experiment_name, eval_infer=False, resume_train=False, copy_from=False, pseudo_copy_from=False, summary=False):
    '''
    Create project and experiment for instantiation and running the experiments

    Args:
        project_name (str): Project Name
        experiment_name (str): Experiment Name
        eval_infer (bool): If set as True, model is loaded in evaluation mode
        resume_train (bool): If set as True, model is loaded from last checkpoint
        copy_from (list): [project, experiment] to copy from
        pseudo_copy_from (list): For creating sub-experiments while in hyper-parametric analysis state
        summary (list): Dummy variable

    Returns:
        None
    '''
    self.set_system_project(project_name);
    self.set_system_experiment(experiment_name, eval_infer=eval_infer, resume_train=resume_train,
                               copy_from=copy_from, pseudo_copy_from=pseudo_copy_from, summary=summary);
    self.custom_print("Experiment Details");
    self.custom_print("    Project: {}".format(self.system_dict["project_name"]));
    self.custom_print("    Experiment: {}".format(self.system_dict["experiment_name"]));
    self.custom_print("    Dir: {}".format(self.system_dict["experiment_dir"]));
    self.custom_print("");
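The three common entry states, with placeholder project and experiment names:

from gluon_prototype import prototype

# Fresh experiment
gtf = prototype(verbose=1)
gtf.Prototype("sample-project", "exp-1")

# Reload the same experiment for evaluation / inference only
gtf_infer = prototype(verbose=1)
gtf_infer.Prototype("sample-project", "exp-1", eval_infer=True)

# New experiment with hyper-parameters copied from an existing one
gtf_copy = prototype(verbose=1)
gtf_copy.Prototype("sample-project", "exp-2", copy_from=["sample-project", "exp-1"])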
def Summary(self)
-
Print summary of entire project
Args
None
Returns
None
Expand source code
def Summary(self):
    '''
    Print summary of entire project

    Args:
        None

    Returns:
        None
    '''
    print_summary(self.system_dict["fname_relative"]);
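Typically called on an existing experiment (names are placeholders):

from gluon_prototype import prototype

gtf = prototype(verbose=1)
gtf.Prototype("sample-project", "exp-1")
gtf.Summary()    # prints the saved project and experiment details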