Module monk.system.eda.eda

Expand source code
from system.eda.imports import *
from system.eda.utils import *
from system.graphs.bar import create_plot
from system.imports import *




def class_imbalance(system_dict, show_img, save_img):
    '''
    Find class imbalance

    Args:
        system_dict (dict): System dictionary storing experiment state and set variables
        show_img (bool): If True, displays bar graph for images per class 
        save_img (bool): If True, saves bar graph for images per class

    Returns:
        list: List of classes
        list: List of number of images in every class
    '''

    dataset_type = system_dict["dataset"]["dataset_type"];
    dataset_train_path = system_dict["dataset"]["train_path"];
    dataset_val_path = system_dict["dataset"]["val_path"];
    delimiter = system_dict["dataset"]["params"]["delimiter"];
    log_dir = system_dict["log_dir"];

    if("csv" in dataset_type):
        csv_train = system_dict["dataset"]["csv_train"];
        csv_val = system_dict["dataset"]["csv_val"];

    if(dataset_type == "train"):
        classes_folder, classes_folder_strength = populate_from_folder_train(dataset_train_path);                   
    elif(dataset_type == "train-val"):
        classes_folder, classes_folder_strength = populate_from_folder_trainval(dataset_train_path, dataset_val_path);

    elif(dataset_type == "csv_train"):
        classes_folder, classes_folder_strength = populate_from_csv_train(csv_train, delimiter);

    elif(dataset_type == "csv_train-val"):
        classes_folder, classes_folder_strength = populate_from_csv_trainval(csv_train, csv_val, delimiter);

    create_plot(classes_folder, classes_folder_strength, log_dir, show_img, save_img);

    return classes_folder, classes_folder_strength;




def corrupted_missing_images(system_dict, check_missing, check_corrupt):
    '''
    Find corrupt and missing images in dataset

    Args:
        system_dict (dict): System dictionary storing experiment state and set variables
        check_missing (bool): If True, checks for missing images in csv type dataset
        check_corrupt (bool): If True, checks for corrupted images in foldered and csv dataset

    Returns:
        list: List of images missing from training set
        list: List of images missing from validation set
        list: List of images corrupted in training set
        list: List of images corrupted in validation set
    '''
    dataset_type = system_dict["dataset"]["dataset_type"];
    dataset_train_path = system_dict["dataset"]["train_path"];
    dataset_val_path = system_dict["dataset"]["val_path"];

    if("csv" in dataset_type):
        csv_train = system_dict["dataset"]["csv_train"];
        csv_val = system_dict["dataset"]["csv_val"];
        delimiter = system_dict["dataset"]["params"]["delimiter"];

    missing_images_train = None;
    missing_images_val = None;
    corrupt_images_train = None;
    corrupt_images_val = None;



    if(dataset_type == "train"):
        if(check_missing):
            x=0;
        if(check_corrupt):
            corrupt_images_train = populate_corrupt_from_foldered(dataset_train_path, verbose=system_dict["verbose"]);
    elif(dataset_type == "train-val"):
        if(check_missing):
            x=0;
        if(check_corrupt):
            corrupt_images_train = populate_corrupt_from_foldered(dataset_train_path, verbose=system_dict["verbose"])
            corrupt_images_val = populate_corrupt_from_foldered(dataset_val_path, verbose=system_dict["verbose"])
    elif(dataset_type == "csv_train"):
        if(check_missing):
            missing_images_train = populate_missing(csv_train, dataset_train_path, delimiter);
        if(check_corrupt):
            corrupt_images_train = populate_corrupt_from_csv(csv_train, dataset_train_path, delimiter, verbose=system_dict["verbose"]);
    elif(dataset_type == "csv_train-val"):
        if(check_missing):
            missing_images_train = populate_missing(csv_train, dataset_train_path, delimiter);
            missing_images_val = populate_missing(csv_val, dataset_val_path, delimiter);
        if(check_corrupt):
            corrupt_images_train = populate_corrupt_from_csv(csv_train, dataset_train_path, delimiter, verbose=system_dict["verbose"]);
            corrupt_images_val = populate_corrupt_from_csv(csv_val, dataset_val_path, delimiter, verbose=system_dict["verbose"]);

    return missing_images_train, missing_images_val, corrupt_images_train, corrupt_images_val;

Functions

def class_imbalance(system_dict, show_img, save_img)

Find class imbalance

Args

system_dict : dict
System dictionary storing experiment state and set variables
show_img : bool
If True, displays bar graph for images per class
save_img : bool
If True, saves bar graph for images per class

Returns

list
List of classes
list
List of number of images in every class
Expand source code
def class_imbalance(system_dict, show_img, save_img):
    '''
    Find class imbalance

    Args:
        system_dict (dict): System dictionary storing experiment state and set variables
        show_img (bool): If True, displays bar graph for images per class 
        save_img (bool): If True, saves bar graph for images per class

    Returns:
        list: List of classes
        list: List of number of images in every class
    '''

    dataset_type = system_dict["dataset"]["dataset_type"];
    dataset_train_path = system_dict["dataset"]["train_path"];
    dataset_val_path = system_dict["dataset"]["val_path"];
    delimiter = system_dict["dataset"]["params"]["delimiter"];
    log_dir = system_dict["log_dir"];

    if("csv" in dataset_type):
        csv_train = system_dict["dataset"]["csv_train"];
        csv_val = system_dict["dataset"]["csv_val"];

    if(dataset_type == "train"):
        classes_folder, classes_folder_strength = populate_from_folder_train(dataset_train_path);                   
    elif(dataset_type == "train-val"):
        classes_folder, classes_folder_strength = populate_from_folder_trainval(dataset_train_path, dataset_val_path);

    elif(dataset_type == "csv_train"):
        classes_folder, classes_folder_strength = populate_from_csv_train(csv_train, delimiter);

    elif(dataset_type == "csv_train-val"):
        classes_folder, classes_folder_strength = populate_from_csv_trainval(csv_train, csv_val, delimiter);

    create_plot(classes_folder, classes_folder_strength, log_dir, show_img, save_img);

    return classes_folder, classes_folder_strength;
def corrupted_missing_images(system_dict, check_missing, check_corrupt)

Find corrupt and missing images in dataset

Args

system_dict : dict
System dictionary storing experiment state and set variables
check_missing : bool
If True, checks for missing images in csv type dataset
check_corrupt : bool
If True, checks for corrupted images in foldered and csv dataset

Returns

list
List of images missing from training set
list
List of images missing from validation set
list
List of images corrupted in training set
list
List of images corrupted in validation set
Expand source code
def corrupted_missing_images(system_dict, check_missing, check_corrupt):
    '''
    Find corrupt and missing images in dataset

    Args:
        system_dict (dict): System dictionary storing experiment state and set variables
        check_missing (bool): If True, checks for missing images in csv type dataset
        check_corrupt (bool): If True, checks for corrupted images in foldered and csv dataset

    Returns:
        list: List of images missing from training set
        list: List of images missing from validation set
        list: List of images corrupted in training set
        list: List of images corrupted in validation set
    '''
    dataset_type = system_dict["dataset"]["dataset_type"];
    dataset_train_path = system_dict["dataset"]["train_path"];
    dataset_val_path = system_dict["dataset"]["val_path"];

    if("csv" in dataset_type):
        csv_train = system_dict["dataset"]["csv_train"];
        csv_val = system_dict["dataset"]["csv_val"];
        delimiter = system_dict["dataset"]["params"]["delimiter"];

    missing_images_train = None;
    missing_images_val = None;
    corrupt_images_train = None;
    corrupt_images_val = None;



    if(dataset_type == "train"):
        if(check_missing):
            x=0;
        if(check_corrupt):
            corrupt_images_train = populate_corrupt_from_foldered(dataset_train_path, verbose=system_dict["verbose"]);
    elif(dataset_type == "train-val"):
        if(check_missing):
            x=0;
        if(check_corrupt):
            corrupt_images_train = populate_corrupt_from_foldered(dataset_train_path, verbose=system_dict["verbose"])
            corrupt_images_val = populate_corrupt_from_foldered(dataset_val_path, verbose=system_dict["verbose"])
    elif(dataset_type == "csv_train"):
        if(check_missing):
            missing_images_train = populate_missing(csv_train, dataset_train_path, delimiter);
        if(check_corrupt):
            corrupt_images_train = populate_corrupt_from_csv(csv_train, dataset_train_path, delimiter, verbose=system_dict["verbose"]);
    elif(dataset_type == "csv_train-val"):
        if(check_missing):
            missing_images_train = populate_missing(csv_train, dataset_train_path, delimiter);
            missing_images_val = populate_missing(csv_val, dataset_val_path, delimiter);
        if(check_corrupt):
            corrupt_images_train = populate_corrupt_from_csv(csv_train, dataset_train_path, delimiter, verbose=system_dict["verbose"]);
            corrupt_images_val = populate_corrupt_from_csv(csv_val, dataset_val_path, delimiter, verbose=system_dict["verbose"]);

    return missing_images_train, missing_images_val, corrupt_images_train, corrupt_images_val;