Module monk.system.eda.utils

Expand source code
from system.eda.imports import *
from system.imports import *



def get_img_label(fname, delimiter):
    '''
    Find list of images and corresponding labels in csv file

    The first line of the file is treated as a header and skipped. Every
    remaining non-blank line must hold exactly two fields separated by
    ``delimiter``: the image name followed by its label.

    Args:
        fname (str): Path to csv file
        delimiter (str): Delimiter for csv file

    Returns:
        list: List of images in dataset
        list: List of labels corresponding to every image in dataset

    Raises:
        ValueError: If a non-blank row does not split into exactly two fields
    '''
    # Context manager guarantees the handle is closed even if reading fails.
    with open(fname) as f:
        rows = f.readlines()

    img_list = []
    label_list = []
    # rows[1:] drops the header and is also safe on an empty file.
    for row in rows[1:]:
        # Strip only the line terminator: blindly dropping the last character
        # would truncate the label when the final row has no trailing newline.
        row = row.rstrip("\r\n")
        if not row:
            # Blank lines (commonly at the end of a file) carry no data.
            continue
        img, label = row.split(delimiter)
        img_list.append(img)
        label_list.append(label)

    return img_list, label_list





def read_csv(fname):
    '''
    Read CSV File

    The first line is treated as a header and dropped. Rows are returned
    exactly as read, including their line terminators.

    Args:
        fname (str): Path to csv file

    Returns:
        list: List of every row in csv (header excluded); empty when the
            file has no data rows
    '''
    # Context manager guarantees the handle is closed even if reading fails.
    with open(fname) as f:
        rows = f.readlines()
    # Slicing instead of ``del rows[0]`` avoids an IndexError on an empty file.
    return rows[1:]






def populate_from_folder_train(tpath):
    '''
    Find number of images in every class image folder - train

    Every sub-directory of ``tpath`` is treated as one class and the number
    of entries inside it is that class's strength. Entries of ``tpath`` that
    are not directories are ignored.

    Args:
        tpath (str): Path to image training folder

    Returns:
        list: List of classes (in the order returned by os.listdir)
        list: List of number of images in every class
    '''
    classes_folder = []
    classes_folder_strength = []
    for name in os.listdir(tpath):
        class_dir = tpath + "/" + name
        # Only sub-directories are classes; a stray file (e.g. ".DS_Store")
        # would otherwise make os.listdir raise NotADirectoryError.
        if not os.path.isdir(class_dir):
            continue
        classes_folder.append(name)
        classes_folder_strength.append(len(os.listdir(class_dir)))
    return classes_folder, classes_folder_strength






def populate_from_folder_trainval(tpath, vpath):
    '''
    Find number of images in every class image folder - train and val

    The class list is taken from the training folder. For every training
    class, the number of entries in the same-named validation sub-folder
    (when it exists) is added to the training count. Validation folders
    with no matching training class are not reported.

    Args:
        tpath (str): Path to image training folder
        vpath (str): Path to image validation folder

    Returns:
        list: List of classes
        list: List of number of images in every class
    '''
    classes_folder, classes_folder_strength = populate_from_folder_train(tpath)
    # Iterate over the training classes rather than the number of validation
    # entries: the two folders may hold a different number of classes, which
    # previously caused an IndexError or a lookup of a missing folder.
    for i, name in enumerate(classes_folder):
        val_dir = vpath + "/" + name
        if os.path.isdir(val_dir):
            classes_folder_strength[i] += len(os.listdir(val_dir))
    return classes_folder, classes_folder_strength




def populate_from_csv_train(tpath, delimiter):
    '''
    Find number of images in every class image csv - train

    Args:
        tpath (str): Path to csv pointing to training dataset
        delimiter (str): Delimiter for csv file

    Returns:
        list: List of classes (unique labels, sorted)
        list: List of number of images in every class
    '''
    # Local import keeps this function self-contained.
    from collections import Counter

    _, label_list = get_img_label(tpath, delimiter)
    # A single counting pass replaces one list.count() scan per class.
    counts = Counter(label_list)
    classes_folder = sorted(counts)
    classes_folder_strength = [counts[name] for name in classes_folder]
    return classes_folder, classes_folder_strength




def populate_from_csv_trainval(tpath, vpath, delimiter):
    '''
    Find number of images in every class image csv - train and val

    The class list is taken from the training csv. Occurrences of each of
    those classes in the validation csv are added to the training counts;
    labels that appear only in the validation csv are not reported.

    Args:
        tpath (str): Path to csv pointing to training dataset
        vpath (str): Path to csv pointing to validation dataset
        delimiter (str): Delimiter for csv file

    Returns:
        list: List of classes
        list: List of number of images in every class
    '''
    # Local import keeps this function self-contained.
    from collections import Counter

    classes_folder, classes_folder_strength = populate_from_csv_train(tpath, delimiter)
    _, val_labels = get_img_label(vpath, delimiter)
    # Count validation labels once; a Counter returns 0 for absent labels.
    val_counts = Counter(val_labels)
    for i, name in enumerate(classes_folder):
        classes_folder_strength[i] += val_counts[name]
    return classes_folder, classes_folder_strength




def populate_missing(tpath, dataset_path, delimiter):
    '''
    Find missing images in imageset

    Each non-blank csv row must hold an image name and a label separated by
    ``delimiter``. An image is reported as missing when
    ``dataset_path + "/" + image name`` is not an existing file.

    Args:
        tpath (str): Path to csv pointing to training dataset
        dataset_path (str): Path to dataset containing images
        delimiter (str): Delimiter for csv file

    Returns:
        list: List of missing images (full paths)

    Raises:
        ValueError: If a non-blank row does not split into exactly two fields
    '''
    missing_images = []
    for row in read_csv(tpath):
        # Strip only the line terminator and skip blank lines, which would
        # otherwise fail to unpack into (image, label).
        row = row.rstrip("\r\n")
        if not row:
            continue
        img, _ = row.split(delimiter)
        img_path = dataset_path + "/" + img
        if not os.path.isfile(img_path):
            missing_images.append(img_path)
    return missing_images




def populate_corrupt_from_foldered(dataset_path, verbose=1):
    '''
    Find corrupted images in imageset - foldered type

    Every sub-directory of ``dataset_path`` is scanned; each regular file in
    it is opened and verified with PIL. A file is reported as corrupt when
    opening or verifying it raises an exception.

    Args:
        dataset_path (str): Path to dataset containing images
        verbose (int): If truthy, shows a progress bar over the class folders

    Returns:
        list: List of corrupt images (full paths)
    '''
    # Only sub-directories are class folders; stray top-level files would
    # make os.listdir raise NotADirectoryError.
    classes_folder = [
        name for name in os.listdir(dataset_path)
        if os.path.isdir(dataset_path + "/" + name)
    ]
    corrupt_images = []
    # Same loop for both modes; only the progress-bar wrapper differs.
    class_iter = tqdm(classes_folder) if verbose else classes_folder
    for class_name in class_iter:
        class_dir = dataset_path + "/" + class_name
        for file_name in os.listdir(class_dir):
            img_name = class_dir + "/" + file_name
            if not os.path.isfile(img_name):
                continue
            try:
                # Image.open itself raises on unreadable files, so it must be
                # inside the try; the with-block releases the file handle.
                with Image.open(img_name) as img:
                    img.verify()
            except Exception:
                corrupt_images.append(img_name)
    return corrupt_images




def populate_corrupt_from_csv(tpath, dataset_path, delimiter, verbose=1):
    '''
    Find corrupted images in imageset - csv type

    Each non-blank csv row must hold an image name and a label separated by
    ``delimiter``. Images that exist under ``dataset_path`` are opened and
    verified with PIL; a file is reported as corrupt when opening or
    verifying it raises an exception. Rows whose image file does not exist
    are skipped.

    Args:
        tpath (str): Path to csv listing the images
        dataset_path (str): Path to dataset containing images
        delimiter (str): Delimiter for csv file
        verbose (int): If truthy, shows a progress bar over the csv rows

    Returns:
        list: List of corrupt images (full paths)

    Raises:
        ValueError: If a non-blank row does not split into exactly two fields
    '''
    rows = read_csv(tpath)
    corrupt_images = []
    # Same loop for both modes; only the progress-bar wrapper differs.
    row_iter = tqdm(rows) if verbose else rows
    for row in row_iter:
        # Strip only the line terminator and skip blank lines, which would
        # otherwise fail to unpack into (image, label).
        row = row.rstrip("\r\n")
        if not row:
            continue
        img_name, _ = row.split(delimiter)
        img_name = dataset_path + "/" + img_name
        if not os.path.isfile(img_name):
            continue
        try:
            # Image.open itself raises on unreadable files, so it must be
            # inside the try; the with-block releases the file handle.
            with Image.open(img_name) as img:
                img.verify()
        except Exception:
            corrupt_images.append(img_name)
    return corrupt_images

Functions

def get_img_label(fname, delimiter)

Find list of images and corresponding labels in csv file

Args

fname : str
Path to csv file
delimiter : str
Delimiter for csv file

Returns

list
List of images in dataset
list
List of labels corresponding to every image in dataset
Expand source code
def get_img_label(fname, delimiter):
    '''
    Find list of images and corresponding labels in csv file

    The first line of the file is treated as a header and skipped. Every
    remaining non-blank line must hold exactly two fields separated by
    ``delimiter``: the image name followed by its label.

    Args:
        fname (str): Path to csv file
        delimiter (str): Delimiter for csv file

    Returns:
        list: List of images in dataset
        list: List of labels corresponding to every image in dataset

    Raises:
        ValueError: If a non-blank row does not split into exactly two fields
    '''
    # Context manager guarantees the handle is closed even if reading fails.
    with open(fname) as f:
        rows = f.readlines()

    img_list = []
    label_list = []
    # rows[1:] drops the header and is also safe on an empty file.
    for row in rows[1:]:
        # Strip only the line terminator: blindly dropping the last character
        # would truncate the label when the final row has no trailing newline.
        row = row.rstrip("\r\n")
        if not row:
            # Blank lines (commonly at the end of a file) carry no data.
            continue
        img, label = row.split(delimiter)
        img_list.append(img)
        label_list.append(label)

    return img_list, label_list
def populate_corrupt_from_csv(tpath, dataset_path, delimiter, verbose=1)

Find number of corrupted images in imageset - csv type

Args

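tpath : str
Path to csv listing the images
dataset_path : str
Path to dataset containing images
delimiter : str
Delimiter for csv file
verbose : int
If truthy (default 1), shows a progress bar while scanning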

Returns

list
List of corrupt images
Expand source code
def populate_corrupt_from_csv(tpath, dataset_path, delimiter, verbose=1):
    '''
    Find corrupted images in imageset - csv type

    Each non-blank csv row must hold an image name and a label separated by
    ``delimiter``. Images that exist under ``dataset_path`` are opened and
    verified with PIL; a file is reported as corrupt when opening or
    verifying it raises an exception. Rows whose image file does not exist
    are skipped.

    Args:
        tpath (str): Path to csv listing the images
        dataset_path (str): Path to dataset containing images
        delimiter (str): Delimiter for csv file
        verbose (int): If truthy, shows a progress bar over the csv rows

    Returns:
        list: List of corrupt images (full paths)

    Raises:
        ValueError: If a non-blank row does not split into exactly two fields
    '''
    rows = read_csv(tpath)
    corrupt_images = []
    # Same loop for both modes; only the progress-bar wrapper differs.
    row_iter = tqdm(rows) if verbose else rows
    for row in row_iter:
        # Strip only the line terminator and skip blank lines, which would
        # otherwise fail to unpack into (image, label).
        row = row.rstrip("\r\n")
        if not row:
            continue
        img_name, _ = row.split(delimiter)
        img_name = dataset_path + "/" + img_name
        if not os.path.isfile(img_name):
            continue
        try:
            # Image.open itself raises on unreadable files, so it must be
            # inside the try; the with-block releases the file handle.
            with Image.open(img_name) as img:
                img.verify()
        except Exception:
            corrupt_images.append(img_name)
    return corrupt_images
def populate_corrupt_from_foldered(dataset_path, verbose=1)

Find number of corrupted images in imageset - foldered type

Args

dataset_path : str
Path to dataset containing images
verbose : int
If True, prints logs for analysis

Returns

list
List of corrupt images
Expand source code
def populate_corrupt_from_foldered(dataset_path, verbose=1):
    '''
    Find corrupted images in imageset - foldered type

    Every sub-directory of ``dataset_path`` is scanned; each regular file in
    it is opened and verified with PIL. A file is reported as corrupt when
    opening or verifying it raises an exception.

    Args:
        dataset_path (str): Path to dataset containing images
        verbose (int): If truthy, shows a progress bar over the class folders

    Returns:
        list: List of corrupt images (full paths)
    '''
    # Only sub-directories are class folders; stray top-level files would
    # make os.listdir raise NotADirectoryError.
    classes_folder = [
        name for name in os.listdir(dataset_path)
        if os.path.isdir(dataset_path + "/" + name)
    ]
    corrupt_images = []
    # Same loop for both modes; only the progress-bar wrapper differs.
    class_iter = tqdm(classes_folder) if verbose else classes_folder
    for class_name in class_iter:
        class_dir = dataset_path + "/" + class_name
        for file_name in os.listdir(class_dir):
            img_name = class_dir + "/" + file_name
            if not os.path.isfile(img_name):
                continue
            try:
                # Image.open itself raises on unreadable files, so it must be
                # inside the try; the with-block releases the file handle.
                with Image.open(img_name) as img:
                    img.verify()
            except Exception:
                corrupt_images.append(img_name)
    return corrupt_images
def populate_from_csv_train(tpath, delimiter)

Find number of images in every class image csv - train

Args

tpath : str
Path to csv pointing to training dataset
delimiter : str
Delimiter for csv file

Returns

list
List of classes
list
List of number of images in every class
Expand source code
def populate_from_csv_train(tpath, delimiter):
    '''
    Find number of images in every class image csv - train

    Args:
        tpath (str): Path to csv pointing to training dataset
        delimiter (str): Delimiter for csv file

    Returns:
        list: List of classes (unique labels, sorted)
        list: List of number of images in every class
    '''
    # Local import keeps this function self-contained.
    from collections import Counter

    _, label_list = get_img_label(tpath, delimiter)
    # A single counting pass replaces one list.count() scan per class.
    counts = Counter(label_list)
    classes_folder = sorted(counts)
    classes_folder_strength = [counts[name] for name in classes_folder]
    return classes_folder, classes_folder_strength
def populate_from_csv_trainval(tpath, vpath, delimiter)

Find number of images in every class image csv - train and val

Args

tpath : str
Path to csv pointing to training dataset
vpath : str
Path to csv pointing to validation dataset
delimiter : str
Delimiter for csv file

Returns

list
List of classes
list
List of number of images in every class
Expand source code
def populate_from_csv_trainval(tpath, vpath, delimiter):
    '''
    Find number of images in every class image csv - train and val

    The class list is taken from the training csv. Occurrences of each of
    those classes in the validation csv are added to the training counts;
    labels that appear only in the validation csv are not reported.

    Args:
        tpath (str): Path to csv pointing to training dataset
        vpath (str): Path to csv pointing to validation dataset
        delimiter (str): Delimiter for csv file

    Returns:
        list: List of classes
        list: List of number of images in every class
    '''
    # Local import keeps this function self-contained.
    from collections import Counter

    classes_folder, classes_folder_strength = populate_from_csv_train(tpath, delimiter)
    _, val_labels = get_img_label(vpath, delimiter)
    # Count validation labels once; a Counter returns 0 for absent labels.
    val_counts = Counter(val_labels)
    for i, name in enumerate(classes_folder):
        classes_folder_strength[i] += val_counts[name]
    return classes_folder, classes_folder_strength
def populate_from_folder_train(tpath)

Find number of images in every class image folder - train

Args

tpath : str
Path to image training folder

Returns

list
List of classes
list
List of number of images in every class
Expand source code
def populate_from_folder_train(tpath):
    '''
    Find number of images in every class image folder - train

    Every sub-directory of ``tpath`` is treated as one class and the number
    of entries inside it is that class's strength. Entries of ``tpath`` that
    are not directories are ignored.

    Args:
        tpath (str): Path to image training folder

    Returns:
        list: List of classes (in the order returned by os.listdir)
        list: List of number of images in every class
    '''
    classes_folder = []
    classes_folder_strength = []
    for name in os.listdir(tpath):
        class_dir = tpath + "/" + name
        # Only sub-directories are classes; a stray file (e.g. ".DS_Store")
        # would otherwise make os.listdir raise NotADirectoryError.
        if not os.path.isdir(class_dir):
            continue
        classes_folder.append(name)
        classes_folder_strength.append(len(os.listdir(class_dir)))
    return classes_folder, classes_folder_strength
def populate_from_folder_trainval(tpath, vpath)

Find number of images in every class image folder - train and val

Args

tpath : str
Path to image training folder
vpath : str
Path to image validation folder

Returns

list
List of classes
list
List of number of images in every class
Expand source code
def populate_from_folder_trainval(tpath, vpath):
    '''
    Find number of images in every class image folder - train and val

    The class list is taken from the training folder. For every training
    class, the number of entries in the same-named validation sub-folder
    (when it exists) is added to the training count. Validation folders
    with no matching training class are not reported.

    Args:
        tpath (str): Path to image training folder
        vpath (str): Path to image validation folder

    Returns:
        list: List of classes
        list: List of number of images in every class
    '''
    classes_folder, classes_folder_strength = populate_from_folder_train(tpath)
    # Iterate over the training classes rather than the number of validation
    # entries: the two folders may hold a different number of classes, which
    # previously caused an IndexError or a lookup of a missing folder.
    for i, name in enumerate(classes_folder):
        val_dir = vpath + "/" + name
        if os.path.isdir(val_dir):
            classes_folder_strength[i] += len(os.listdir(val_dir))
    return classes_folder, classes_folder_strength
def populate_missing(tpath, dataset_path, delimiter)

Find number of missing images in imageset

Args

tpath : str
Path to csv pointing to training dataset
dataset_path : str
Path to dataset containing images
delimiter : str
Delimiter for csv file

Returns

list
List of missing images
Expand source code
def populate_missing(tpath, dataset_path, delimiter):
    '''
    Find missing images in imageset

    Each non-blank csv row must hold an image name and a label separated by
    ``delimiter``. An image is reported as missing when
    ``dataset_path + "/" + image name`` is not an existing file.

    Args:
        tpath (str): Path to csv pointing to training dataset
        dataset_path (str): Path to dataset containing images
        delimiter (str): Delimiter for csv file

    Returns:
        list: List of missing images (full paths)

    Raises:
        ValueError: If a non-blank row does not split into exactly two fields
    '''
    missing_images = []
    for row in read_csv(tpath):
        # Strip only the line terminator and skip blank lines, which would
        # otherwise fail to unpack into (image, label).
        row = row.rstrip("\r\n")
        if not row:
            continue
        img, _ = row.split(delimiter)
        img_path = dataset_path + "/" + img
        if not os.path.isfile(img_path):
            missing_images.append(img_path)
    return missing_images
def read_csv(fname)

Read CSV File

Args

fname : str
Path to csv file

Returns

list
List of every row in csv
Expand source code
def read_csv(fname):
    '''
    Read CSV File

    The first line is treated as a header and dropped. Rows are returned
    exactly as read, including their line terminators.

    Args:
        fname (str): Path to csv file

    Returns:
        list: List of every row in csv (header excluded); empty when the
            file has no data rows
    '''
    # Context manager guarantees the handle is closed even if reading fails.
    with open(fname) as f:
        rows = f.readlines()
    # Slicing instead of ``del rows[0]`` avoids an IndexError on an empty file.
    return rows[1:]