Source code for directory_data_loader

import os, cv2
import numpy as np

from src.data_loader import _DataLoader

class DirectoryDataLoader(_DataLoader):
    """
    DataLoader subclass that loads image and label data from a directory tree.

    Each visible (non-dot) sub-directory of ``datapath`` is treated as one label.
    For static image data a label directory contains the image files directly;
    for time series data (``time_delay`` given) it contains one sub-directory
    per series sample, each holding that sample's image files.

    :param target_emotion_map: Optional dict of target emotion label values/strings and their corresponding label
        vector index values. When given, label directories whose names are not keys of this dict are skipped.
    :param datapath: Location of image dataset.
    :param validation_split: Float percentage of data to use as validation set.
    :param out_channels: Number of image channels.
    :param time_delay: Number of images to load from each time series sample. Parameter must be provided to load
        time series data and unspecified if using static image data.
    """

    def __init__(self, target_emotion_map=None, datapath=None, validation_split=0.2, out_channels=1, time_delay=None):
        # These attributes are assigned before delegating to the base initializer.
        # NOTE(review): presumably _DataLoader.__init__ calls _validate_arguments(),
        # which reads self.datapath -- confirm before reordering.
        self.datapath = datapath
        self.target_emotion_map = target_emotion_map
        self.out_channels = out_channels
        super().__init__(validation_split, time_delay)

    @staticmethod
    def _list_visible_entries(directory_path):
        """
        Lists the non-hidden entries of a directory in a deterministic order.

        ``os.listdir`` returns entries in arbitrary order, so the names are sorted to make label indices and
        series frame order reproducible across platforms.

        :param directory_path: Directory to list.
        :return: Sorted list of entry names that do not start with a dot.
        """
        return sorted(entry for entry in os.listdir(directory_path) if not entry.startswith('.'))

    def load_data(self):
        """
        Loads image and label data from specified directory path.

        :return: Dataset object containing image and label data.
        """
        images = []
        labels = []
        emotion_index_map = {}
        for label_directory in self._list_visible_entries(self.datapath):
            # With a target map, only the listed labels are loaded.
            if self.target_emotion_map and label_directory not in self.target_emotion_map:
                continue
            self._add_new_label_to_map(label_directory, emotion_index_map)
            label_directory_path = os.path.join(self.datapath, label_directory)
            if self.time_delay:
                self._load_series_for_single_emotion_directory(images, label_directory, label_directory_path, labels)
            else:
                image_files = self._list_visible_entries(label_directory_path)
                self._load_images_from_directory_to_array(image_files, images, label_directory, label_directory_path,
                                                          labels)
        # _vectorize_labels, _check_data_not_empty and _load_dataset are inherited from _DataLoader.
        vectorized_labels = self._vectorize_labels(emotion_index_map, labels)
        self._check_data_not_empty(images)
        return self._load_dataset(np.array(images), np.array(vectorized_labels), emotion_index_map)

    def _load_series_for_single_emotion_directory(self, images, label_directory, label_directory_path, labels):
        """
        Loads every time series sample found under one label directory.

        Appends one image series (truncated to ``time_delay`` images) to ``images`` and one label to ``labels``
        per series sub-directory.

        :param images: List that receives one image series per sample.
        :param label_directory: Name of the label directory; used as the label value.
        :param label_directory_path: Path to the label directory.
        :param labels: List that receives one label per sample.
        :raises ValueError: If a series directory holds fewer than ``time_delay`` images.
        """
        for series_directory in self._list_visible_entries(label_directory_path):
            series_directory_path = os.path.join(label_directory_path, series_directory)
            self._check_series_directory_size(series_directory_path)
            new_image_series = []
            # Frames are loaded in sorted-name order. This assumes file names sort in temporal order
            # (e.g. zero-padded frame indices) -- TODO confirm against the dataset layout.
            image_files = self._list_visible_entries(series_directory_path)
            self._load_images_from_directory_to_array(image_files, new_image_series, label_directory,
                                                      series_directory_path, labels)
            new_image_series = self._apply_time_delay_to_series(images, new_image_series)
            images.append(new_image_series)
            labels.append(label_directory)

    def _apply_time_delay_to_series(self, images, new_image_series):
        """
        Keeps only the last ``time_delay`` images of a series.

        :param images: Unused; retained so the existing signature stays intact.
        :param new_image_series: List of loaded images for one series sample.
        :return: The trailing ``time_delay`` images of ``new_image_series``.
        """
        end_idx = len(new_image_series)
        start_idx = end_idx - self.time_delay
        return new_image_series[start_idx:end_idx]

    def _load_images_from_directory_to_array(self, image_files, images, label, directory_path, labels):
        """
        Loads the given image files and appends them to ``images``.

        For static image data (no ``time_delay``) a label is appended per image; for time series data the
        caller appends a single label per series instead.

        :param image_files: Names of the image files to load.
        :param images: List that receives the loaded images.
        :param label: Label value associated with the images.
        :param directory_path: Directory containing ``image_files``.
        :param labels: List that receives one label per image for static data.
        """
        for image_file in image_files:
            images.append(self._load_image(image_file, directory_path))
            if not self.time_delay:
                labels.append(label)

    def _add_new_label_to_map(self, label_directory, label_index_map):
        """
        Registers a label under the next free index (the current size of the map).

        :param label_directory: Label name to register.
        :param label_index_map: Dict of label name to index, updated in place.
        """
        label_index_map[label_directory] = len(label_index_map)

    def _load_image(self, image_file, directory_path):
        """
        Reads one image file and reshapes it with the inherited ``_reshape`` helper.

        :param image_file: Name of the image file.
        :param directory_path: Directory containing the file.
        :return: The image as returned by ``_reshape``.
        :raises ValueError: If the file cannot be read as an image.
        """
        image_file_path = os.path.join(directory_path, image_file)
        image = cv2.imread(image_file_path)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise ValueError('Could not read image file: %s' % image_file_path)
        return self._reshape(image)

    def _validate_arguments(self):
        """Validates the constructor arguments specific to directory loading."""
        self._check_directory_arguments()

    def _check_directory_arguments(self):
        """
        Validates arguments for loading from directories, including static image and time series directories.

        :raises NotADirectoryError: If ``datapath`` is missing or is not an existing directory.
        :raises ValueError: If ``time_delay`` is given but is not an integer greater than 0.
        """
        # os.path.isdir(None) raises TypeError, so the unset default is rejected explicitly.
        if self.datapath is None or not os.path.isdir(self.datapath):
            raise NotADirectoryError('Directory does not exist: %s' % self.datapath)
        if self.time_delay:
            # The type is checked first so a non-numeric value gets a clear ValueError instead of
            # failing inside the `<` comparison, and a fraction is not reported via `%i` as 0.
            if not isinstance(self.time_delay, int):
                raise ValueError('Time step argument must be an integer, but gave: %s' % str(self.time_delay))
            if self.time_delay < 1:
                raise ValueError('Time step argument must be greater than 0, but gave: %i' % self.time_delay)

    def _check_series_directory_size(self, series_directory_path):
        """
        Ensures a series directory holds at least ``time_delay`` visible image files.

        :param series_directory_path: Path to one time series sample directory.
        :raises ValueError: If the directory contains fewer than ``time_delay`` files.
        """
        image_files = self._list_visible_entries(series_directory_path)
        if len(image_files) < self.time_delay:
            raise ValueError('Time series sample found in path %s does not contain enough images for %s time steps.' % (
                series_directory_path, str(self.time_delay)))