import pandas as pd
import numpy as np
import cv2
from torch.utils.data.dataset import Dataset
class CustomDatasetFromCSV(Dataset):
def __init__(self, csv_path, transform=None):
self.data = pd.read_csv(csv_path)
self.labels = pd.get_dummies(self.data['emotion']).as_matrix()
self.height = 48
self.width = 48
self.transform = transform
def __getitem__(self, index):
pixels = self.data['pixels'].tolist()
faces = []
for pixel_sequence in pixels:
face = [int(pixel) for pixel in pixel_sequence.split(' ')]
# print(np.asarray(face).shape)
face = np.asarray(face).reshape(self.width, self.height)
face = cv2.resize(face.astype('uint8'), (self.width, self.height))
faces.append(face.astype('float32'))
faces = np.asarray(faces)
faces = np.expand_dims(faces, -1)
return faces, self.labels
def __len__(self):
return len(self.data)
This is what I could manage to do by using references from other repositories. However, I want to split this dataset into train and test.
How can I do that inside this class? Or do I need to make a separate class to do that?
Starting in PyTorch 0.4.1 you can use random_split
:
train_size = int(0.8 * len(full_dataset))
test_size = len(full_dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size])