/tinyloader

Primary language: Python · License: Apache License 2.0 (Apache-2.0)

tinyloader

A tiny multiprocess data loader in ~100 lines, inspired by torch.utils.data.DataLoader, geohot/tinygrad, and karpathy/micrograd.

See the blog post "DataLoaders Explained: Building a Multi-Process Data Loader from Scratch".

Example

from dataloader import DataLoader
import numpy as np

class Dataset:
    """Toy map-style dataset that returns the same sample for every valid index.

    Each sample is a ``(3, 32, 32)`` array of zeros paired with the integer
    label ``1``.
    """

    def __init__(self, size):
        """Store the number of samples reported by ``len()``."""
        self.size = size

    def __len__(self):
        """Return the number of samples in the dataset."""
        return self.size

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair for ``index``.

        Raises:
            IndexError: If ``index`` is outside ``[-size, size)``.
        """
        # Python's fallback iteration protocol (used by ``for s in ds`` and
        # ``list(ds)`` when no ``__iter__`` is defined) keeps calling
        # ``__getitem__`` with 0, 1, 2, ... until IndexError is raised, so an
        # unbounded ``__getitem__`` would make such loops run forever.
        if not -self.size <= index < self.size:
            raise IndexError(
                f"index {index} out of range for dataset of size {self.size}"
            )
        return np.zeros((3, 32, 32)), 1


# Build a dataset that reports 1024 samples.
ds = Dataset(1024)
# Wrap it in the tinyloader DataLoader with num_workers=4 and batch_size=64.
dl = DataLoader(ds, num_workers=4, batch_size=64)

# Take one batch by calling next() on the loader directly.
x, y = next(dl)

print(x.shape)  # (64, 3, 32, 32)
print(y.shape)  # (64,)

Same Example in PyTorch

from torch.utils import data
import numpy as np

class Dataset(data.Dataset):
    """Toy map-style dataset that returns the same sample for every valid index.

    Each sample is a ``(3, 32, 32)`` array of zeros paired with the integer
    label ``1``.
    """

    def __init__(self, size):
        """Store the number of samples reported by ``len()``."""
        super().__init__()
        self.size = size

    def __len__(self):
        """Return the number of samples in the dataset."""
        return self.size

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair for ``index``.

        Raises:
            IndexError: If ``index`` is outside ``[-size, size)``.
        """
        # Without a bounds check, iterating the dataset directly
        # (``for s in ds`` / ``list(ds)``) never terminates: Python's fallback
        # iteration protocol calls ``__getitem__`` with 0, 1, 2, ... and only
        # stops when IndexError is raised.
        if not -self.size <= index < self.size:
            raise IndexError(
                f"index {index} out of range for dataset of size {self.size}"
            )
        return np.zeros((3, 32, 32)), 1


# Build a dataset that reports 1024 samples.
ds = Dataset(1024)
# Wrap it in torch's DataLoader with num_workers=4 and batch_size=64.
dl = data.DataLoader(ds, num_workers=4, batch_size=64)

# Obtain an iterator from the loader with iter(), then take its first batch.
x, y = next(iter(dl))

print(x.shape)  # torch.Size([64, 3, 32, 32])
print(y.shape)  # torch.Size([64])