Description
For example, the following script
```python
from __future__ import print_function
import numpy as np
import random

# np.random.seed(0)

class Transform(object):
    def __init__(self):
        pass

    def __call__(self, item=None):
        return [np.random.randint(10000, 20000), random.randint(20000, 30000)]

class RandomDataset(object):
    def __init__(self):
        pass

    def __getitem__(self, ind):
        item = [ind, np.random.randint(1, 10000), random.randint(10000, 20000), 0]
        tsfm = Transform()(item)
        return np.array(item + tsfm)

    def __len__(self):
        return 20

from torch.utils.data import DataLoader

ds = RandomDataset()
ds = DataLoader(ds, 10, shuffle=False, num_workers=4)
for batch in ds:
    print(batch)
```
gives
```
# 0 2208 10983 0 15930 26264
# 1 2798 14403 0 17685 29545
# 2 528 16195 0 12927 28761
# 3 8541 13614 0 15240 24058
# 4 7144 14373 0 18374 28081
# 5 2329 17456 0 15192 26903
# 6 423 12168 0 18504 24193
# 7 9476 12027 0 19924 22325
# 8 3427 17570 0 12895 29773
# 9 6526 13327 0 15768 24566
#[torch.LongTensor of size 10x6]
#
#
# 10 2208 15203 0 15930 26024
# 11 2798 13264 0 17685 22011
# 12 528 11714 0 12927 24688
# 13 8541 11773 0 15240 29607
# 14 7144 14655 0 18374 24573
# 15 2329 12544 0 15192 27908
# 16 423 15892 0 18504 23111
# 17 9476 17389 0 19924 23799
# 18 3427 12458 0 12895 23201
# 19 6526 14935 0 15768 20789
#[torch.LongTensor of size 10x6]
```
The second and fifth columns are random numbers generated with np.random (in RandomDataset and Transform, respectively). However, they are always the same across different batches, while the third and sixth columns, generated with Python's random module, do change. This makes any random data augmentation that relies on np.random effectively "not random".
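This looks like each worker process inheriting the parent's NumPy RNG state on fork: Python's random module is apparently reseeded per worker (its columns differ), but NumPy's is not. As a possible workaround, here is a minimal sketch that reseeds NumPy in every worker through the DataLoader's worker_init_fn argument (assuming a PyTorch version that supports it; the helper name seed_numpy_per_worker is just illustrative):

```python
import numpy as np
import torch
from torch.utils.data import DataLoader

def seed_numpy_per_worker(worker_id):
    # torch.initial_seed() already differs per worker (base_seed + worker_id),
    # so deriving NumPy's seed from it gives each worker its own stream.
    # The modulus keeps the value in the 32-bit range NumPy accepts.
    np.random.seed(torch.initial_seed() % 2**32)

# RandomDataset as defined in the script above
ds = RandomDataset()
loader = DataLoader(ds, 10, shuffle=False, num_workers=4,
                    worker_init_fn=seed_numpy_per_worker)
for batch in loader:
    print(batch)
```

With this, the second and fifth columns should differ across batches as well, since every worker draws from its own NumPy stream.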
cc @ezyang @gchanan @zou3519 @bdhirsh @jbschlosser @anjali411 @ssnl @VitalyFedyunin @ejguan @pbelevich