
Commit 9709825: "more num_shards"
Author: Huyen Nguyen
Parent: 042c44c

3 files changed, 5 insertions(+), 5 deletions(-)


tensor2tensor/data_generators/algorithmic.py

Lines changed: 2 additions & 2 deletions
@@ -36,10 +36,10 @@ class AlgorithmicIdentityBinary40(problem.Problem):
   def num_symbols(self):
     return 2
 
-  def generate_data(self, data_dir, _):
+  def generate_data(self, data_dir, _, num_shards=100):
     utils.generate_dataset_and_shuffle(
         identity_generator(self.num_symbols, 40, 100000),
-        self.training_filepaths(data_dir, 100, shuffled=True),
+        self.training_filepaths(data_dir, num_shards, shuffled=True),
         identity_generator(self.num_symbols, 400, 10000),
         self.dev_filepaths(data_dir, 1, shuffled=True),
         shuffle=False)
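
The hunk above keeps the previous default of 100 training shards while letting callers override it per call. A minimal usage sketch, assuming this revision of tensor2tensor is importable; the data directory is illustrative, and the second positional argument is ignored by this problem (hence the "_" in its signature):

from tensor2tensor.data_generators import algorithmic

problem = algorithmic.AlgorithmicIdentityBinary40()

# Default keeps the old behavior: training data is written as 100 shards.
problem.generate_data("/tmp/t2t_data", None)

# The new keyword writes fewer, larger training shards instead.
problem.generate_data("/tmp/t2t_data", None, num_shards=10)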

tensor2tensor/data_generators/problem.py

Lines changed: 1 addition & 1 deletion
@@ -113,7 +113,7 @@ class Problem(object):
   # BEGIN SUBCLASS INTERFACE
   # ============================================================================
 
-  def generate_data(self, data_dir, tmp_dir):
+  def generate_data(self, data_dir, tmp_dir, num_shards=100):
     raise NotImplementedError()
 
   def hparams(self, defaults, model_hparams):

tensor2tensor/data_generators/wmt.py

Lines changed: 2 additions & 2 deletions
@@ -92,10 +92,10 @@ def target_vocab_size(self):
   def feature_encoders(self, data_dir):
     return _default_wmt_feature_encoders(data_dir, self.target_vocab_size)
 
-  def generate_data(self, data_dir, tmp_dir):
+  def generate_data(self, data_dir, tmp_dir, num_shards=100):
     generator_utils.generate_dataset_and_shuffle(
         mken_wordpiece_token_generator(tmp_dir, True, self.target_vocab_size),
-        self.training_filepaths(data_dir, 100, shuffled=False),
+        self.training_filepaths(data_dir, num_shards, shuffled=False),
         mken_wordpiece_token_generator(tmp_dir, False, self.target_vocab_size),
         self.dev_filepaths(data_dir, 1, shuffled=False))
