Skip to content
This repository was archived by the owner on Jul 7, 2023. It is now read-only.

Commit 042c44c

Browse files
author
Huyen Nguyen
committed
Take FLAGS.num_shards into account
1 parent c91989c commit 042c44c

File tree

2 files changed

+3
-3
lines changed

2 files changed

+3
-3
lines changed

tensor2tensor/bin/t2t-datagen

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -356,7 +356,7 @@ def generate_data_for_problem(problem):
356356

357357
def generate_data_for_registered_problem(problem_name):
358358
problem = registry.problem(problem_name)
359-
problem.generate_data(FLAGS.data_dir, FLAGS.tmp_dir)
359+
problem.generate_data(FLAGS.data_dir, FLAGS.tmp_dir, FLAGS.num_shards)
360360

361361

362362
if __name__ == "__main__":

tensor2tensor/data_generators/wmt.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -49,10 +49,10 @@ def target_vocab_size(self):
4949
def feature_encoders(self, data_dir):
5050
return _default_wmt_feature_encoders(data_dir, self.target_vocab_size)
5151

52-
def generate_data(self, data_dir, tmp_dir):
52+
def generate_data(self, data_dir, tmp_dir, num_shards=100):
5353
generator_utils.generate_dataset_and_shuffle(
5454
ende_wordpiece_token_generator(tmp_dir, True, self.target_vocab_size),
55-
self.training_filepaths(data_dir, 100, shuffled=False),
55+
self.training_filepaths(data_dir, num_shards, shuffled=False),
5656
ende_wordpiece_token_generator(tmp_dir, False, self.target_vocab_size),
5757
self.dev_filepaths(data_dir, 1, shuffled=False))
5858

0 commit comments

Comments
 (0)