@@ -80,24 +80,30 @@ _SUPPORTED_PROBLEM_GENERATORS = {
8080 lambda : algorithmic_math .algebra_inverse (26 , 0 , 2 , 100000 ),
8181 lambda : algorithmic_math .algebra_inverse (26 , 3 , 3 , 10000 )),
8282 "ice_parsing_tokens" : (
83- lambda : wmt .tabbed_parsing_token_generator (FLAGS . tmp_dir ,
84- True , "ice" , 2 ** 13 , 2 ** 8 ),
85- lambda : wmt .tabbed_parsing_token_generator (FLAGS . tmp_dir ,
86- False , "ice" , 2 ** 13 , 2 ** 8 )),
83+ lambda : wmt .tabbed_parsing_token_generator (
84+ FLAGS . data_dir , FLAGS . tmp_dir , True , "ice" , 2 ** 13 , 2 ** 8 ),
85+ lambda : wmt .tabbed_parsing_token_generator (
86+ FLAGS . data_dir , FLAGS . tmp_dir , False , "ice" , 2 ** 13 , 2 ** 8 )),
8787 "ice_parsing_characters" : (
88- lambda : wmt .tabbed_parsing_character_generator (FLAGS .tmp_dir , True ),
89- lambda : wmt .tabbed_parsing_character_generator (FLAGS .tmp_dir , False )),
88+ lambda : wmt .tabbed_parsing_character_generator (
89+ FLAGS .data_dir , FLAGS .tmp_dir , True ),
90+ lambda : wmt .tabbed_parsing_character_generator (
91+ FLAGS .data_dir , FLAGS .tmp_dir , False )),
9092 "wmt_parsing_tokens_8k" : (
91- lambda : wmt .parsing_token_generator (FLAGS .tmp_dir , True , 2 ** 13 ),
92- lambda : wmt .parsing_token_generator (FLAGS .tmp_dir , False , 2 ** 13 )),
93+ lambda : wmt .parsing_token_generator (
94+ FLAGS .data_dir , FLAGS .tmp_dir , True , 2 ** 13 ),
95+ lambda : wmt .parsing_token_generator (
96+ FLAGS .data_dir , FLAGS .tmp_dir , False , 2 ** 13 )),
9397 "wsj_parsing_tokens_16k" : (
94- lambda : wsj_parsing .parsing_token_generator (FLAGS . tmp_dir , True ,
95- 2 ** 14 , 2 ** 9 ),
96- lambda : wsj_parsing .parsing_token_generator (FLAGS . tmp_dir , False ,
97- 2 ** 14 , 2 ** 9 )),
98+ lambda : wsj_parsing .parsing_token_generator (
99+ FLAGS . data_dir , FLAGS . tmp_dir , True , 2 ** 14 , 2 ** 9 ),
100+ lambda : wsj_parsing .parsing_token_generator (
101+ FLAGS . data_dir , FLAGS . tmp_dir , False , 2 ** 14 , 2 ** 9 )),
98102 "wmt_ende_bpe32k" : (
99- lambda : wmt .ende_bpe_token_generator (FLAGS .tmp_dir , True ),
100- lambda : wmt .ende_bpe_token_generator (FLAGS .tmp_dir , False )),
103+ lambda : wmt .ende_bpe_token_generator (
104+ FLAGS .data_dir , FLAGS .tmp_dir , True ),
105+ lambda : wmt .ende_bpe_token_generator (
106+ FLAGS .data_dir , FLAGS .tmp_dir , False )),
101107 "lm1b_32k" : (
102108 lambda : lm1b .generator (FLAGS .tmp_dir , True ),
103109 lambda : lm1b .generator (FLAGS .tmp_dir , False )
@@ -119,101 +125,50 @@ _SUPPORTED_PROBLEM_GENERATORS = {
119125 lambda : image .cifar10_generator (FLAGS .tmp_dir , True , 50000 ),
120126 lambda : image .cifar10_generator (FLAGS .tmp_dir , False , 10000 )),
121127 "image_mscoco_characters_test" : (
122- lambda : image .mscoco_generator (FLAGS .tmp_dir , True , 80000 ),
123- lambda : image .mscoco_generator (FLAGS .tmp_dir , False , 40000 )),
128+ lambda : image .mscoco_generator (
129+ FLAGS .data_dir , FLAGS .tmp_dir , True , 80000 ),
130+ lambda : image .mscoco_generator (
131+ FLAGS .data_dir , FLAGS .tmp_dir , False , 40000 )),
124132 "image_celeba_tune" : (
125133 lambda : image .celeba_generator (FLAGS .tmp_dir , 162770 ),
126134 lambda : image .celeba_generator (FLAGS .tmp_dir , 19867 , 162770 )),
127135 "image_mscoco_tokens_8k_test" : (
128136 lambda : image .mscoco_generator (
129- FLAGS .tmp_dir ,
130- True ,
131- 80000 ,
132- vocab_filename = "tokens.vocab.%d" % 2 ** 13 ,
133- vocab_size = 2 ** 13 ),
137+ FLAGS .data_dir , FLAGS .tmp_dir , True , 80000 ,
138+ vocab_filename = "vocab.endefr.%d" % 2 ** 13 , vocab_size = 2 ** 13 ),
134139 lambda : image .mscoco_generator (
135- FLAGS .tmp_dir ,
136- False ,
137- 40000 ,
138- vocab_filename = "tokens.vocab.%d" % 2 ** 13 ,
139- vocab_size = 2 ** 13 )),
140+ FLAGS .data_dir , FLAGS .tmp_dir , False , 40000 ,
141+ vocab_filename = "vocab.endefr.%d" % 2 ** 13 , vocab_size = 2 ** 13 )),
140142 "image_mscoco_tokens_32k_test" : (
141143 lambda : image .mscoco_generator (
142- FLAGS .tmp_dir ,
143- True ,
144- 80000 ,
145- vocab_filename = "tokens.vocab.%d" % 2 ** 15 ,
146- vocab_size = 2 ** 15 ),
144+ FLAGS .data_dir , FLAGS .tmp_dir , True , 80000 ,
145+ vocab_filename = "vocab.endefr.%d" % 2 ** 15 , vocab_size = 2 ** 15 ),
147146 lambda : image .mscoco_generator (
148- FLAGS .tmp_dir ,
149- False ,
150- 40000 ,
151- vocab_filename = "tokens.vocab.%d" % 2 ** 15 ,
152- vocab_size = 2 ** 15 )),
147+ FLAGS .data_dir , FLAGS .tmp_dir , False , 40000 ,
148+ vocab_filename = "vocab.endefr.%d" % 2 ** 15 , vocab_size = 2 ** 15 )),
153149 "snli_32k" : (
154150 lambda : snli .snli_token_generator (FLAGS .tmp_dir , True , 2 ** 15 ),
155151 lambda : snli .snli_token_generator (FLAGS .tmp_dir , False , 2 ** 15 ),
156152 ),
157- "audio_timit_characters_tune" : (
158- lambda : audio .timit_generator (FLAGS .tmp_dir , True , 1374 ),
159- lambda : audio .timit_generator (FLAGS .tmp_dir , True , 344 , 1374 )),
160153 "audio_timit_characters_test" : (
161- lambda : audio .timit_generator (FLAGS .tmp_dir , True , 1718 ),
162- lambda : audio .timit_generator (FLAGS .tmp_dir , False , 626 )),
163- "audio_timit_tokens_8k_tune" : (
164154 lambda : audio .timit_generator (
165- FLAGS .tmp_dir ,
166- True ,
167- 1374 ,
168- vocab_filename = "tokens.vocab.%d" % 2 ** 13 ,
169- vocab_size = 2 ** 13 ),
155+ FLAGS .data_dir , FLAGS .tmp_dir , True , 1718 ),
170156 lambda : audio .timit_generator (
171- FLAGS .tmp_dir ,
172- True ,
173- 344 ,
174- 1374 ,
175- vocab_filename = "tokens.vocab.%d" % 2 ** 13 ,
176- vocab_size = 2 ** 13 )),
157+ FLAGS .data_dir , FLAGS .tmp_dir , False , 626 )),
177158 "audio_timit_tokens_8k_test" : (
178159 lambda : audio .timit_generator (
179- FLAGS .tmp_dir ,
180- True ,
181- 1718 ,
182- vocab_filename = "tokens.vocab.%d" % 2 ** 13 ,
183- vocab_size = 2 ** 13 ),
184- lambda : audio .timit_generator (
185- FLAGS .tmp_dir ,
186- False ,
187- 626 ,
188- vocab_filename = "tokens.vocab.%d" % 2 ** 13 ,
189- vocab_size = 2 ** 13 )),
190- "audio_timit_tokens_32k_tune" : (
191- lambda : audio .timit_generator (
192- FLAGS .tmp_dir ,
193- True ,
194- 1374 ,
195- vocab_filename = "tokens.vocab.%d" % 2 ** 15 ,
196- vocab_size = 2 ** 15 ),
160+ FLAGS .data_dir , FLAGS .tmp_dir , True , 1718 ,
161+ vocab_filename = "vocab.endefr.%d" % 2 ** 13 , vocab_size = 2 ** 13 ),
197162 lambda : audio .timit_generator (
198- FLAGS .tmp_dir ,
199- True ,
200- 344 ,
201- 1374 ,
202- vocab_filename = "tokens.vocab.%d" % 2 ** 15 ,
203- vocab_size = 2 ** 15 )),
163+ FLAGS .data_dir , FLAGS .tmp_dir , False , 626 ,
164+ vocab_filename = "vocab.endefr.%d" % 2 ** 13 , vocab_size = 2 ** 13 )),
204165 "audio_timit_tokens_32k_test" : (
205166 lambda : audio .timit_generator (
206- FLAGS .tmp_dir ,
207- True ,
208- 1718 ,
209- vocab_filename = "tokens.vocab.%d" % 2 ** 15 ,
210- vocab_size = 2 ** 15 ),
167+ FLAGS .data_dir , FLAGS .tmp_dir , True , 1718 ,
168+ vocab_filename = "vocab.endefr.%d" % 2 ** 15 , vocab_size = 2 ** 15 ),
211169 lambda : audio .timit_generator (
212- FLAGS .tmp_dir ,
213- False ,
214- 626 ,
215- vocab_filename = "tokens.vocab.%d" % 2 ** 15 ,
216- vocab_size = 2 ** 15 )),
170+ FLAGS .data_dir , FLAGS .tmp_dir , False , 626 ,
171+ vocab_filename = "vocab.endefr.%d" % 2 ** 15 , vocab_size = 2 ** 15 )),
217172 "lmptb_10k" : (
218173 lambda : ptb .train_generator (
219174 FLAGS .tmp_dir ,
0 commit comments