from tensorlayer.models.transformer.utils import metrics
from tensorlayer.models.transformer.utils import attention_visualisation
import tensorlayer as tl
""" Translation from Portuguese to English with a Transformer model
This tutorial provides basic instructions on how to define and train a Transformer model in TensorLayer for
a translation task. You can also learn how to visualize the attention block via this tutorial.
"""


def set_up_dataset():
    # Set up the dataset for Portuguese-English translation from the TED Talks Open Translation Project.
    # This dataset contains approximately 50000 training examples, 1100 validation examples, and 2000 test examples.
    # https://www.ted.com/participate/translate

    examples, metadata = tfds.load('ted_hrlr_translate/pt_to_en', with_info=True, as_supervised=True)
    train_examples, val_examples = examples['train'], examples['validation']

    # Set up the tokenizer and save it to disk.
    # Build a single shared subword vocabulary from both the Portuguese and the English sentences.
    tokenizer = tfds.features.text.SubwordTextEncoder.build_from_corpus(
        (text.numpy() for pt, en in train_examples for text in (pt, en)), target_vocab_size=2**14
    )

    tokenizer.save_to_file("tokenizer")
    tokenizer = tfds.features.text.SubwordTextEncoder.load_from_file("tokenizer")
    # ... (unchanged code not shown in the diff) ...


def test_tokenizer_success(tokenizer):
    sample_string = 'TensorLayer is awesome.'

    tokenized_string = tokenizer.encode(sample_string)
    print('Tokenized string is {}'.format(tokenized_string))

    original_string = tokenizer.decode(tokenized_string)
    print('The original string: {}'.format(original_string))
    assert original_string == sample_string


def generate_training_dataset(train_examples, tokenizer):

    def encode(lang1, lang2):
        # Append the end-of-sentence id (vocab_size + 1) to each tokenized sentence.
        lang1 = tokenizer.encode(lang1.numpy()) + [tokenizer.vocab_size + 1]

        lang2 = tokenizer.encode(lang2.numpy()) + [tokenizer.vocab_size + 1]

        return lang1, lang2

    MAX_LENGTH = 50

    def filter_max_length(x, y, max_length=MAX_LENGTH):
        # Keep only sentence pairs where both sides fit within MAX_LENGTH tokens.
        return tf.logical_and(tf.size(x) <= max_length, tf.size(y) <= max_length)

    def tf_encode(pt, en):
        return tf.py_function(encode, [pt, en], [tf.int64, tf.int64])

    train_dataset = train_examples.map(tf_encode)
    train_dataset = train_dataset.filter(filter_max_length)
    # cache the dataset to memory to get a speedup while reading from it.
    train_dataset = train_dataset.cache()
    BUFFER_SIZE = 20000
    BATCH_SIZE = 64
    # shuffle, then pad each batch to the length of its longest sequence
    train_dataset = train_dataset.shuffle(BUFFER_SIZE).padded_batch(BATCH_SIZE, padded_shapes=([-1], [-1]))
    train_dataset = train_dataset.prefetch(tf.data.experimental.AUTOTUNE)

    return train_dataset
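

# A quick, optional sanity check (not part of the original tutorial): after padding and
# batching, the dataset should yield pairs of [batch_size, seq_len] int64 tensors.
# The helper name inspect_one_batch is ours, added only for illustration.
def inspect_one_batch(train_dataset):
    for pt_batch, en_batch in train_dataset.take(1):
        print('pt batch shape:', pt_batch.shape)
        print('en batch shape:', en_batch.shape)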


def model_setup(tokenizer):
    # define the hyper-parameters for the Transformer
    class HYPER_PARAMS(object):
        # ... (additional hyper-parameters not shown in the diff) ...
        extra_decode_length = 50
        beam_size = 5
        alpha = 0.6  # used to calculate length normalization in beam search
        # (typically ((5 + length) / 6) ** alpha, as in the reference Transformer implementation)

        label_smoothing = 0.1
        learning_rate = 2.0
        learning_rate_decay_rate = 1.0
        learning_rate_warmup_steps = 4000

        sos_id = 0
        eos_id = tokenizer.vocab_size + 1

    model = Transformer(HYPER_PARAMS)

    # ... (unchanged code not shown in the diff) ...

# Use the Adam optimizer with a custom learning rate schedule, following the formula in the paper "Attention Is All You Need".
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):

    def __init__(self, d_model, warmup_steps=5):
        super(CustomSchedule, self).__init__()

        self.d_model = tf.cast(d_model, tf.float32)

        self.warmup_steps = warmup_steps

    def __call__(self, step):
        # lrate = d_model^-0.5 * min(step^-0.5, step * warmup_steps^-1.5)
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps**-1.5)

        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)
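
# The optimizer construction itself sits in a part of the file that this diff does not show.
# As an assumption (not the original code), a schedule like the one above is typically
# attached to Adam with the beta/epsilon values from "Attention Is All You Need":
def build_optimizer(d_model, warmup_steps=4000):
    learning_rate = CustomSchedule(d_model, warmup_steps=warmup_steps)
    return tf.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)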


def tutorial_transformer():
    # ... (unchanged code not shown in the diff) ...
            if batch % 50 == 0:
                print('Batch ID {} at Epoch [{}/{}]: loss {:.4f}'.format(batch, epoch + 1, num_epochs, loss))

    model.eval()
    sentence_en = tokenizer.encode('TensorLayer is awesome.')
    [prediction, weights_decoder], weights_encoder = model(inputs=[sentence_en])

    # Drop special token ids (>= vocab_size) before decoding the predicted sentence.
    predicted_sentence = tokenizer.decode([i for i in prediction["outputs"][0] if i < tokenizer.vocab_size])
    print("Translated: ", predicted_sentence)

    # visualize the self attention of the first encoder layer
    tokenizer_str = [tokenizer.decode([ts]) for ts in sentence_en]
    attention_visualisation.plot_attention_weights(weights_encoder["layer_0"], tokenizer_str, tokenizer_str)
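    # Reading the plot: both axes are labelled with the decoded subword tokens of the input
    # sentence, so brighter cells mark token pairs that attend to each other more strongly.
    # (Illustrative note; the exact figure layout depends on plot_attention_weights.)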


if __name__ == "__main__":
    tutorial_transformer()