This repository was archived by the owner on Jul 7, 2023. It is now read-only.

Commit afd1565

Lukasz Kaiser authored and Ryan Sepassi committed

merge from github

PiperOrigin-RevId: 161447896
1 parent d827bb2 · commit afd1565

File tree: 9 files changed (+12 lines, -391 lines)

tensor2tensor/bin/t2t-datagen

Lines changed: 3 additions & 1 deletion
@@ -341,8 +341,10 @@ def main(_):
       FLAGS.data_dir, FLAGS.num_shards, FLAGS.max_cases)
 
   tf.logging.info("Generating development data for %s.", problem)
+  dev_shards = 10 if "coco" in problem else 1
   dev_output_files = generator_utils.generate_files(
-      dev_gen(), problem + UNSHUFFLED_SUFFIX + "-dev", FLAGS.data_dir, 1)
+      dev_gen(), problem + UNSHUFFLED_SUFFIX + "-dev",
+      FLAGS.data_dir, dev_shards)
 
   tf.logging.info("Shuffling data...")
   for fname in train_output_files + dev_output_files:
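
This change bumps only the number of development shards: COCO dev data is large enough to warrant 10 files, while every other problem keeps a single shard. A minimal sketch of the resulting file names, assuming t2t's usual `%05d-of-%05d` shard-naming convention and a made-up problem name:

```python
# Sketch only: the naming convention is assumed from generator_utils, and
# "image_coco_demo" is a hypothetical problem name, not one from the repo.
UNSHUFFLED_SUFFIX = "-unshuffled"

problem = "image_coco_demo"
dev_shards = 10 if "coco" in problem else 1
dev_files = [
    "%s%s-dev-%05d-of-%05d" % (problem, UNSHUFFLED_SUFFIX, i, dev_shards)
    for i in range(dev_shards)
]
print(dev_files[0])   # image_coco_demo-unshuffled-dev-00000-of-00010
print(dev_files[-1])  # image_coco_demo-unshuffled-dev-00009-of-00010
```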

tensor2tensor/bin/make_tf_configs.py renamed to tensor2tensor/bin/t2t-make-tf-configs

Lines changed: 1 addition & 0 deletions
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 # Copyright 2017 Google Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
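
With the `.py` extension gone, the script is invoked directly as `t2t-make-tf-configs`, so the added shebang is what tells the OS to hand the file to a Python interpreter (assuming its executable bit is set).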

tensor2tensor/data_generators/text_encoder.py

Lines changed: 2 additions & 1 deletion
@@ -28,7 +28,8 @@
 # Dependency imports
 
 import six
-from six import PY2, unichr  # pylint: disable=redefined-builtin
+from six import PY2
+from six import unichr  # pylint: disable=redefined-builtin
 from six.moves import xrange  # pylint: disable=redefined-builtin
 from tensor2tensor.data_generators import tokenizer
 
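Splitting the combined import keeps the pylint suppression scoped to `unichr` alone instead of also covering `PY2`. A minimal sketch of what the compatibility import buys, assuming only six's documented behavior:

```python
import six
from six import unichr  # pylint: disable=redefined-builtin

# six.unichr is the built-in unichr on Python 2 and chr on Python 3, so
# code-point-to-text conversion reads the same under both interpreters.
snowman = unichr(0x2603)
assert snowman == u"\u2603"
assert isinstance(snowman, six.text_type)  # unicode on PY2, str on PY3
```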
tensor2tensor/docs/distributed_training.md

Lines changed: 1 addition & 1 deletion
@@ -51,7 +51,7 @@ Parameter servers only need `--schedule=run_std_server`.
 
 ## Utility to produce `TF_CONFIG` and flags
 
-[`bin/make_tf_configs.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/bin/make_tf_configs.py))
+[`bin/make_tf_configs.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/bin/t2t-make-tf-configs))
 generates the `TF_CONFIG` json strings and the above-mentioned command-line
 flags for the workers and parameter servers.
 
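For context, `TF_CONFIG` is a JSON-encoded environment variable that tells each TensorFlow process about the cluster and its own role. An illustrative sketch of one such value, with made-up host addresses and a minimal master/ps layout (the real strings come from the utility above):

```python
import json
import os

# Hypothetical cluster layout: one master and two parameter servers. Host
# addresses and job names here are illustrative, not tool output.
os.environ["TF_CONFIG"] = json.dumps({
    "cluster": {
        "master": ["10.0.0.1:2222"],
        "ps": ["10.0.0.2:2222", "10.0.0.3:2222"],
    },
    "task": {"type": "master", "index": 0},
})
```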
tensor2tensor/models/common_layers.py

Lines changed: 1 addition & 126 deletions
@@ -292,7 +292,7 @@ def conv_internal(conv_fn, inputs, filters, kernel_size, **kwargs):
   """Conditional conv_fn making kernel 1d or 2d depending on inputs shape."""
   static_shape = inputs.get_shape()
   if not static_shape or len(static_shape) != 4:
-    raise ValueError("Inputs to conv must have statically known rank 4. Shape:" + str(static_shape))
+    raise ValueError("Inputs to conv must have statically known rank 4.")
   # Add support for left padding.
   if "padding" in kwargs and kwargs["padding"] == "LEFT":
     dilation_rate = (1, 1)
@@ -1378,128 +1378,3 @@ def smoothing_cross_entropy(logits, labels, vocab_size, confidence):
   xentropy = tf.nn.softmax_cross_entropy_with_logits(
       logits=logits, labels=soft_targets)
   return xentropy - normalizing
-
-
-def global_pool_1d(inputs, pooling_type="MAX", mask=None):
-  """Pools elements across the last dimension.
-
-  Useful to convert a list of vectors into a single vector to get a
-  representation of a set.
-
-  Args:
-    inputs: A tensor of dimensions batch_size x sequence_length x input_dims
-      containing the sequences of input vectors.
-    pooling_type: the pooling type to use, MAX or AVR.
-    mask: A tensor of dimensions batch_size x sequence_length containing a
-      mask for the inputs with 1's for existing elements, and 0's elsewhere.
-
-  Returns:
-    output: A tensor of dimensions batch_size x input_dims containing the
-      pooled vectors.
-  """
-  with tf.name_scope("global_pool", [inputs]):
-    if mask is not None:
-      mask = tf.expand_dims(mask, axis=2)
-      inputs = tf.multiply(inputs, mask)
-
-    if pooling_type == "MAX":
-      # A tf.pool can be used here, but reduce is cleaner.
-      output = tf.reduce_max(inputs, axis=1)
-    elif pooling_type == "AVR":
-      if mask is not None:
-        # Some elements are dummy elements, so we can't just take the mean.
-        output = tf.reduce_sum(inputs, axis=1)
-        num_elems = tf.reduce_sum(mask, axis=1, keep_dims=True)
-        # N.B.: this will produce a NaN if a batch contains no elements.
-        output = tf.div(output, num_elems)
-      else:
-        output = tf.reduce_mean(inputs, axis=1)
-
-  return output
-
-
-def linear_set_layer(layer_size,
-                     inputs,
-                     context=None,
-                     activation_fn=tf.nn.relu,
-                     dropout=0.0,
-                     name=None):
-  """Basic layer type for doing funky things with sets.
-
-  Applies a linear transformation to each element in the input set.
-  If a context is supplied, it is concatenated with the inputs.
-  E.g. one can use global_pool_1d to get a representation of the set,
-  which can then be used as the context for the next layer.
-
-  TODO: Add bias add.
-
-  Args:
-    layer_size: Dimension to transform the input vectors to.
-    inputs: A tensor of dimensions batch_size x sequence_length x input_dims
-      containing the sequences of input vectors.
-    context: A tensor of dimensions batch_size x context_dims containing a
-      global statistic about the set.
-    activation_fn: The activation function to use.
-    dropout: Dropout probability.
-    name: An optional name for the variable scope.
-
-  Returns:
-    output: A tensor of dimensions batch_size x sequence_length x layer_size
-      containing the sequences of transformed vectors.
-  """
-  with tf.variable_scope(name, "linear_set_layer", [inputs]):
-    # Apply a 1D convolution to apply a linear filter to each element along
-    # the second (sequence) dimension.
-    outputs = conv1d(inputs, layer_size, 1, activation=None, name="set_conv")
-
-    # Apply the context, if it exists.
-    if context is not None:
-      # Unfortunately tf doesn't support broadcasting via concat, but we can
-      # simply add the transformed context to get the same effect.
-      context = tf.expand_dims(context, axis=1)
-      cont_tfm = conv1d(context, layer_size, 1,
-                        activation=None, name="cont_conv")
-      outputs += cont_tfm
-
-    if activation_fn is not None:
-      outputs = activation_fn(outputs)
-
-    if dropout != 0.0:
-      outputs = tf.nn.dropout(outputs, 1.0 - dropout)
-
-    return outputs
-
-
-def ravanbakhsh_set_layer(layer_size,
-                          inputs,
-                          mask=None,
-                          activation_fn=tf.nn.tanh,
-                          dropout=0.0,
-                          name=None):
-  """Layer from the Deep Sets paper: https://arxiv.org/abs/1611.04500 .
-
-  A more parameter-efficient version of a linear set layer with context.
-
-  Args:
-    layer_size: Dimension to transform the input vectors to.
-    inputs: A tensor of dimensions batch_size x sequence_length x input_dims
-      containing the sequences of input vectors.
-    mask: A tensor of dimensions batch_size x sequence_length containing a
-      mask for the inputs with 1's for existing elements, and 0's elsewhere.
-    activation_fn: The activation function to use.
-    dropout: Dropout probability.
-    name: An optional name for the variable scope.
-
-  Returns:
-    output: A tensor of dimensions batch_size x sequence_length x layer_size
-      containing the sequences of transformed vectors.
-  """
-  with tf.variable_scope(name, "ravanbakhsh_set_layer", [inputs]):
-    output = linear_set_layer(
-        layer_size,
-        inputs - tf.expand_dims(global_pool_1d(inputs, mask=mask), axis=1),
-        activation_fn=activation_fn,
-        name=name)
-
-    return output
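
The removed `global_pool_1d` warns in a comment that its masked-average path divides the per-example sum by the number of unmasked elements, which is 0/0 for an example whose mask is all zeros. A small NumPy sketch of just that arithmetic (not the deleted TF code itself):

```python
import numpy as np

inputs = np.random.rand(2, 4, 3).astype(np.float32)
mask = np.array([[1., 1., 0., 0.],    # two real elements
                 [0., 0., 0., 0.]],   # an all-padding example
                dtype=np.float32)

# Zero out padded positions, then average over the real elements only.
masked = inputs * mask[:, :, None]
num_elems = mask.sum(axis=1, keepdims=True)   # shape (2, 1)
with np.errstate(invalid="ignore"):
    pooled = masked.sum(axis=1) / num_elems   # second row is 0/0 -> NaN

print(np.isnan(pooled[1]).all())  # True: the NaN the comment warns about
```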

tensor2tensor/models/common_layers_test.py

Lines changed: 2 additions & 70 deletions
@@ -50,15 +50,15 @@ def testSaturatingSigmoid(self):
     self.assertAllClose(res, [0.0, 0.0, 0.5, 1.0, 1.0])
 
   def testFlatten4D3D(self):
-    x = np.random.randint(1, 9, size=(3, 5, 2))
+    x = np.random.random_integers(1, high=8, size=(3, 5, 2))
     with self.test_session() as session:
       y = common_layers.flatten4d3d(common_layers.embedding(x, 10, 7))
       session.run(tf.global_variables_initializer())
       res = session.run(y)
     self.assertEqual(res.shape, (3, 5 * 2, 7))
 
   def testEmbedding(self):
-    x = np.random.randint(1, 9, size=(3, 5))
+    x = np.random.random_integers(1, high=8, size=(3, 5))
     with self.test_session() as session:
       y = common_layers.embedding(x, 10, 16)
       session.run(tf.global_variables_initializer())
@@ -81,14 +81,6 @@ def testConv(self):
       session.run(tf.global_variables_initializer())
       res = session.run(y)
     self.assertEqual(res.shape, (5, 5, 1, 13))
-
-  def testConv1d(self):
-    x = np.random.rand(5, 7, 11)
-    with self.test_session() as session:
-      y = common_layers.conv1d(tf.constant(x, dtype=tf.float32), 13, 1)
-      session.run(tf.global_variables_initializer())
-      res = session.run(y)
-    self.assertEqual(res.shape, (5, 7, 13))
 
   def testSeparableConv(self):
     x = np.random.rand(5, 7, 1, 11)
@@ -301,66 +293,6 @@ def testDeconvStride2MultiStep(self):
       session.run(tf.global_variables_initializer())
       actual = session.run(a)
     self.assertEqual(actual.shape, (5, 32, 1, 16))
-
-  def testGlobalPool1d(self):
-    x1 = np.random.rand(5, 4, 11)
-    no_mask = np.ones((5, 4))
-    full_mask = np.zeros((5, 4))
-
-    with self.test_session() as session:
-      x1_ = tf.Variable(x1, dtype=tf.float32)
-      no_mask_ = tf.Variable(no_mask, dtype=tf.float32)
-      full_mask_ = tf.Variable(full_mask, dtype=tf.float32)
-
-      none_mask_max = common_layers.global_pool_1d(x1_)
-      no_mask_max = common_layers.global_pool_1d(x1_, mask=no_mask_)
-      result1 = tf.reduce_sum(none_mask_max - no_mask_max)
-
-      full_mask_max = common_layers.global_pool_1d(x1_, mask=full_mask_)
-      result2 = tf.reduce_sum(full_mask_max)
-
-      none_mask_avr = common_layers.global_pool_1d(x1_, "AVR")
-      no_mask_avr = common_layers.global_pool_1d(x1_, "AVR", no_mask_)
-      result3 = tf.reduce_sum(none_mask_avr - no_mask_avr)
-
-      full_mask_avr = common_layers.global_pool_1d(x1_, "AVR", full_mask_)
-      result4 = tf.reduce_sum(full_mask_avr)
-
-      session.run(tf.global_variables_initializer())
-      actual = session.run([result1, result2, result3, result4])
-      # N.B.: the last result (result4) gives a NaN.
-      self.assertAllEqual(actual[:3], [0.0, 0.0, 0.0])
-
-  def testLinearSetLayer(self):
-    x1 = np.random.rand(5, 4, 11)
-    cont = np.random.rand(5, 13)
-    with self.test_session() as session:
-      x1_ = tf.Variable(x1, dtype=tf.float32)
-      cont_ = tf.Variable(cont, dtype=tf.float32)
-
-      simple_ff = common_layers.linear_set_layer(32, x1_)
-      cont_ff = common_layers.linear_set_layer(32, x1_, context=cont_)
-
-      session.run(tf.global_variables_initializer())
-      actual = session.run([simple_ff, cont_ff])
-      self.assertEqual(actual[0].shape, (5, 4, 32))
-      self.assertEqual(actual[1].shape, (5, 4, 32))
-
-  def testRavanbakhshSetLayer(self):
-    x1 = np.random.rand(5, 4, 11)
-    with self.test_session() as session:
-      x1_ = tf.Variable(x1, dtype=tf.float32)
-
-      layer = common_layers.ravanbakhsh_set_layer(32, x1_)
-
-      session.run(tf.global_variables_initializer())
-      actual = session.run(layer)
-      self.assertEqual(actual.shape, (5, 4, 32))
 
 
 if __name__ == "__main__":
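
The two test edits above swap `np.random.randint(1, 9, ...)` for `np.random.random_integers(1, high=8, ...)`. The sampled distribution is unchanged, because `randint` excludes its upper bound while `random_integers` includes it; a quick sketch (note that NumPy later deprecated `random_integers` in favor of `randint`):

```python
import numpy as np

# Both calls draw uniformly from {1, ..., 8}.
a = np.random.randint(1, 9, size=(3, 5, 2))                # high exclusive
b = np.random.random_integers(1, high=8, size=(3, 5, 2))   # high inclusive

assert a.min() >= 1 and a.max() <= 8
assert b.min() >= 1 and b.max() <= 8
```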

tensor2tensor/models/multimodel.py

Lines changed: 2 additions & 1 deletion
@@ -19,6 +19,8 @@
 
 # Dependency imports
 
+from six.moves import xrange  # pylint: disable=redefined-builtin
+
 from tensor2tensor.models import common_attention
 from tensor2tensor.models import common_hparams
 from tensor2tensor.models import common_layers
@@ -27,7 +29,6 @@
 from tensor2tensor.utils import registry
 from tensor2tensor.utils import t2t_model
 
-from six.moves import xrange  # pylint: disable=redefined-builtin
 import tensorflow as tf
 
 
