@@ -383,13 +383,6 @@ def dataset(self,
     # Construct the Problem's hparams so that items within it are accessible
     _ = self.get_hparams(hparams)
 
-    data_fields, data_items_to_decoders = self.example_reading_spec()
-    if data_items_to_decoders is None:
-      data_items_to_decoders = {
-          field: tf.contrib.slim.tfexample_decoder.Tensor(field)
-          for field in data_fields
-      }
-
     is_training = mode == tf.estimator.ModeKeys.TRAIN
     data_filepattern = self.filepattern(data_dir, dataset_split, shard=shard)
     tf.logging.info("Reading data files from %s", data_filepattern)
@@ -406,22 +399,13 @@ def dataset(self,
     else:
       dataset = tf.data.TFRecordDataset(data_files)
 
-    def decode_record(record):
-      """Serialized Example to dict of <feature name, Tensor>."""
-      decoder = tf.contrib.slim.tfexample_decoder.TFExampleDecoder(
-          data_fields, data_items_to_decoders)
-
-      decode_items = list(data_items_to_decoders)
-      decoded = decoder.decode(record, items=decode_items)
-      return dict(zip(decode_items, decoded))
-
     def _preprocess(example):
       example = self.preprocess_example(example, mode, hparams)
       self.maybe_reverse_features(example)
       self.maybe_copy_features(example)
       return example
 
-    dataset = dataset.map(decode_record, num_parallel_calls=num_threads)
+    dataset = dataset.map(self.decode_example, num_parallel_calls=num_threads)
 
     if preprocess:
       dataset = dataset.map(_preprocess, num_parallel_calls=num_threads)
@@ -430,6 +414,22 @@ def _preprocess(example):
 
     return dataset
 
+  def decode_example(self, serialized_example):
+    """Return a dict of Tensors from a serialized tensorflow.Example."""
+    data_fields, data_items_to_decoders = self.example_reading_spec()
+    if data_items_to_decoders is None:
+      data_items_to_decoders = {
+          field: tf.contrib.slim.tfexample_decoder.Tensor(field)
+          for field in data_fields
+      }
+
+    decoder = tf.contrib.slim.tfexample_decoder.TFExampleDecoder(
+        data_fields, data_items_to_decoders)
+
+    decode_items = list(data_items_to_decoders)
+    decoded = decoder.decode(serialized_example, items=decode_items)
+    return dict(zip(decode_items, decoded))
+
   @property
   def has_inputs(self):
     return "inputs" in self.get_feature_encoders()
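With the decoder spec hoisted out of `dataset`, `decode_example` picks up whatever `example_reading_spec` declares, so a subclass only has to describe its fields. A minimal sketch of that contract, using a hypothetical `MyTextProblem` that is not part of this commit:

```python
import tensorflow as tf
from tensor2tensor.data_generators import problem


class MyTextProblem(problem.Problem):  # hypothetical subclass for illustration
  def example_reading_spec(self):
    data_fields = {
        "inputs": tf.VarLenFeature(tf.int64),
        "targets": tf.VarLenFeature(tf.int64),
    }
    # Returning None for the decoders lets decode_example build the default
    # tfexample_decoder.Tensor decoder for each declared field.
    return data_fields, None


# decode_example can then be mapped directly over raw TFRecords:
# dataset = tf.data.TFRecordDataset(files).map(my_problem.decode_example)
```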
@@ -496,7 +496,8 @@ def input_fn(self, mode, hparams, params=None, config=None,
       mode: tf.estimator.ModeKeys
       hparams: HParams, model hparams
       params: dict, may include "batch_size"
-      config: RunConfig; if passed, should include t2t_device_info dict
+      config: RunConfig; should have the data_parallelism attribute if not
+        using TPU
       dataset_kwargs: dict, if passed, will pass as kwargs to self.dataset
         method when called
@@ -521,29 +522,8 @@ def gpu_valid_size(example):
           hparams.max_length if drop_long_sequences else 10**9)
 
     def define_shapes(example):
-      """Set the right shapes for the features."""
-      inputs = example["inputs"]
-      targets = example["targets"]
-
-      # Ensure inputs and targets are proper rank.
-      while len(inputs.get_shape()) < 4:
-        inputs = tf.expand_dims(inputs, axis=-1)
-      while len(targets.get_shape()) < 4:
-        targets = tf.expand_dims(targets, axis=-1)
-
-      example["inputs"] = inputs
-      example["targets"] = targets
-
-      if config.use_tpu:
-        # Ensure batch size is set on all features
-        for _, t in six.iteritems(example):
-          shape = t.get_shape().as_list()
-          shape[0] = params["batch_size"]
-          t.set_shape(t.get_shape().merge_with(shape))
-          # Assert shapes are fully known
-          t.get_shape().assert_is_fully_defined()
-
-      return example
+      return _standardize_shapes(
+          example, batch_size=(config.use_tpu and params["batch_size"]))
 
     # Read and preprocess
     data_dir = hparams.data_dir
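One subtlety in the new one-liner: `batch_size=(config.use_tpu and params["batch_size"])` relies on Python's short-circuit `and`, which evaluates to `False` off TPU and to the actual batch size on TPU; `_standardize_shapes` only pins the batch dimension when it receives a truthy value. A quick illustration, not from the commit:

```python
params = {"batch_size": 128}

use_tpu = False
print(use_tpu and params["batch_size"])  # False -> batch dim left dynamic

use_tpu = True
print(use_tpu and params["batch_size"])  # 128 -> batch dim pinned to 128
```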
@@ -569,7 +549,7 @@ def define_shapes(example):
         dataset = dataset.apply(
             tf.contrib.data.batch_and_drop_remainder(tpu_batch_size))
       else:
-        num_shards = config.t2t_device_info["num_shards"]
+        num_shards = config.data_parallelism.n
         dataset = dataset.batch(hparams.batch_size * num_shards)
     else:
       # Variable length features
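For reference, `data_parallelism` is not a standard `RunConfig` field: tensor2tensor's trainer utilities attach a Parallelism-style object to the run config, and `.n` is its device count. A rough stand-in sketch of the attribute this code now expects; the class and names here are illustrative, not the actual t2t implementation:

```python
class FakeParallelism(object):
  """Illustrative stand-in exposing the one attribute input_fn reads."""

  def __init__(self, devices):
    self.devices = devices

  @property
  def n(self):
    return len(self.devices)  # number of data-parallel shards


# config.data_parallelism = FakeParallelism(["gpu:0", "gpu:1"])
# config.data_parallelism.n == 2, replacing t2t_device_info["num_shards"]
```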
@@ -586,7 +566,7 @@ def define_shapes(example):
         dataset = dataset.filter(gpu_valid_size)
       batching_scheme = data_reader.hparams_to_batching_scheme(
           hparams,
-          shard_multiplier=config.t2t_device_info["num_shards"],
+          shard_multiplier=config.data_parallelism.n,
           length_multiplier=self.get_hparams().batch_size_multiplier)
       if hparams.use_fixed_batch_size:
         batching_scheme["batch_sizes"] = [hparams.batch_size]
@@ -601,7 +581,7 @@ def define_shapes(example):
     dataset = dataset.prefetch(1)
     features = dataset.make_one_shot_iterator().get_next()
     if not config.use_tpu:
-      _summarize_features(features, config.t2t_device_info["num_shards"])
+      _summarize_features(features, config.data_parallelism.n)
 
     if mode == tf.estimator.ModeKeys.PREDICT:
       features["infer_targets"] = features["targets"]
@@ -614,6 +594,25 @@ def define_shapes(example):
 
     return features, features["targets"]
 
+  def serving_input_fn(self, hparams):
+    """Input fn for serving export, starting from serialized example."""
+    mode = tf.estimator.ModeKeys.PREDICT
+    serialized_example = tf.placeholder(
+        dtype=tf.string, shape=[None], name="serialized_example")
+    dataset = tf.data.Dataset.from_tensor_slices(serialized_example)
+    dataset = dataset.map(self.decode_example)
+    dataset = dataset.map(lambda ex: self.preprocess_example(ex, mode, hparams))
+    dataset = dataset.map(data_reader.cast_int64_to_int32)
+    dataset = dataset.padded_batch(1000, dataset.output_shapes)
+    dataset = dataset.map(_standardize_shapes)
+    features = tf.contrib.data.get_single_element(dataset)
+
+    if self.has_inputs:
+      features.pop("targets", None)
+
+    return tf.estimator.export.ServingInputReceiver(
+        features=features, receiver_tensors=serialized_example)
+
 
 class FeatureInfo(object):
 
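The new `serving_input_fn` reuses the same decode and preprocess path as training, so an exported model sees identically shaped features. A minimal export sketch; `my_problem`, `hparams`, and `estimator` are assumed to already exist (e.g. built with tensor2tensor's trainer utilities), and the output path is hypothetical:

```python
estimator.export_savedmodel(
    "/tmp/t2t_export",  # hypothetical export directory
    lambda: my_problem.serving_input_fn(hparams))
```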
@@ -907,3 +906,28 @@ def _summarize_features(features, num_shards=1):
       tf.summary.scalar("%s_nonpadding_tokens" % k, nonpadding_tokens)
       tf.summary.scalar("%s_nonpadding_fraction" % k,
                         tf.reduce_mean(nonpadding))
+
+
+def _standardize_shapes(features, batch_size=None):
+  """Set the right shapes for the features."""
+
+  for fname in ["inputs", "targets"]:
+    if fname not in features:
+      continue
+
+    f = features[fname]
+    while len(f.get_shape()) < 4:
+      f = tf.expand_dims(f, axis=-1)
+
+    features[fname] = f
+
+  if batch_size:
+    # Ensure batch size is set on all features
+    for _, t in six.iteritems(features):
+      shape = t.get_shape().as_list()
+      shape[0] = batch_size
+      t.set_shape(t.get_shape().merge_with(shape))
+      # Assert shapes are fully known
+      t.get_shape().assert_is_fully_defined()
+
+  return features
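A quick illustrative check of `_standardize_shapes`, not part of the commit: a rank-2 feature is padded out to the rank-4 layout the models expect, and a truthy `batch_size` additionally pins the leading dimension:

```python
import tensorflow as tf

features = {"inputs": tf.zeros([8, 20], dtype=tf.int32)}

features = _standardize_shapes(features)
print(features["inputs"].get_shape())  # (8, 20, 1, 1)

# With batch_size set (the TPU path), shapes must end up fully defined:
features = _standardize_shapes(features, batch_size=8)
features["inputs"].get_shape().assert_is_fully_defined()
```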