@@ -275,8 +275,9 @@ def update_func(emissions, N, T, prev_step_token_idxs, prev_step_hyp_idxs, prev_
275275 ), f"state_and_tokens has shape {state_and_tokens.shape} = expected {(num_samples, timestep + 1)}"
276276 else:
277277 assert len(prev_model_state_sequences) == 1
278- state_and_tokens = token_indices = prev_model_state_sequences[0].expand(num_beams, -1)  # TODO: Make this more robust
279-
278+ state_and_tokens = token_indices = prev_model_state_sequences[0].expand(
279+ num_beams, -1
280+ )  # TODO: Make this more robust
280281
281282 # Cleanup -- combine this with the above
282283 if self.is_encoder_decoder:
@@ -287,14 +288,14 @@ def update_func(emissions, N, T, prev_step_token_idxs, prev_step_hyp_idxs, prev_
287288 )
288289
289290 # Preprocess inputs for generation
290- model_inputs = self.model.prepare_inputs_for_generation(token_indices, **model_kwargs)
291+ model_inputs = self.model.prepare_inputs_for_generation(
292+ token_indices, **model_kwargs
293+ )  # This should technically work with state_and_tokens, but the prepare function has to splice if past (like HF does)
291294 if self.is_huggingface_model:
292295 model_inputs.update(self._huggingface_model_input_values)
293296 if len(prev_step_hyp_idxs) > 1 and model_kwargs["past"] is not None:
294- model_inputs["past_key_values"] = self.model._reorder_cache(
295- model_kwargs["past"],
296- torch.Tensor(prev_step_hyp_idxs).to(dtype=torch.int32),
297- )
297+ beam_idxs = torch.Tensor(prev_step_hyp_idxs).to(dtype=torch.int32)
298+ model_inputs["past_key_values"] = self.model._reorder_cache(model_kwargs["past"], beam_idxs)
298299
299300 # Forward pass
300301 outputs = self.model(**model_inputs)
0 commit comments