From 1c4638bd9e6c525754725485580bff0b6adf939b Mon Sep 17 00:00:00 2001
From: Charles Margossian <charlesm@metrumrg.com>
Date: Tue, 5 May 2020 17:59:13 -0400
Subject: [PATCH 01/21] Add latent prob and latent rng functions for hmm

---
 stan/math/prim/err.hpp                        |   1 +
 stan/math/prim/err/hmm_check.hpp              |  50 +++++++++
 stan/math/prim/prob.hpp                       |   2 +
 .../prim/prob/hmm_latent_marginal_prob.hpp    |  83 ++++++++++++++
 stan/math/prim/prob/hmm_latent_rng.hpp        | 101 ++++++++++++++++++
 stan/math/prim/prob/hmm_marginal_lpdf.hpp     |  11 +-
 test/unit/math/prim/err/hmm_check_test.cpp    |  74 +++++++++++++
 .../prob/hmm_latent_marginal_prob_test.cpp    |  45 ++++++++
 .../math/prim/prob/hmm_latent_rng_test.cpp    |  42 ++++++++
 .../unit/math/prim/prob/hmm_marginal_test.cpp |   6 +-
 10 files changed, 402 insertions(+), 13 deletions(-)
 create mode 100644 stan/math/prim/err/hmm_check.hpp
 create mode 100644 stan/math/prim/prob/hmm_latent_marginal_prob.hpp
 create mode 100644 stan/math/prim/prob/hmm_latent_rng.hpp
 create mode 100644 test/unit/math/prim/err/hmm_check_test.cpp
 create mode 100644 test/unit/math/prim/prob/hmm_latent_marginal_prob_test.cpp
 create mode 100644 test/unit/math/prim/prob/hmm_latent_rng_test.cpp
diff --git a/stan/math/prim/err.hpp b/stan/math/prim/err.hpp
index d66a79c41e4..049904e7023 100644
--- a/stan/math/prim/err.hpp
+++ b/stan/math/prim/err.hpp
@@ -47,6 +47,7 @@
 #include <stan/math/prim/err/domain_error.hpp>
 #include <stan/math/prim/err/domain_error_vec.hpp>
 #include <stan/math/prim/err/elementwise_check.hpp>
+#include <stan/math/prim/err/hmm_check.hpp>
 #include <stan/math/prim/err/invalid_argument.hpp>
 #include <stan/math/prim/err/invalid_argument_vec.hpp>
 #include <stan/math/prim/err/is_cholesky_factor.hpp>
diff --git a/stan/math/prim/err/hmm_check.hpp b/stan/math/prim/err/hmm_check.hpp
new file mode 100644
index 00000000000..7b3717f8c44
--- /dev/null
+++ b/stan/math/prim/err/hmm_check.hpp
@@ -0,0 +1,50 @@
+#ifndef STAN_MATH_PRIM_ERR_HMM_CHECK_HPP
+#define STAN_MATH_PRIM_ERR_HMM_CHECK_HPP
+
+#include <stan/math/prim/err.hpp>
+#include <stan/math/prim/fun/row.hpp>
+
+namespace stan {
+namespace math {
+
+/**
+ * Check arguments for hidden Markov model functions with a discrete
+ * latent state (lpdf, rng for latent states, and marginal probabilities
+ * for latent sates).
+ *
+ * @tparam T_omega type of the log likelihood matrix
+ * @tparam T_Gamma type of the transition matrix
+ * @tparam T_rho type of the initial guess vector
+ * @param[in] log_omegas log matrix of observational densities.
+ * @param[in] Gamma transition density between hidden states.
+ * @param[in] rho initial state
+ * @param[in] function the name of the function using the arguments.
+ * @throw `std::invalid_argument` if Gamma is not square, when we have
+ *         at least one transition, or if the size of rho is not the
+ *         number of rows of log_omegas.
+ * @throw `std::domain_error` if rho is not a simplex and of the rows
+ *         of Gamma are not a simplex (when there is at least one transition).
+ */
+template <typename T_omega, typename T_Gamma, typename T_rho>
+inline void hmm_check(
+  const Eigen::Matrix<T_omega, Eigen::Dynamic, Eigen::Dynamic>& log_omegas,
+  const Eigen::Matrix<T_Gamma, Eigen::Dynamic, Eigen::Dynamic>& Gamma,
+  const Eigen::Matrix<T_rho, Eigen::Dynamic, 1>& rho,
+  const char* function) {
+  int n_states = log_omegas.rows();
+  int n_transitions = log_omegas.cols() - 1;
+
+  check_consistent_size(function, "rho", rho, n_states);
+  check_simplex(function, "rho", rho);
+  check_square(function, "Gamma", Gamma);
+  check_nonzero_size(function, "Gamma", Gamma);
+  check_multiplicable(function, "Gamma", Gamma, "log_omegas",
+                      log_omegas);
+  for (int i = 0; i < Gamma.rows(); ++i) {
+    check_simplex(function, "Gamma[i, ]", row(Gamma, i + 1));
+  }
+}
+
+}  // namespace math
+}  // namespace stan
+#endif
diff --git a/stan/math/prim/prob.hpp b/stan/math/prim/prob.hpp
index 2fc90e047a8..98aa434dd20 100644
--- a/stan/math/prim/prob.hpp
+++ b/stan/math/prim/prob.hpp
@@ -134,6 +134,8 @@
 #include <stan/math/prim/prob/gumbel_log.hpp>
 #include <stan/math/prim/prob/gumbel_lpdf.hpp>
 #include <stan/math/prim/prob/gumbel_rng.hpp>
+#include <stan/math/prim/prob/hmm_latent_marginal_prob.hpp>
+#include <stan/math/prim/prob/hmm_latent_rng.hpp>
 #include <stan/math/prim/prob/hmm_marginal_lpdf.hpp>
 #include <stan/math/prim/prob/hypergeometric_log.hpp>
 #include <stan/math/prim/prob/hypergeometric_lpmf.hpp>
diff --git a/stan/math/prim/prob/hmm_latent_marginal_prob.hpp b/stan/math/prim/prob/hmm_latent_marginal_prob.hpp
new file mode 100644
index 00000000000..9d65dedfeda
--- /dev/null
+++ b/stan/math/prim/prob/hmm_latent_marginal_prob.hpp
@@ -0,0 +1,83 @@
+#ifndef STAN_MATH_PRIM_PROB_HMM_LATENT_MARGINAL_PROB_HPP
+#define STAN_MATH_PRIM_PROB_HMM_LATENT_MARGINAL_PROB_HPP
+
+#include <stan/math/prim/core.hpp>
+#include <stan/math/prim/meta.hpp>
+#include <stan/math/prim/err/hmm_check.hpp>
+#include <Eigen/Core>
+#include <boost/random.hpp>
+#include <vector>
+
+namespace stan {
+namespace math {
+
+/**
+ * For a hidden Markov model with observation y, hidden state x,
+ * and parameters theta, compute the marginal posterior probability
+ * of the hidden states. In this setting, the hidden states are discrete
+ * and takes values over the finite space {1, ..., K}.
+ * log_omegas is a matrix of observational densities, where
+ * the (i, j)th entry corresponds to the density of the ith observation, y_i,
+ * given x_i = j.
+ * The transition matrix Gamma is such that the (i, j)th entry is the
+ * probability that x_n = j given x_{n - 1} = i. The rows of Gamma are
+ * simplexes.
+ * Note we only calculate marginal probabilities. However correlation between
+ * the latent states must be considered in order to sample latent states.
+ *
+ * @tparam T_omega type of the log likelihood matrix
+ * @tparam T_Gamma type of the transition matrix
+ * @tparam T_rho type of the initial guess vector
+ * @param[in] log_omegas log matrix of observational densities.
+ * @param[in] Gamma transition density between hidden states.
+ * @param[in] rho initial state
+ * @return the posterior probability for each latent state.
+ * @throw `std::invalid_argument` if Gamma is not square, when we have
+ *         at least one transition, or if the size of rho is not the
+ *         number of rows of log_omegas.
+ * @throw `std::domain_error` if rho is not a simplex and of the rows
+ *         of Gamma are not a simplex (when there is at least one transition).
+ */
+template <typename T_omega, typename T_Gamma, typename T_rho>
+inline Eigen::MatrixXd hmm_latent_marginal_prob(
+  const Eigen::Matrix<T_omega, Eigen::Dynamic, Eigen::Dynamic>& log_omegas,
+  const Eigen::Matrix<T_Gamma, Eigen::Dynamic, Eigen::Dynamic>& Gamma,
+  const Eigen::Matrix<T_rho, Eigen::Dynamic, 1>& rho) {
+  int n_states = log_omegas.rows();
+  int n_transitions = log_omegas.cols() - 1;
+
+  hmm_check(log_omegas, Gamma, rho, "hmm_latent_marginal_prob");
+
+  Eigen::MatrixXd omegas = value_of(log_omegas).array().exp();
+  Eigen::VectorXd rho_dbl = value_of(rho);
+  Eigen::MatrixXd Gamma_dbl = value_of(Gamma);
+
+  Eigen::MatrixXd alphas(n_states, n_transitions + 1);
+  alphas.col(0) = omegas.col(0).cwiseProduct(rho_dbl);
+  alphas.col(0) /= alphas.col(0).maxCoeff();
+
+  Eigen::MatrixXd Gamma_dbl_transpose = Gamma_dbl.transpose();
+  for (int n = 1; n <= n_transitions; ++n)
+    alphas.col(n) = omegas.col(n).cwiseProduct(Gamma_dbl_transpose
+                      * alphas.col(n - 1));
+
+  // Backward pass with running normalization
+  Eigen::VectorXd beta = Eigen::VectorXd::Ones(n_states);
+
+  alphas.col(n_transitions) /= alphas.col(n_transitions).sum();
+
+  for (int n = n_transitions - 1; n >= 0; --n) {
+    beta = Gamma_dbl * (omegas.col(n + 1).cwiseProduct(beta));
+    beta /= beta.maxCoeff();
+
+    // Reuse alphas to store probabilities
+    alphas.col(n) = alphas.col(n).cwiseProduct(beta);
+    alphas.col(n) /= alphas.col(n).sum();
+  }
+
+  return alphas;
+}
+
+}  // namespace math
+}  // namespace stan
+#endif
diff --git a/stan/math/prim/prob/hmm_latent_rng.hpp b/stan/math/prim/prob/hmm_latent_rng.hpp
new file mode 100644
index 00000000000..5f97eaa7105
--- /dev/null
+++ b/stan/math/prim/prob/hmm_latent_rng.hpp
@@ -0,0 +1,101 @@
+#ifndef STAN_MATH_PRIM_PROB_HMM_LATENT_RNG_HPP
+#define STAN_MATH_PRIM_PROB_HMM_LATENT_RNG_HPP
+
+#include <stan/math/prim/core.hpp>
+#include <stan/math/prim/meta.hpp>
+#include <stan/math/prim/err/hmm_check.hpp>
+#include <Eigen/Core>
+#include <boost/random.hpp>
+#include <vector>
+
+namespace stan {
+namespace math {
+
+/**
+* For a hidden Markov model with observation y, hidden state x,
+* and parameters theta, generate samples from the posterior distribution
+* of the hidden states, x.
+* In this setting, the hidden states are discrete
+* and takes values over the finite space {1, ..., K}.
+* log_omegas is a matrix of observational densities, where
+* the (i, j)th entry corresponds to the density of the ith observation, y_i,
+* given x_i = j.
+* The transition matrix Gamma is such that the (i, j)th entry is the
+* probability that x_n = j given x_{n - 1} = i. The rows of Gamma are
+* simplexes.
+*
+* @tparam T_omega type of the log likelihood matrix
+* @tparam T_Gamma type of the transition matrix
+* @tparam T_rho type of the initial guess vector
+* @param[in] log_omegas log matrix of observational densities.
+* @param[in] Gamma transition density between hidden states.
+* @param[in] rho initial state
+* @param[in] rng random number generator (duh!)
+* @return sample from the posterior distribution of the hidden states.
+* @throw `std::invalid_argument` if Gamma is not square, when we have
+*         at least one transition, or if the size of rho is not the
+*         number of rows of log_omegas.
+* @throw `std::domain_error` if rho is not a simplex and of the rows
+*         of Gamma are not a simplex (when there is at least one transition).
+*/
+template <typename T_omega, typename T_Gamma, typename T_rho, class RNG>
+inline std::vector<int> hmm_latent_rng(
+  const Eigen::Matrix<T_omega, Eigen::Dynamic, Eigen::Dynamic>& log_omegas,
+  const Eigen::Matrix<T_Gamma, Eigen::Dynamic, Eigen::Dynamic>& Gamma,
+  const Eigen::Matrix<T_rho, Eigen::Dynamic, 1>& rho,
+  RNG& rng) {
+  int n_states = log_omegas.rows();
+  int n_transitions = log_omegas.cols() - 1;
+
+  hmm_check(log_omegas, Gamma, rho, "hmm_latent_rng");
+
+  Eigen::MatrixXd omegas = value_of(log_omegas).array().exp();
+  Eigen::VectorXd rho_dbl = value_of(rho);
+  Eigen::MatrixXd Gamma_dbl = value_of(Gamma);
+
+  Eigen::MatrixXd alphas(n_states, n_transitions + 1);
+  alphas.col(0) = omegas.col(0).cwiseProduct(rho_dbl);
+  alphas.col(0) /= alphas.col(0).maxCoeff();
+
+  Eigen::MatrixXd Gamma_dbl_transpose = Gamma_dbl.transpose();
+  for (int n = 0; n < n_transitions; ++n) {
+    alphas.col(n + 1) = omegas.col(n + 1).
+                          cwiseProduct(Gamma_dbl_transpose * alphas.col(n));
+    alphas.col(n + 1) /= alphas.col(n + 1).maxCoeff();
+  }
+
+  Eigen::VectorXd beta = Eigen::VectorXd::Ones(n_states);
+
+  // sample the last hidden state
+  std::vector<int> hidden_states(n_transitions + 1);
+  Eigen::VectorXd probs_vec = alphas.col(n_transitions)
+                              / alphas.col(n_transitions).sum();
+  std::vector<double> probs(probs_vec.data(), probs_vec.data() + n_states);
+  boost::random::discrete_distribution<> cat_hidden(probs);
+  hidden_states[n_transitions] = cat_hidden(rng);
+
+  for (int n = n_transitions - 1; n >= 0; --n) {
+    // sample the nth hidden state conditional on (n + 1)st hidden state
+    int last_hs = hidden_states[n + 1];
+
+    // CHECK -- do we need to redeclare the hidden state?
+    for (int k = 0; k < n_states; ++k) {
+      probs_vec[k] = alphas(k, n) * omegas(last_hs, n + 1)
+                     * Gamma_dbl_transpose(last_hs, k) * beta(last_hs);
+    }
+    probs_vec /= probs_vec.sum();
+    std::vector<double> probs(probs_vec.data(), probs_vec.data() + n_states);
+    boost::random::discrete_distribution<> cat_hidden(probs);
+    hidden_states[n] = cat_hidden(rng);
+
+    // update backwards state
+    beta = Gamma_dbl * (omegas.col(n + 1).cwiseProduct(beta));
+    beta /= beta.maxCoeff();
+  }
+
+  return hidden_states;
+}
+
+}  // namespace math
+}  // namespace stan
+#endif
diff --git a/stan/math/prim/prob/hmm_marginal_lpdf.hpp b/stan/math/prim/prob/hmm_marginal_lpdf.hpp
index 4dd4fe1cb09..1ef4195a1d2 100644
--- a/stan/math/prim/prob/hmm_marginal_lpdf.hpp
+++ b/stan/math/prim/prob/hmm_marginal_lpdf.hpp
@@ -56,7 +56,6 @@ inline auto hmm_marginal_lpdf_val(
  * The transition matrix Gamma is such that the (i, j)th entry is the
  * probability that x_n = j given x_{n - 1} = i. The rows of Gamma are
  * simplexes.
- * The Gamma argument is only checked if there is at least one transition.
  *
  * @tparam T_omega type of the log likelihood matrix
  * @tparam T_Gamma type of the transition matrix
@@ -83,15 +82,7 @@ inline auto hmm_marginal_lpdf(
   int n_states = log_omegas.rows();
   int n_transitions = log_omegas.cols() - 1;
 
-  check_consistent_size("hmm_marginal_lpdf", "rho", rho, n_states);
-  check_simplex("hmm_marginal_lpdf", "rho", rho);
-  check_square("hmm_marginal_lpdf", "Gamma", Gamma);
-  check_nonzero_size("hmm_marginal_lpdf", "Gamma", Gamma);
-  check_multiplicable("hmm_marginal_lpdf", "Gamma", Gamma, "log_omegas",
-                      log_omegas);
-  for (int i = 0; i < Gamma.rows(); ++i) {
-    check_simplex("hmm_marginal_lpdf", "Gamma[i, ]", row(Gamma, i + 1));
-  }
+  hmm_check(log_omegas, Gamma, rho, "hmm_marginal_lpdf");
 
   operands_and_partials<Eigen::Matrix<T_omega, Eigen::Dynamic, Eigen::Dynamic>,
                         Eigen::Matrix<T_Gamma, Eigen::Dynamic, Eigen::Dynamic>,
diff --git a/test/unit/math/prim/err/hmm_check_test.cpp b/test/unit/math/prim/err/hmm_check_test.cpp
new file mode 100644
index 00000000000..8a42cd2fac4
--- /dev/null
+++ b/test/unit/math/prim/err/hmm_check_test.cpp
@@ -0,0 +1,74 @@
+#include <stan/math/prim/err/hmm_check.hpp>
+#include <test/unit/util.hpp>
+#include <gtest/gtest.h>
+
+TEST(err, hmm_check) {
+  using Eigen::MatrixXd;
+  using Eigen::VectorXd;
+  using stan::math::hmm_check;
+
+  int n_states = 2;
+  int n_transitions = 2;
+  MatrixXd log_omegas(n_states, n_transitions + 1);
+  MatrixXd Gamma(n_states, n_states);
+  VectorXd rho(n_states);
+
+  for (int i = 0; i < n_states; i++)
+    for (int j = 0; j < n_transitions + 1; j++)
+      log_omegas(i, j) = 1;
+
+  rho(0) = 0.65;
+  rho(1) = 0.35;
+  Gamma << 0.8, 0.2, 0.6, 0.4;
+
+  // Gamma is not square.
+  MatrixXd Gamma_rec(n_states, n_states + 1);
+  EXPECT_THROW_MSG(
+      hmm_check(log_omegas, Gamma_rec, rho, "hmm_marginal_lpdf"),
+      std::invalid_argument,
+      "hmm_marginal_lpdf: Expecting a square matrix; rows of Gamma (2) "
+      "and columns of Gamma (3) must match in size")
+
+  // Gamma has a column that is not a simplex.
+  MatrixXd Gamma_bad = Gamma;
+  Gamma_bad(0, 0) = Gamma(0, 0) + 1;
+  EXPECT_THROW_MSG(hmm_check(log_omegas, Gamma_bad, rho, "hmm_marginal_lpdf"),
+                   std::domain_error,
+                   "hmm_marginal_lpdf: Gamma[i, ] is not a valid simplex. "
+                   "sum(Gamma[i, ]) = 2, but should be 1")
+
+  // The size of Gamma is 0, even though there is at least one transition
+  MatrixXd Gamma_empty(0, 0);
+  EXPECT_THROW_MSG(
+      hmm_check(log_omegas, Gamma_empty, rho, "hmm_marginal_lpdf"),
+      std::invalid_argument,
+      "hmm_marginal_lpdf: Gamma has size 0, but must have a non-zero size")
+
+  // The size of Gamma is inconsistent with that of log_omega
+  MatrixXd Gamma_wrong_size(n_states + 1, n_states + 1);
+
+  EXPECT_THROW_MSG(hmm_check(log_omegas, Gamma_wrong_size, rho,
+                   "hmm_marginal_lpdf"),
+                   std::invalid_argument,
+                   "hmm_marginal_lpdf: Columns of Gamma (3)"
+                   " and Rows of log_omegas (2) must match in size")
+
+  // rho is not a simplex.
+  VectorXd rho_bad = rho;
+  rho_bad(0) = rho(0) + 1;
+  EXPECT_THROW_MSG(hmm_check(log_omegas, Gamma, rho_bad, "hmm_marginal_lpdf"),
+                   std::domain_error,
+                   "hmm_marginal_lpdf: rho is not a valid simplex. "
+                   "sum(rho) = 2, but should be 1")
+
+  // The size of rho is inconsistent with that of log_omega
+  VectorXd rho_wrong_size(n_states + 1);
+  EXPECT_THROW_MSG(
+      hmm_check(log_omegas, Gamma, rho_wrong_size, "hmm_marginal_lpdf"),
+      std::invalid_argument,
+      "hmm_marginal_lpdf: rho has dimension = 3, expecting dimension = 2;"
+      " a function was called with arguments of different scalar,"
+      " array, vector, or matrix types, and they were not consistently sized;"
+      "  all arguments must be scalars or multidimensional values of"
+      " the same shape.")
+}
diff --git a/test/unit/math/prim/prob/hmm_latent_marginal_prob_test.cpp b/test/unit/math/prim/prob/hmm_latent_marginal_prob_test.cpp
new file mode 100644
index 00000000000..f77daa7a703
--- /dev/null
+++ b/test/unit/math/prim/prob/hmm_latent_marginal_prob_test.cpp
@@ -0,0 +1,45 @@
+#include <test/unit/math/prim/prob/hmm_marginal_test.cpp>
+#include <stan/math/prim/prob/hmm_latent_marginal_prob.hpp>
+#include <boost/math/distributions.hpp>
+#include <boost/random.hpp>
+#include <test/unit/math/test_ad.hpp>
+#include <test/unit/util.hpp>
+#include <gtest/gtest.h>
+#include <limits>
+#include <vector>
+
+
+TEST_F(hmm_test, latent_prob_single_outcome) {
+  using stan::math::hmm_latent_marginal_prob;
+
+  int n_states = 2;
+  Eigen::MatrixXd Gamma(n_states, n_states);
+  Gamma << 1, 0, 1, 0;
+  Eigen::VectorXd rho(n_states);
+  rho << 1, 0;
+
+  Eigen::MatrixXd prob = hmm_latent_marginal_prob(log_omegas_, Gamma, rho);
+
+  for (int i = 0; i < n_transitions_; i++) {
+    EXPECT_EQ(prob(0, i), 1);
+    EXPECT_EQ(prob(1, i), 0);
+  }
+}
+
+TEST_F(hmm_test, latent_prob_identity_transition) {
+  // With an identity transition matrix, all latent probabilities
+  // are equal. Setting the log density to 1 for all states makes
+  // the initial prob drive the subsequent probabilities.
+  using stan::math::hmm_latent_marginal_prob;
+  int n_states = 2;
+  Eigen::MatrixXd Gamma = Eigen::MatrixXd::Identity(n_states, n_states);
+  Eigen::MatrixXd log_omegas =
+    Eigen::MatrixXd::Ones(n_states, n_transitions_ + 1);
+
+  Eigen::MatrixXd prob = hmm_latent_marginal_prob(log_omegas, Gamma, rho_);
+
+  for (int i = 0; i < n_transitions_; i++) {
+    EXPECT_FLOAT_EQ(prob(0, i), rho_(0));
+    EXPECT_FLOAT_EQ(prob(1, i), rho_(1));
+  }
+}
diff --git a/test/unit/math/prim/prob/hmm_latent_rng_test.cpp b/test/unit/math/prim/prob/hmm_latent_rng_test.cpp
new file mode 100644
index 00000000000..afa56bb728e
--- /dev/null
+++ b/test/unit/math/prim/prob/hmm_latent_rng_test.cpp
@@ -0,0 +1,42 @@
+#include <test/unit/math/prim/prob/hmm_marginal_test.cpp>
+#include <test/unit/math/prim/prob/vector_rng_test_helper.hpp>
+#include <stan/math/prim/prob/hmm_latent_rng.hpp>
+#include <boost/math/distributions.hpp>
+#include <boost/random.hpp>
+#include <test/unit/math/test_ad.hpp>
+#include <test/unit/util.hpp>
+#include <gtest/gtest.h>
+#include <limits>
+#include <vector>
+
+TEST(hmm_rng_test, chiSquareGoodnessFitTest) {
+  // with identity transition and constant log_omegas, the sampled latent
+  // states are identifcal and follow a Bernoulli distribution parameterized
+  // by rho.
+  using stan::math::hmm_latent_rng;
+
+  int n_states = 2;
+  int n_transitions = 10;
+  Eigen::MatrixXd Gamma = Eigen::MatrixXd::Identity(n_states, n_states);
+  Eigen::VectorXd rho(n_states);
+  rho << 0.65, 0.35;
+  Eigen::MatrixXd log_omegas =
+    Eigen::MatrixXd::Ones(n_states, n_transitions + 1);
+
+  boost::random::mt19937 rng;
+  int N = 10000;
+
+  std::vector<double> expected;
+  expected.push_back(N * rho(0));
+  expected.push_back(N * rho(1));
+
+  std::vector<int> counts(2);
+  std::vector<int> state;
+  for (int i = 0; i < N; ++i) {
+    state = hmm_latent_rng(log_omegas, Gamma, rho, rng);
+    for (int j = 1; j < n_states; ++j) EXPECT_EQ(state[j], state[0]);
+    ++counts[state[0]];
+  }
+
+  assert_chi_squared(counts, expected, 1e-6);
+}
diff --git a/test/unit/math/prim/prob/hmm_marginal_test.cpp b/test/unit/math/prim/prob/hmm_marginal_test.cpp
index 137a6eb65bc..13450471c21 100644
--- a/test/unit/math/prim/prob/hmm_marginal_test.cpp
+++ b/test/unit/math/prim/prob/hmm_marginal_test.cpp
@@ -53,7 +53,7 @@ double state_lpdf(double y, double abs_mu, double sigma, int state) {
   return -0.5 * chi * chi - 0.5 * std::log(2 * M_PI) - std::log(sigma);
 }
 
-class hmm_marginal_lpdf_test : public ::testing::Test {
+class hmm_test : public ::testing::Test {
  protected:
   void SetUp() override {
     n_states_ = 2;
@@ -114,7 +114,7 @@ class hmm_marginal_lpdf_test : public ::testing::Test {
 // For evaluation of the density, the C++ code is benchmarked against
 // a forward algorithm written in R.
 // TODO(charlesm93): Add public repo link with R script.
-TEST_F(hmm_marginal_lpdf_test, ten_transitions) {
+TEST_F(hmm_test, ten_transitions) {
   using stan::math::hmm_marginal_lpdf;
 
   EXPECT_FLOAT_EQ(-18.37417, hmm_marginal_lpdf(log_omegas_, Gamma_, rho_));
@@ -130,7 +130,7 @@ TEST_F(hmm_marginal_lpdf_test, ten_transitions) {
                         rho_unconstrained_);
 }
 
-TEST_F(hmm_marginal_lpdf_test, zero_transitions) {
+TEST_F(hmm_test, zero_transitions) {
   using stan::math::hmm_marginal_lpdf;
 
   EXPECT_FLOAT_EQ(-1.520827, hmm_marginal_lpdf(log_omegas_zero_, Gamma_, rho_));

From a241ad0f20d187d8ba6c29b2f297d31ecceb177c Mon Sep 17 00:00:00 2001
From: Stan Jenkins <mc.stanislaw@gmail.com>
Date: Tue, 5 May 2020 18:56:32 -0400
Subject: [PATCH 02/21] [Jenkins] auto-formatting by clang-format version 6.0.0

---
 stan/math/prim/err/hmm_check.hpp              | 10 ++-
 .../prim/prob/hmm_latent_marginal_prob.hpp    | 10 +--
 stan/math/prim/prob/hmm_latent_rng.hpp        | 67 +++++++++----------
 test/unit/math/prim/err/hmm_check_test.cpp    | 10 +--
 .../prob/hmm_latent_marginal_prob_test.cpp    |  5 +-
 .../math/prim/prob/hmm_latent_rng_test.cpp    |  7 +-
 6 files changed, 53 insertions(+), 56 deletions(-)

diff --git a/stan/math/prim/err/hmm_check.hpp b/stan/math/prim/err/hmm_check.hpp
index 7b3717f8c44..237b84a1b13 100644
--- a/stan/math/prim/err/hmm_check.hpp
+++ b/stan/math/prim/err/hmm_check.hpp
@@ -27,10 +27,9 @@ namespace math {
  */
 template <typename T_omega, typename T_Gamma, typename T_rho>
 inline void hmm_check(
-  const Eigen::Matrix<T_omega, Eigen::Dynamic, Eigen::Dynamic>& log_omegas,
-  const Eigen::Matrix<T_Gamma, Eigen::Dynamic, Eigen::Dynamic>& Gamma,
-  const Eigen::Matrix<T_rho, Eigen::Dynamic, 1>& rho,
-  const char* function) {
+    const Eigen::Matrix<T_omega, Eigen::Dynamic, Eigen::Dynamic>& log_omegas,
+    const Eigen::Matrix<T_Gamma, Eigen::Dynamic, Eigen::Dynamic>& Gamma,
+    const Eigen::Matrix<T_rho, Eigen::Dynamic, 1>& rho, const char* function) {
   int n_states = log_omegas.rows();
   int n_transitions = log_omegas.cols() - 1;
 
@@ -38,8 +37,7 @@ inline void hmm_check(
   check_simplex(function, "rho", rho);
   check_square(function, "Gamma", Gamma);
   check_nonzero_size(function, "Gamma", Gamma);
-  check_multiplicable(function, "Gamma", Gamma, "log_omegas",
-                      log_omegas);
+  check_multiplicable(function, "Gamma", Gamma, "log_omegas", log_omegas);
   for (int i = 0; i < Gamma.rows(); ++i) {
     check_simplex(function, "Gamma[i, ]", row(Gamma, i + 1));
   }
diff --git a/stan/math/prim/prob/hmm_latent_marginal_prob.hpp b/stan/math/prim/prob/hmm_latent_marginal_prob.hpp
index 9d65dedfeda..7d81e52acad 100644
--- a/stan/math/prim/prob/hmm_latent_marginal_prob.hpp
+++ b/stan/math/prim/prob/hmm_latent_marginal_prob.hpp
@@ -40,9 +40,9 @@ namespace math {
  */
 template <typename T_omega, typename T_Gamma, typename T_rho>
 inline Eigen::MatrixXd hmm_latent_marginal_prob(
-  const Eigen::Matrix<T_omega, Eigen::Dynamic, Eigen::Dynamic>& log_omegas,
-  const Eigen::Matrix<T_Gamma, Eigen::Dynamic, Eigen::Dynamic>& Gamma,
-  const Eigen::Matrix<T_rho, Eigen::Dynamic, 1>& rho) {
+    const Eigen::Matrix<T_omega, Eigen::Dynamic, Eigen::Dynamic>& log_omegas,
+    const Eigen::Matrix<T_Gamma, Eigen::Dynamic, Eigen::Dynamic>& Gamma,
+    const Eigen::Matrix<T_rho, Eigen::Dynamic, 1>& rho) {
   int n_states = log_omegas.rows();
   int n_transitions = log_omegas.cols() - 1;
 
@@ -58,8 +58,8 @@ inline Eigen::MatrixXd hmm_latent_marginal_prob(
 
   Eigen::MatrixXd Gamma_dbl_transpose = Gamma_dbl.transpose();
   for (int n = 1; n <= n_transitions; ++n)
-    alphas.col(n) = omegas.col(n).cwiseProduct(Gamma_dbl_transpose
-                      * alphas.col(n - 1));
+    alphas.col(n)
+        = omegas.col(n).cwiseProduct(Gamma_dbl_transpose * alphas.col(n - 1));
 
   // Backward pass with running normalization
   Eigen::VectorXd beta = Eigen::VectorXd::Ones(n_states);
diff --git a/stan/math/prim/prob/hmm_latent_rng.hpp b/stan/math/prim/prob/hmm_latent_rng.hpp
index 5f97eaa7105..43d2286c4ca 100644
--- a/stan/math/prim/prob/hmm_latent_rng.hpp
+++ b/stan/math/prim/prob/hmm_latent_rng.hpp
@@ -12,38 +12,37 @@ namespace stan {
 namespace math {
 
 /**
-* For a hidden Markov model with observation y, hidden state x,
-* and parameters theta, generate samples from the posterior distribution
-* of the hidden states, x.
-* In this setting, the hidden states are discrete
-* and takes values over the finite space {1, ..., K}.
-* log_omegas is a matrix of observational densities, where
-* the (i, j)th entry corresponds to the density of the ith observation, y_i,
-* given x_i = j.
-* The transition matrix Gamma is such that the (i, j)th entry is the
-* probability that x_n = j given x_{n - 1} = i. The rows of Gamma are
-* simplexes.
-*
-* @tparam T_omega type of the log likelihood matrix
-* @tparam T_Gamma type of the transition matrix
-* @tparam T_rho type of the initial guess vector
-* @param[in] log_omegas log matrix of observational densities.
-* @param[in] Gamma transition density between hidden states.
-* @param[in] rho initial state
-* @param[in] rng random number generator (duh!)
-* @return sample from the posterior distribution of the hidden states.
-* @throw `std::invalid_argument` if Gamma is not square, when we have
-*         at least one transition, or if the size of rho is not the
-*         number of rows of log_omegas.
-* @throw `std::domain_error` if rho is not a simplex and of the rows
-*         of Gamma are not a simplex (when there is at least one transition).
-*/
+ * For a hidden Markov model with observation y, hidden state x,
+ * and parameters theta, generate samples from the posterior distribution
+ * of the hidden states, x.
+ * In this setting, the hidden states are discrete
+ * and takes values over the finite space {1, ..., K}.
+ * log_omegas is a matrix of observational densities, where
+ * the (i, j)th entry corresponds to the density of the ith observation, y_i,
+ * given x_i = j.
+ * The transition matrix Gamma is such that the (i, j)th entry is the
+ * probability that x_n = j given x_{n - 1} = i. The rows of Gamma are
+ * simplexes.
+ *
+ * @tparam T_omega type of the log likelihood matrix
+ * @tparam T_Gamma type of the transition matrix
+ * @tparam T_rho type of the initial guess vector
+ * @param[in] log_omegas log matrix of observational densities.
+ * @param[in] Gamma transition density between hidden states.
+ * @param[in] rho initial state
+ * @param[in] rng random number generator (duh!)
+ * @return sample from the posterior distribution of the hidden states.
+ * @throw `std::invalid_argument` if Gamma is not square, when we have
+ *         at least one transition, or if the size of rho is not the
+ *         number of rows of log_omegas.
+ * @throw `std::domain_error` if rho is not a simplex and of the rows
+ *         of Gamma are not a simplex (when there is at least one transition).
+ */
 template <typename T_omega, typename T_Gamma, typename T_rho, class RNG>
 inline std::vector<int> hmm_latent_rng(
-  const Eigen::Matrix<T_omega, Eigen::Dynamic, Eigen::Dynamic>& log_omegas,
-  const Eigen::Matrix<T_Gamma, Eigen::Dynamic, Eigen::Dynamic>& Gamma,
-  const Eigen::Matrix<T_rho, Eigen::Dynamic, 1>& rho,
-  RNG& rng) {
+    const Eigen::Matrix<T_omega, Eigen::Dynamic, Eigen::Dynamic>& log_omegas,
+    const Eigen::Matrix<T_Gamma, Eigen::Dynamic, Eigen::Dynamic>& Gamma,
+    const Eigen::Matrix<T_rho, Eigen::Dynamic, 1>& rho, RNG& rng) {
   int n_states = log_omegas.rows();
   int n_transitions = log_omegas.cols() - 1;
 
@@ -59,8 +58,8 @@ inline std::vector<int> hmm_latent_rng(
 
   Eigen::MatrixXd Gamma_dbl_transpose = Gamma_dbl.transpose();
   for (int n = 0; n < n_transitions; ++n) {
-    alphas.col(n + 1) = omegas.col(n + 1).
-                          cwiseProduct(Gamma_dbl_transpose * alphas.col(n));
+    alphas.col(n + 1)
+        = omegas.col(n + 1).cwiseProduct(Gamma_dbl_transpose * alphas.col(n));
     alphas.col(n + 1) /= alphas.col(n + 1).maxCoeff();
   }
 
@@ -68,8 +67,8 @@ inline std::vector<int> hmm_latent_rng(
 
   // sample the last hidden state
   std::vector<int> hidden_states(n_transitions + 1);
-  Eigen::VectorXd probs_vec = alphas.col(n_transitions)
-                              / alphas.col(n_transitions).sum();
+  Eigen::VectorXd probs_vec
+      = alphas.col(n_transitions) / alphas.col(n_transitions).sum();
   std::vector<double> probs(probs_vec.data(), probs_vec.data() + n_states);
   boost::random::discrete_distribution<> cat_hidden(probs);
   hidden_states[n_transitions] = cat_hidden(rng);
diff --git a/test/unit/math/prim/err/hmm_check_test.cpp b/test/unit/math/prim/err/hmm_check_test.cpp
index 8a42cd2fac4..f2862746199 100644
--- a/test/unit/math/prim/err/hmm_check_test.cpp
+++ b/test/unit/math/prim/err/hmm_check_test.cpp
@@ -47,11 +47,11 @@ TEST(err, hmm_check) {
   // The size of Gamma is inconsistent with that of log_omega
   MatrixXd Gamma_wrong_size(n_states + 1, n_states + 1);
 
-  EXPECT_THROW_MSG(hmm_check(log_omegas, Gamma_wrong_size, rho,
-                   "hmm_marginal_lpdf"),
-                   std::invalid_argument,
-                   "hmm_marginal_lpdf: Columns of Gamma (3)"
-                   " and Rows of log_omegas (2) must match in size")
+  EXPECT_THROW_MSG(
+      hmm_check(log_omegas, Gamma_wrong_size, rho, "hmm_marginal_lpdf"),
+      std::invalid_argument,
+      "hmm_marginal_lpdf: Columns of Gamma (3)"
+      " and Rows of log_omegas (2) must match in size")
 
   // rho is not a simplex.
   VectorXd rho_bad = rho;
diff --git a/test/unit/math/prim/prob/hmm_latent_marginal_prob_test.cpp b/test/unit/math/prim/prob/hmm_latent_marginal_prob_test.cpp
index f77daa7a703..1a1be382921 100644
--- a/test/unit/math/prim/prob/hmm_latent_marginal_prob_test.cpp
+++ b/test/unit/math/prim/prob/hmm_latent_marginal_prob_test.cpp
@@ -8,7 +8,6 @@
 #include <limits>
 #include <vector>
 
-
 TEST_F(hmm_test, latent_prob_single_outcome) {
   using stan::math::hmm_latent_marginal_prob;
 
@@ -33,8 +32,8 @@ TEST_F(hmm_test, latent_prob_identity_transition) {
   using stan::math::hmm_latent_marginal_prob;
   int n_states = 2;
   Eigen::MatrixXd Gamma = Eigen::MatrixXd::Identity(n_states, n_states);
-  Eigen::MatrixXd log_omegas =
-    Eigen::MatrixXd::Ones(n_states, n_transitions_ + 1);
+  Eigen::MatrixXd log_omegas
+      = Eigen::MatrixXd::Ones(n_states, n_transitions_ + 1);
 
   Eigen::MatrixXd prob = hmm_latent_marginal_prob(log_omegas, Gamma, rho_);
 
diff --git a/test/unit/math/prim/prob/hmm_latent_rng_test.cpp b/test/unit/math/prim/prob/hmm_latent_rng_test.cpp
index afa56bb728e..34f94a24f85 100644
--- a/test/unit/math/prim/prob/hmm_latent_rng_test.cpp
+++ b/test/unit/math/prim/prob/hmm_latent_rng_test.cpp
@@ -20,8 +20,8 @@ TEST(hmm_rng_test, chiSquareGoodnessFitTest) {
   Eigen::MatrixXd Gamma = Eigen::MatrixXd::Identity(n_states, n_states);
   Eigen::VectorXd rho(n_states);
   rho << 0.65, 0.35;
-  Eigen::MatrixXd log_omegas =
-    Eigen::MatrixXd::Ones(n_states, n_transitions + 1);
+  Eigen::MatrixXd log_omegas
+      = Eigen::MatrixXd::Ones(n_states, n_transitions + 1);
 
   boost::random::mt19937 rng;
   int N = 10000;
@@ -34,7 +34,8 @@ TEST(hmm_rng_test, chiSquareGoodnessFitTest) {
   std::vector<int> state;
   for (int i = 0; i < N; ++i) {
     state = hmm_latent_rng(log_omegas, Gamma, rho, rng);
-    for (int j = 1; j < n_states; ++j) EXPECT_EQ(state[j], state[0]);
+    for (int j = 1; j < n_states; ++j)
+      EXPECT_EQ(state[j], state[0]);
     ++counts[state[0]];
   }
 

From 01282650aba46b97f6f1eb2db734476e83fa5870 Mon Sep 17 00:00:00 2001
From: Charles Margossian <charlesm@metrumrg.com>
Date: Tue, 12 May 2020 15:40:54 -0400
Subject: [PATCH 03/21] Correct doc for hmm_check.hpp

---
 stan/math/prim/err/hmm_check.hpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/stan/math/prim/err/hmm_check.hpp b/stan/math/prim/err/hmm_check.hpp
index 237b84a1b13..acdf3ce903e 100644
--- a/stan/math/prim/err/hmm_check.hpp
+++ b/stan/math/prim/err/hmm_check.hpp
@@ -19,11 +19,10 @@ namespace math {
  * @param[in] Gamma transition density between hidden states.
  * @param[in] rho initial state
  * @param[in] function the name of the function using the arguments.
- * @throw `std::invalid_argument` if Gamma is not square, when we have
- *         at least one transition, or if the size of rho is not the
- *         number of rows of log_omegas.
- * @throw `std::domain_error` if rho is not a simplex and of the rows
- *         of Gamma are not a simplex (when there is at least one transition).
+ * @throw `std::invalid_argument` if Gamma is not square
+ *         or if the size of rho is not the number of rows of log_omegas.
+ * @throw `std::domain_error` if rho is not a simplex or the rows
+ *         of Gamma are not a simplex.
  */
 template <typename T_omega, typename T_Gamma, typename T_rho>
 inline void hmm_check(

From 392b7d638b8801e9f7241970955e5561e431ac31 Mon Sep 17 00:00:00 2001
From: Charles Margossian <charlesm@metrumrg.com>
Date: Tue, 12 May 2020 16:52:09 -0400
Subject: [PATCH 04/21] Address reviewers comments.

---
 .../prim/prob/hmm_latent_marginal_prob.hpp    | 38 ++++++++++---------
 stan/math/prim/prob/hmm_latent_rng.hpp        | 13 +++----
 2 files changed, 27 insertions(+), 24 deletions(-)

diff --git a/stan/math/prim/prob/hmm_latent_marginal_prob.hpp b/stan/math/prim/prob/hmm_latent_marginal_prob.hpp
index 7d81e52acad..73e8f014061 100644
--- a/stan/math/prim/prob/hmm_latent_marginal_prob.hpp
+++ b/stan/math/prim/prob/hmm_latent_marginal_prob.hpp
@@ -13,30 +13,34 @@ namespace math {
 
 /**
  * For a hidden Markov model with observation y, hidden state x,
- * and parameters theta, compute the marginal posterior probability
- * of the hidden states. In this setting, the hidden states are discrete
- * and takes values over the finite space {1, ..., K}.
+ * and parameters theta, compute the marginal probability
+ * vector for each x, given y and theta, p(x_i | y, theta).
+ * In this setting, the hidden states are discrete
+ * and take values over the finite space {1, ..., K}.
+ * Hence for each hidden variable x, we compute a simplex with K elements.
+ * The final result is stored in a K by N matrix, where N is the length of x.
  * log_omegas is a matrix of observational densities, where
  * the (i, j)th entry corresponds to the density of the ith observation, y_i,
  * given x_i = j.
  * The transition matrix Gamma is such that the (i, j)th entry is the
  * probability that x_n = j given x_{n - 1} = i. The rows of Gamma are
  * simplexes.
- * Note we only calculate marginal probabilities. However correlation between
- * the latent states must be considered in order to sample latent states.
+ * This function cannot be used to reconstruct the marginal distributon
+ * of a state sequence given parameters and an observation sequence,
+ * p(x | y, theta),
+ * because it only computes marginals on a state-by-state basis.
  *
  * @tparam T_omega type of the log likelihood matrix
  * @tparam T_Gamma type of the transition matrix
  * @tparam T_rho type of the initial guess vector
- * @param[in] log_omegas log matrix of observational densities.
- * @param[in] Gamma transition density between hidden states.
+ * @param[in] log_omegas log matrix of observational densities
+ * @param[in] Gamma transition density between hidden states
  * @param[in] rho initial state
- * @return the posterior probability for each latent state.
- * @throw `std::invalid_argument` if Gamma is not square, when we have
- *         at least one transition, or if the size of rho is not the
- *         number of rows of log_omegas.
+ * @return the posterior probability for each latent state
+ * @throw `std::invalid_argument` if Gamma is not square
+ *         or if the size of rho is not the number of rows of log_omegas
  * @throw `std::domain_error` if rho is not a simplex and of the rows
- *         of Gamma are not a simplex (when there is at least one transition).
+ *         of Gamma are not a simplex
  */
 template <typename T_omega, typename T_Gamma, typename T_rho>
 inline Eigen::MatrixXd hmm_latent_marginal_prob(
@@ -57,17 +61,17 @@ inline Eigen::MatrixXd hmm_latent_marginal_prob(
   alphas.col(0) /= alphas.col(0).maxCoeff();
 
   Eigen::MatrixXd Gamma_dbl_transpose = Gamma_dbl.transpose();
-  for (int n = 1; n <= n_transitions; ++n)
-    alphas.col(n)
-        = omegas.col(n).cwiseProduct(Gamma_dbl_transpose * alphas.col(n - 1));
+  for (int n = 0; n < n_transitions; ++n)
+    alphas.col(n + 1)
+        = omegas.col(n + 1).cwiseProduct(Gamma_dbl_transpose * alphas.col(n));
 
   // Backward pass with running normalization
   Eigen::VectorXd beta = Eigen::VectorXd::Ones(n_states);
 
   alphas.col(n_transitions) /= alphas.col(n_transitions).sum();
 
-  for (int n = n_transitions - 1; n >= 0; --n) {
-    beta = Gamma_dbl * (omegas.col(n + 1).cwiseProduct(beta));
+  for (int n = n_transitions; n-- > 0; ) {
+    beta = Gamma_dbl * omegas.col(n + 1).cwiseProduct(beta);
     beta /= beta.maxCoeff();
 
     // Reuse alphas to store probabilities
diff --git a/stan/math/prim/prob/hmm_latent_rng.hpp b/stan/math/prim/prob/hmm_latent_rng.hpp
index 43d2286c4ca..76aa9e48969 100644
--- a/stan/math/prim/prob/hmm_latent_rng.hpp
+++ b/stan/math/prim/prob/hmm_latent_rng.hpp
@@ -30,7 +30,7 @@ namespace math {
  * @param[in] log_omegas log matrix of observational densities.
  * @param[in] Gamma transition density between hidden states.
  * @param[in] rho initial state
- * @param[in] rng random number generator (duh!)
+ * @param[in] rng random number generator
  * @return sample from the posterior distribution of the hidden states.
  * @throw `std::invalid_argument` if Gamma is not square, when we have
  *         at least one transition, or if the size of rho is not the
@@ -73,15 +73,14 @@ inline std::vector<int> hmm_latent_rng(
   boost::random::discrete_distribution<> cat_hidden(probs);
   hidden_states[n_transitions] = cat_hidden(rng);
 
-  for (int n = n_transitions - 1; n >= 0; --n) {
+  for (int n = n_transitions; n-- > 0; ) {
     // sample the nth hidden state conditional on (n + 1)st hidden state
     int last_hs = hidden_states[n + 1];
 
-    // CHECK -- do we need to redeclare the hidden state?
-    for (int k = 0; k < n_states; ++k) {
-      probs_vec[k] = alphas(k, n) * omegas(last_hs, n + 1)
-                     * Gamma_dbl_transpose(last_hs, k) * beta(last_hs);
-    }
+    probs_vec = alphas.col(n).
+                cwiseProduct(Gamma_dbl.col(last_hs))
+                * beta(last_hs) * omegas(last_hs, n + 1);
+
     probs_vec /= probs_vec.sum();
     std::vector<double> probs(probs_vec.data(), probs_vec.data() + n_states);
     boost::random::discrete_distribution<> cat_hidden(probs);

From 54a04e1f9de4cae591341ff3e014d8a2ef7523e7 Mon Sep 17 00:00:00 2001
From: Stan Jenkins <mc.stanislaw@gmail.com>
Date: Tue, 12 May 2020 21:38:59 +0000
Subject: [PATCH 05/21] [Jenkins] auto-formatting by clang-format version
 6.0.0-1ubuntu2~16.04.1 (tags/RELEASE_600/final)

---
 stan/math/prim/prob/hmm_latent_marginal_prob.hpp | 2 +-
 stan/math/prim/prob/hmm_latent_rng.hpp           | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/stan/math/prim/prob/hmm_latent_marginal_prob.hpp b/stan/math/prim/prob/hmm_latent_marginal_prob.hpp
index 73e8f014061..60b43d69332 100644
--- a/stan/math/prim/prob/hmm_latent_marginal_prob.hpp
+++ b/stan/math/prim/prob/hmm_latent_marginal_prob.hpp
@@ -70,7 +70,7 @@ inline Eigen::MatrixXd hmm_latent_marginal_prob(
 
   alphas.col(n_transitions) /= alphas.col(n_transitions).sum();
 
-  for (int n = n_transitions; n-- > 0; ) {
+  for (int n = n_transitions; n-- > 0;) {
     beta = Gamma_dbl * omegas.col(n + 1).cwiseProduct(beta);
     beta /= beta.maxCoeff();
 
diff --git a/stan/math/prim/prob/hmm_latent_rng.hpp b/stan/math/prim/prob/hmm_latent_rng.hpp
index 76aa9e48969..fce44a576e4 100644
--- a/stan/math/prim/prob/hmm_latent_rng.hpp
+++ b/stan/math/prim/prob/hmm_latent_rng.hpp
@@ -73,12 +73,11 @@ inline std::vector<int> hmm_latent_rng(
   boost::random::discrete_distribution<> cat_hidden(probs);
   hidden_states[n_transitions] = cat_hidden(rng);
 
-  for (int n = n_transitions; n-- > 0; ) {
+  for (int n = n_transitions; n-- > 0;) {
     // sample the nth hidden state conditional on (n + 1)st hidden state
     int last_hs = hidden_states[n + 1];
 
-    probs_vec = alphas.col(n).
-                cwiseProduct(Gamma_dbl.col(last_hs))
+    probs_vec = alphas.col(n).cwiseProduct(Gamma_dbl.col(last_hs))
                 * beta(last_hs) * omegas(last_hs, n + 1);
 
     probs_vec /= probs_vec.sum();

From 368b51e23365cd9e5ada03ef9fdc067fb2b834a0 Mon Sep 17 00:00:00 2001
From: Niko Huurre <nhuurre@protonmail.com>
Date: Wed, 13 May 2020 12:24:40 +0300
Subject: [PATCH 06/21] add closure API to ode_bdf

---
 stan/math/rev/functor/coupled_ode_system.hpp  | 19 +++++++++++--
 stan/math/rev/functor/cvodes_integrator.hpp   | 27 +++++++++++--------
 stan/math/rev/functor/ode_bdf.hpp             |  6 +++--
 .../rev/functor/ode_store_sensitivities.hpp   | 15 ++++++++---
 4 files changed, 49 insertions(+), 18 deletions(-)

diff --git a/stan/math/rev/functor/coupled_ode_system.hpp b/stan/math/rev/functor/coupled_ode_system.hpp
index 32679eb9e0f..789e23cf25f 100644
--- a/stan/math/rev/functor/coupled_ode_system.hpp
+++ b/stan/math/rev/functor/coupled_ode_system.hpp
@@ -73,6 +73,8 @@ struct coupled_ode_system_impl<false, F, T_initial, Args...> {
   const size_t N_;
   std::ostream* msgs_;
 
+  using F_copy = typename F::DeepCopy__;
+
   /**
    * Construct a coupled ode system from the base system function,
    * initial state of the base system, parameters, and a stream for
@@ -123,6 +125,8 @@ struct coupled_ode_system_impl<false, F, T_initial, Args...> {
     for (size_t n = 0; n < N_; ++n)
       y_vars(n) = z(n);
 
+    F_copy f_vars_ = f_;
+
     auto local_args_tuple = apply(
         [&](auto&&... args) {
           return std::tuple<decltype(deep_copy_vars(args))...>(
@@ -131,7 +135,7 @@ struct coupled_ode_system_impl<false, F, T_initial, Args...> {
         args_tuple_);
 
     Eigen::Matrix<var, Eigen::Dynamic, 1> f_y_t_vars
-        = apply([&](auto&&... args) { return f_(t, y_vars, msgs_, args...); },
+        = apply([&](auto&&... args) { return f_vars_(t, y_vars, msgs_, args...); },
                 local_args_tuple);
 
     check_size_match("coupled_ode_system", "dy_dt", f_y_t_vars.size(), "states",
@@ -168,6 +172,17 @@ struct coupled_ode_system_impl<false, F, T_initial, Args...> {
         dz_dt[N_ + N_ * y0_vars_ + N_ * j + i] = temp_deriv;
       }
 
+      args_adjoints = Eigen::VectorXd::Zero(f_.num_vars__);
+      f_vars_.accumulate_adjoints(args_adjoints.data());
+      for (size_t j = 0; j < f_.num_vars__; j++) {
+        double temp_deriv = args_adjoints(j);
+        for (size_t k = 0; k < N_; k++) {
+          temp_deriv += z[N_ + N_ * y0_vars_ + N_ * args_vars_ + N_ * j + k] * y_vars[k].adj();
+        }
+
+        dz_dt[N_ + N_ * y0_vars_ + N_ * args_vars_ + N_ * j + i] = temp_deriv;
+      }
+
       nested.set_zero_all_adjoints();
     }
   }
@@ -177,7 +192,7 @@ struct coupled_ode_system_impl<false, F, T_initial, Args...> {
    *
    * @return size of the coupled system.
    */
-  size_t size() const { return N_ + N_ * y0_vars_ + N_ * args_vars_; }
+  size_t size() const { return N_ + N_ * y0_vars_ + N_ * args_vars_ + N_ * f_.num_vars__; }
 
   /**
    * Returns the initial state of the coupled system.
diff --git a/stan/math/rev/functor/cvodes_integrator.hpp b/stan/math/rev/functor/cvodes_integrator.hpp
index 10a6e3d059a..93e905d750c 100644
--- a/stan/math/rev/functor/cvodes_integrator.hpp
+++ b/stan/math/rev/functor/cvodes_integrator.hpp
@@ -30,10 +30,14 @@ namespace math {
 template <int Lmm, typename F, typename T_y0, typename T_t0, typename T_ts,
           typename... T_Args>
 class cvodes_integrator {
-  using T_Return = return_type_t<T_y0, T_t0, T_ts, T_Args...>;
+  using T_Return = return_type_t<T_y0, T_t0, T_ts,
+                                 typename F::captured_scalar_t__, T_Args...>;
   using T_y0_t0 = return_type_t<T_y0, T_t0>;
 
+  using F_dbl = typename F::ValueOf__;
+
   const F& f_;
+  const F_dbl f_dbl_;
   const Eigen::Matrix<T_y0_t0, Eigen::Dynamic, 1> y0_;
   const T_t0 t0_;
   const std::vector<T_ts>& ts_;
@@ -103,7 +107,7 @@ class cvodes_integrator {
     const Eigen::VectorXd y_vec = Eigen::Map<const Eigen::VectorXd>(y, N_);
 
     Eigen::VectorXd dy_dt_vec
-        = apply([&](auto&&... args) { return f_(t, y_vec, msgs_, args...); },
+        = apply([&](auto&&... args) { return f_dbl_(t, y_vec, msgs_, args...); },
                 value_of_args_tuple_);
 
     check_size_match("cvodes_integrator::rhs", "dy_dt", dy_dt_vec.size(),
@@ -121,7 +125,7 @@ class cvodes_integrator {
     Eigen::MatrixXd Jfy;
 
     auto f_wrapped = [&](const Eigen::Matrix<var, Eigen::Dynamic, 1>& y) {
-      return apply([&](auto&&... args) { return f_(t, y, msgs_, args...); },
+      return apply([&](auto&&... args) { return f_dbl_(t, y, msgs_, args...); },
                    value_of_args_tuple_);
     };
 
@@ -183,6 +187,7 @@ class cvodes_integrator {
                     long int max_num_steps, std::ostream* msgs,
                     const T_Args&... args)
       : f_(f),
+        f_dbl_(f),
         y0_(y0.unaryExpr([](const T_y0& val) { return T_y0_t0(val); })),
         t0_(t0),
         ts_(ts),
@@ -225,10 +230,10 @@ class cvodes_integrator {
     A_ = SUNDenseMatrix(N_, N_);
     LS_ = SUNDenseLinearSolver(nv_state_, A_);
 
-    if (y0_vars_ + args_vars_ > 0) {
+    if (y0_vars_ + args_vars_ + f_.num_vars__ > 0) {
       nv_state_sens_
-          = N_VCloneVectorArrayEmpty_Serial(y0_vars_ + args_vars_, nv_state_);
-      for (std::size_t i = 0; i < y0_vars_ + args_vars_; i++) {
+          = N_VCloneVectorArrayEmpty_Serial(y0_vars_ + args_vars_ + f_.num_vars__, nv_state_);
+      for (std::size_t i = 0; i < y0_vars_ + args_vars_ + f_.num_vars__; i++) {
         NV_DATA_S(nv_state_sens_[i]) = &coupled_state_[N_] + i * N_;
       }
     }
@@ -238,8 +243,8 @@ class cvodes_integrator {
     SUNLinSolFree(LS_);
     SUNMatDestroy(A_);
     N_VDestroy_Serial(nv_state_);
-    if (y0_vars_ + args_vars_ > 0) {
-      N_VDestroyVectorArray_Serial(nv_state_sens_, y0_vars_ + args_vars_);
+    if (y0_vars_ + args_vars_ + f_.num_vars__> 0) {
+      N_VDestroyVectorArray_Serial(nv_state_sens_, y0_vars_ + args_vars_ + f_.num_vars__);
     }
   }
 
@@ -287,9 +292,9 @@ class cvodes_integrator {
           "CVodeSetJacFn");
 
       // initialize forward sensitivity system of CVODES as needed
-      if (y0_vars_ + args_vars_ > 0) {
+      if (y0_vars_ + args_vars_ + f_.num_vars__ > 0) {
         check_flag_sundials(
-            CVodeSensInit(cvodes_mem, static_cast<int>(y0_vars_ + args_vars_),
+            CVodeSensInit(cvodes_mem, static_cast<int>(y0_vars_ + args_vars_ + f_.num_vars__),
                           CV_STAGGERED, &cvodes_integrator::cv_rhs_sens,
                           nv_state_sens_),
             "CVodeSensInit");
@@ -308,7 +313,7 @@ class cvodes_integrator {
               "CVode");
         }
 
-        if (y0_vars_ + args_vars_ > 0) {
+        if (y0_vars_ + args_vars_ + f_.num_vars__ > 0) {
           check_flag_sundials(CVodeGetSens(cvodes_mem, &t_init, nv_state_sens_),
                               "CVodeGetSens");
         }
diff --git a/stan/math/rev/functor/ode_bdf.hpp b/stan/math/rev/functor/ode_bdf.hpp
index 7e63d1fb36b..3d1ca77df16 100644
--- a/stan/math/rev/functor/ode_bdf.hpp
+++ b/stan/math/rev/functor/ode_bdf.hpp
@@ -45,7 +45,8 @@ namespace math {
  */
 template <typename F, typename T_initial, typename T_t0, typename T_ts,
           typename... T_Args>
-std::vector<Eigen::Matrix<stan::return_type_t<T_initial, T_t0, T_ts, T_Args...>,
+std::vector<Eigen::Matrix<stan::return_type_t<T_initial, T_t0, T_ts,
+                          typename F::captured_scalar_t__, T_Args...>,
                           Eigen::Dynamic, 1>>
 ode_bdf(const F& f, const Eigen::Matrix<T_initial, Eigen::Dynamic, 1>& y0,
         const T_t0& t0, const std::vector<T_ts>& ts, std::ostream* msgs,
@@ -93,7 +94,8 @@ ode_bdf(const F& f, const Eigen::Matrix<T_initial, Eigen::Dynamic, 1>& y0,
  */
 template <typename F, typename T_initial, typename T_t0, typename T_ts,
           typename... T_Args>
-std::vector<Eigen::Matrix<stan::return_type_t<T_initial, T_t0, T_ts, T_Args...>,
+std::vector<Eigen::Matrix<stan::return_type_t<T_initial, T_t0, T_ts,
+                          typename F::captured_scalar_t__, T_Args...>,
                           Eigen::Dynamic, 1>>
 ode_bdf_tol(const F& f, const Eigen::Matrix<T_initial, Eigen::Dynamic, 1>& y0,
             const T_t0& t0, const std::vector<T_ts>& ts,
diff --git a/stan/math/rev/functor/ode_store_sensitivities.hpp b/stan/math/rev/functor/ode_store_sensitivities.hpp
index f2c505ccf08..279b08b6c6f 100644
--- a/stan/math/rev/functor/ode_store_sensitivities.hpp
+++ b/stan/math/rev/functor/ode_store_sensitivities.hpp
@@ -36,25 +36,27 @@ Eigen::Matrix<var, Eigen::Dynamic, 1> ode_store_sensitivities(
     const F& f, const Eigen::VectorXd& coupled_state,
     const Eigen::Matrix<T_y0_t0, Eigen::Dynamic, 1>& y0, const T_t0& t0,
     const T_t& t, std::ostream* msgs, const Args&... args) {
+  using F_dbl = typename F::ValueOf__;
   const size_t N = y0.size();
   const size_t y0_vars = count_vars(y0);
   const size_t args_vars = count_vars(args...);
   const size_t t0_vars = count_vars(t0);
   const size_t t_vars = count_vars(t);
+  F_dbl f_dbl = f;
   Eigen::Matrix<var, Eigen::Dynamic, 1> yt(N);
 
   Eigen::VectorXd y = coupled_state.head(N);
 
   Eigen::VectorXd f_y_t;
   if (is_var<T_t>::value)
-    f_y_t = f(value_of(t), y, msgs, value_of(args)...);
+    f_y_t = f_dbl(value_of(t), y, msgs, value_of(args)...);
 
   Eigen::VectorXd f_y0_t0;
   if (is_var<T_t0>::value)
-    f_y0_t0 = f(value_of(t0), value_of(y0), msgs, value_of(args)...);
+    f_y0_t0 = f_dbl(value_of(t0), value_of(y0), msgs, value_of(args)...);
 
   for (size_t j = 0; j < N; j++) {
-    const size_t total_vars = y0_vars + args_vars + t0_vars + t_vars;
+    const size_t total_vars = y0_vars + args_vars + t0_vars + t_vars + f.num_vars__;
 
     vari** varis
         = ChainableStack::instance_->memalloc_.alloc_array<vari*>(total_vars);
@@ -80,6 +82,13 @@ Eigen::Matrix<var, Eigen::Dynamic, 1> ode_store_sensitivities(
       partials_ptr++;
     }
 
+    f.save_varis(varis_ptr);
+    varis_ptr += f.num_vars__;
+    for (std::size_t k = 0; k < f.num_vars__; ++k) {
+      *partials_ptr = coupled_state(N + N * y0_vars + N * args_vars + N * k + j);
+      partials_ptr++;
+    }
+
     varis_ptr = save_varis(varis_ptr, t0);
     if (t0_vars > 0) {
       double dyt_dt0 = 0.0;

From 271ccb092043929ee06f0f7fae7581707391e543 Mon Sep 17 00:00:00 2001
From: Niko Huurre <nhuurre@protonmail.com>
Date: Sat, 16 May 2020 10:37:45 +0300
Subject: [PATCH 07/21] closure fix

---
 stan/math/prim/functor/coupled_ode_system.hpp   | 10 ++++++----
 .../prim/functor/ode_store_sensitivities.hpp    |  3 ++-
 stan/math/rev/functor/cvodes_integrator.hpp     |  4 ++--
 .../rev/functor/ode_store_sensitivities.hpp     | 17 +++++++++++------
 4 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/stan/math/prim/functor/coupled_ode_system.hpp b/stan/math/prim/functor/coupled_ode_system.hpp
index 004dfb8009c..3e32517153e 100644
--- a/stan/math/prim/functor/coupled_ode_system.hpp
+++ b/stan/math/prim/functor/coupled_ode_system.hpp
@@ -127,14 +127,16 @@ struct coupled_ode_system_impl<true, F, T_initial, Args...> {
 template <typename F, typename T_initial, typename... Args>
 struct coupled_ode_system
     : public coupled_ode_system_impl<
-          std::is_arithmetic<return_type_t<T_initial, Args...>>::value, F,
-          T_initial, Args...> {
+          std::is_arithmetic<return_type_t<typename F::captured_scalar_t__,
+                                           T_initial, Args...>>::value,
+          F, T_initial, Args...> {
   coupled_ode_system(const F& f,
                      const Eigen::Matrix<T_initial, Eigen::Dynamic, 1>& y0,
                      std::ostream* msgs, const Args&... args)
       : coupled_ode_system_impl<
-            std::is_arithmetic<return_type_t<T_initial, Args...>>::value, F,
-            T_initial, Args...>(f, y0, msgs, args...) {}
+          std::is_arithmetic<return_type_t<typename F::captured_scalar_t__,
+                                           T_initial, Args...>>::value,
+          F, T_initial, Args...>(f, y0, msgs, args...) {}
 };
 
 }  // namespace math
diff --git a/stan/math/prim/functor/ode_store_sensitivities.hpp b/stan/math/prim/functor/ode_store_sensitivities.hpp
index acf0fd61992..833734138cd 100644
--- a/stan/math/prim/functor/ode_store_sensitivities.hpp
+++ b/stan/math/prim/functor/ode_store_sensitivities.hpp
@@ -24,7 +24,8 @@ namespace math {
  * @return ODE state
  */
 template <typename F, typename... Args,
-          typename = require_all_arithmetic_t<scalar_type_t<Args>...>>
+          typename = require_all_arithmetic_t<typename F::captured_scalar_t__,
+                                              scalar_type_t<Args>...>>
 Eigen::VectorXd ode_store_sensitivities(const F& f,
                                         const Eigen::VectorXd& coupled_state,
                                         const Eigen::VectorXd& y0, double t0,
diff --git a/stan/math/rev/functor/cvodes_integrator.hpp b/stan/math/rev/functor/cvodes_integrator.hpp
index 93e905d750c..454b5769ba9 100644
--- a/stan/math/rev/functor/cvodes_integrator.hpp
+++ b/stan/math/rev/functor/cvodes_integrator.hpp
@@ -149,12 +149,12 @@ class cvodes_integrator {
     Eigen::VectorXd z(coupled_state_.size());
     Eigen::VectorXd dz_dt(coupled_state_.size());
     std::copy(y, y + N_, z.data());
-    for (std::size_t s = 0; s < y0_vars_ + args_vars_; s++) {
+    for (std::size_t s = 0; s < y0_vars_ + args_vars_ + f_.num_vars__; s++) {
       std::copy(NV_DATA_S(yS[s]), NV_DATA_S(yS[s]) + N_,
                 z.data() + (s + 1) * N_);
     }
     coupled_ode_(z, dz_dt, t);
-    for (std::size_t s = 0; s < y0_vars_ + args_vars_; s++) {
+    for (std::size_t s = 0; s < y0_vars_ + args_vars_ + f_.num_vars__; s++) {
       std::move(dz_dt.data() + (s + 1) * N_, dz_dt.data() + (s + 2) * N_,
                 NV_DATA_S(ySdot[s]));
     }
diff --git a/stan/math/rev/functor/ode_store_sensitivities.hpp b/stan/math/rev/functor/ode_store_sensitivities.hpp
index 279b08b6c6f..e6e2b28245d 100644
--- a/stan/math/rev/functor/ode_store_sensitivities.hpp
+++ b/stan/math/rev/functor/ode_store_sensitivities.hpp
@@ -42,18 +42,23 @@ Eigen::Matrix<var, Eigen::Dynamic, 1> ode_store_sensitivities(
   const size_t args_vars = count_vars(args...);
   const size_t t0_vars = count_vars(t0);
   const size_t t_vars = count_vars(t);
-  F_dbl f_dbl = f;
+
   Eigen::Matrix<var, Eigen::Dynamic, 1> yt(N);
 
   Eigen::VectorXd y = coupled_state.head(N);
 
   Eigen::VectorXd f_y_t;
-  if (is_var<T_t>::value)
-    f_y_t = f_dbl(value_of(t), y, msgs, value_of(args)...);
-
   Eigen::VectorXd f_y0_t0;
-  if (is_var<T_t0>::value)
-    f_y0_t0 = f_dbl(value_of(t0), value_of(y0), msgs, value_of(args)...);
+
+  if (is_var<T_t>::value || is_var<T_t0>::value) {
+    F_dbl f_dbl = f;
+
+    if (is_var<T_t>::value)
+      f_y_t = f_dbl(value_of(t), y, msgs, value_of(args)...);
+
+    if (is_var<T_t0>::value)
+      f_y0_t0 = f_dbl(value_of(t0), value_of(y0), msgs, value_of(args)...);
+  }
 
   for (size_t j = 0; j < N; j++) {
     const size_t total_vars = y0_vars + args_vars + t0_vars + t_vars + f.num_vars__;

From 8b69058b2e92145027b5cba7c958aac378ab551a Mon Sep 17 00:00:00 2001
From: Charles Margossian <charlesm@metrumrg.com>
Date: Mon, 18 May 2020 10:46:32 -0400
Subject: [PATCH 08/21] Remove lpdf suffix.

---
 stan/math/prim/prob.hpp                       |  2 +-
 ...hmm_marginal_lpdf.hpp => hmm_marginal.hpp} |  8 ++--
 .../unit/math/prim/prob/hmm_marginal_test.cpp | 48 +++++++++----------
 3 files changed, 29 insertions(+), 29 deletions(-)
 rename stan/math/prim/prob/{hmm_marginal_lpdf.hpp => hmm_marginal.hpp} (97%)

diff --git a/stan/math/prim/prob.hpp b/stan/math/prim/prob.hpp
index 98aa434dd20..2ed44f0beeb 100644
--- a/stan/math/prim/prob.hpp
+++ b/stan/math/prim/prob.hpp
@@ -136,7 +136,7 @@
 #include <stan/math/prim/prob/gumbel_rng.hpp>
 #include <stan/math/prim/prob/hmm_latent_marginal_prob.hpp>
 #include <stan/math/prim/prob/hmm_latent_rng.hpp>
-#include <stan/math/prim/prob/hmm_marginal_lpdf.hpp>
+#include <stan/math/prim/prob/hmm_marginal.hpp>
 #include <stan/math/prim/prob/hypergeometric_log.hpp>
 #include <stan/math/prim/prob/hypergeometric_lpmf.hpp>
 #include <stan/math/prim/prob/hypergeometric_rng.hpp>
diff --git a/stan/math/prim/prob/hmm_marginal_lpdf.hpp b/stan/math/prim/prob/hmm_marginal.hpp
similarity index 97%
rename from stan/math/prim/prob/hmm_marginal_lpdf.hpp
rename to stan/math/prim/prob/hmm_marginal.hpp
index 1ef4195a1d2..09f470eb3a0 100644
--- a/stan/math/prim/prob/hmm_marginal_lpdf.hpp
+++ b/stan/math/prim/prob/hmm_marginal.hpp
@@ -15,7 +15,7 @@ namespace stan {
 namespace math {
 
 template <typename T_omega, typename T_Gamma, typename T_rho, typename T_alpha>
-inline auto hmm_marginal_lpdf_val(
+inline auto hmm_marginal_val(
     const Eigen::Matrix<T_omega, Eigen::Dynamic, Eigen::Dynamic>& omegas,
     const Eigen::Matrix<T_Gamma, Eigen::Dynamic, Eigen::Dynamic>& Gamma_val,
     const Eigen::Matrix<T_rho, Eigen::Dynamic, 1>& rho_val,
@@ -71,7 +71,7 @@ inline auto hmm_marginal_lpdf_val(
  *         of Gamma are not a simplex (when there is at least one transition).
  */
 template <typename T_omega, typename T_Gamma, typename T_rho>
-inline auto hmm_marginal_lpdf(
+inline auto hmm_marginal(
     const Eigen::Matrix<T_omega, Eigen::Dynamic, Eigen::Dynamic>& log_omegas,
     const Eigen::Matrix<T_Gamma, Eigen::Dynamic, Eigen::Dynamic>& Gamma,
     const Eigen::Matrix<T_rho, Eigen::Dynamic, 1>& rho) {
@@ -82,7 +82,7 @@ inline auto hmm_marginal_lpdf(
   int n_states = log_omegas.rows();
   int n_transitions = log_omegas.cols() - 1;
 
-  hmm_check(log_omegas, Gamma, rho, "hmm_marginal_lpdf");
+  hmm_check(log_omegas, Gamma, rho, "hmm_marginal");
 
   operands_and_partials<Eigen::Matrix<T_omega, Eigen::Dynamic, Eigen::Dynamic>,
                         Eigen::Matrix<T_Gamma, Eigen::Dynamic, Eigen::Dynamic>,
@@ -97,7 +97,7 @@ inline auto hmm_marginal_lpdf(
   auto rho_val = value_of(rho);
   eig_matrix_partial omegas = value_of(log_omegas).array().exp();
   T_partial_type norm_norm;
-  auto log_marginal_density = hmm_marginal_lpdf_val(
+  auto log_marginal_density = hmm_marginal_val(
       omegas, Gamma_val, rho_val, alphas, alpha_log_norms, norm_norm);
 
   // Variables required for all three Jacobian-adjoint products.
diff --git a/test/unit/math/prim/prob/hmm_marginal_test.cpp b/test/unit/math/prim/prob/hmm_marginal_test.cpp
index 13450471c21..e1f901c1739 100644
--- a/test/unit/math/prim/prob/hmm_marginal_test.cpp
+++ b/test/unit/math/prim/prob/hmm_marginal_test.cpp
@@ -1,4 +1,4 @@
-#include <stan/math/prim/prob/hmm_marginal_lpdf.hpp>
+#include <stan/math/prim/prob/hmm_marginal.hpp>
 #include <boost/math/distributions.hpp>
 #include <boost/random.hpp>
 #include <test/unit/math/test_ad.hpp>
@@ -38,7 +38,7 @@ inline stan::return_type_t<T_omega, T_Gamma, T_rho> hmm_marginal_test_wrapper(
   for (int i = 0; i < n_states - 1; i++)
     rho(i) = rho_unconstrained[i];
 
-  return stan::math::hmm_marginal_lpdf(log_omegas, Gamma, rho);
+  return stan::math::hmm_marginal(log_omegas, Gamma, rho);
 }
 
 /**
@@ -115,9 +115,9 @@ class hmm_test : public ::testing::Test {
 // a forward algorithm written in R.
 // TODO(charlesm93): Add public repo link with R script.
 TEST_F(hmm_test, ten_transitions) {
-  using stan::math::hmm_marginal_lpdf;
+  using stan::math::hmm_marginal;
 
-  EXPECT_FLOAT_EQ(-18.37417, hmm_marginal_lpdf(log_omegas_, Gamma_, rho_));
+  EXPECT_FLOAT_EQ(-18.37417, hmm_marginal(log_omegas_, Gamma_, rho_));
 
   // Differentiation tests
   auto hmm_functor = [](const auto& log_omegas, const auto& Gamma_unconstrained,
@@ -131,9 +131,9 @@ TEST_F(hmm_test, ten_transitions) {
 }
 
 TEST_F(hmm_test, zero_transitions) {
-  using stan::math::hmm_marginal_lpdf;
+  using stan::math::hmm_marginal;
 
-  EXPECT_FLOAT_EQ(-1.520827, hmm_marginal_lpdf(log_omegas_zero_, Gamma_, rho_));
+  EXPECT_FLOAT_EQ(-1.520827, hmm_marginal(log_omegas_zero_, Gamma_, rho_));
 
   // Differentiation tests
   auto hmm_functor = [](const auto& log_omegas, const auto& Gamma_unconstrained,
@@ -146,8 +146,8 @@ TEST_F(hmm_test, zero_transitions) {
                         Gamma_unconstrained_, rho_unconstrained_);
 }
 
-TEST(hmm_marginal_lpdf, one_state) {
-  using stan::math::hmm_marginal_lpdf;
+TEST(hmm_marginal, one_state) {
+  using stan::math::hmm_marginal;
   int n_states = 1, p1_init = 1, gamma1 = 1, n_transitions = 10, abs_mu = 1,
       sigma = 1;
   Eigen::VectorXd rho(n_states);
@@ -161,7 +161,7 @@ TEST(hmm_marginal_lpdf, one_state) {
   for (int n = 0; n < n_transitions + 1; n++)
     log_omegas.col(n)[0] = state_lpdf(obs_data[n], abs_mu, sigma, 0);
 
-  EXPECT_FLOAT_EQ(-14.89646, hmm_marginal_lpdf(log_omegas, Gamma, rho));
+  EXPECT_FLOAT_EQ(-14.89646, hmm_marginal(log_omegas, Gamma, rho));
 
   // Differentiation tests
   // In the case where we have one state, Gamma and rho
@@ -172,17 +172,17 @@ TEST(hmm_marginal_lpdf, one_state) {
     Eigen::VectorXd rho(1);
     rho << 1;
 
-    return hmm_marginal_lpdf(log_omegas, Gamma, rho);
+    return hmm_marginal(log_omegas, Gamma, rho);
   };
 
   stan::test::ad_tolerances tols;
   stan::test::expect_ad(tols, hmm_functor, log_omegas);
 }
 
-TEST(hmm_marginal_lpdf, exceptions) {
+TEST(hmm_marginal, exceptions) {
   using Eigen::MatrixXd;
   using Eigen::VectorXd;
-  using stan::math::hmm_marginal_lpdf;
+  using stan::math::hmm_marginal;
 
   int n_states = 2;
   int n_transitions = 2;
@@ -201,46 +201,46 @@ TEST(hmm_marginal_lpdf, exceptions) {
   // Gamma is not square.
   MatrixXd Gamma_rec(n_states, n_states + 1);
   EXPECT_THROW_MSG(
-      hmm_marginal_lpdf(log_omegas, Gamma_rec, rho), std::invalid_argument,
-      "hmm_marginal_lpdf: Expecting a square matrix; rows of Gamma (2) "
+      hmm_marginal(log_omegas, Gamma_rec, rho), std::invalid_argument,
+      "hmm_marginal: Expecting a square matrix; rows of Gamma (2) "
       "and columns of Gamma (3) must match in size");
 
   // Gamma has a column that is not a simplex.
   MatrixXd Gamma_bad = Gamma;
   Gamma_bad(0, 0) = Gamma(0, 0) + 1;
-  EXPECT_THROW_MSG(hmm_marginal_lpdf(log_omegas, Gamma_bad, rho),
+  EXPECT_THROW_MSG(hmm_marginal(log_omegas, Gamma_bad, rho),
                    std::domain_error,
-                   "hmm_marginal_lpdf: Gamma[i, ] is not a valid simplex. "
+                   "hmm_marginal: Gamma[i, ] is not a valid simplex. "
                    "sum(Gamma[i, ]) = 2, but should be 1")
 
   // The size of Gamma is 0, even though there is at least one transition
   MatrixXd Gamma_empty(0, 0);
   EXPECT_THROW_MSG(
-      hmm_marginal_lpdf(log_omegas, Gamma_empty, rho), std::invalid_argument,
-      "hmm_marginal_lpdf: Gamma has size 0, but must have a non-zero size")
+      hmm_marginal(log_omegas, Gamma_empty, rho), std::invalid_argument,
+      "hmm_marginal: Gamma has size 0, but must have a non-zero size")
 
   // The size of Gamma is inconsistent with that of log_omega
   MatrixXd Gamma_wrong_size(n_states + 1, n_states + 1);
 
-  EXPECT_THROW_MSG(hmm_marginal_lpdf(log_omegas, Gamma_wrong_size, rho),
+  EXPECT_THROW_MSG(hmm_marginal(log_omegas, Gamma_wrong_size, rho),
                    std::invalid_argument,
-                   "hmm_marginal_lpdf: Columns of Gamma (3)"
+                   "hmm_marginal: Columns of Gamma (3)"
                    " and Rows of log_omegas (2) must match in size")
 
   // rho is not a simplex.
   VectorXd rho_bad = rho;
   rho_bad(0) = rho(0) + 1;
-  EXPECT_THROW_MSG(hmm_marginal_lpdf(log_omegas, Gamma, rho_bad),
+  EXPECT_THROW_MSG(hmm_marginal(log_omegas, Gamma, rho_bad),
                    std::domain_error,
-                   "hmm_marginal_lpdf: rho is not a valid simplex. "
+                   "hmm_marginal: rho is not a valid simplex. "
                    "sum(rho) = 2, but should be 1")
 
   // The size of rho is inconsistent with that of log_omega
   VectorXd rho_wrong_size(n_states + 1);
   EXPECT_THROW_MSG(
-      hmm_marginal_lpdf(log_omegas, Gamma, rho_wrong_size),
+      hmm_marginal(log_omegas, Gamma, rho_wrong_size),
       std::invalid_argument,
-      "hmm_marginal_lpdf: rho has dimension = 3, expecting dimension = 2;"
+      "hmm_marginal: rho has dimension = 3, expecting dimension = 2;"
       " a function was called with arguments of different scalar,"
       " array, vector, or matrix types, and they were not consistently sized;"
       "  all arguments must be scalars or multidimensional values of"

From a579c5bacb1dc76628b7568412fffaeac86e22b1 Mon Sep 17 00:00:00 2001
From: Charles Margossian <charlesm@metrumrg.com>
Date: Mon, 18 May 2020 10:53:25 -0400
Subject: [PATCH 09/21] rename prob function hmm_hidden_state_prob.

---
 stan/math/prim/prob.hpp                                |  2 +-
 ...ent_marginal_prob.hpp => hmm_hidden_state_prob.hpp} |  8 ++++----
 ...al_prob_test.cpp => hmm_hidden_state_prob_test.cpp} | 10 +++++-----
 3 files changed, 10 insertions(+), 10 deletions(-)
 rename stan/math/prim/prob/{hmm_latent_marginal_prob.hpp => hmm_hidden_state_prob.hpp} (93%)
 rename test/unit/math/prim/prob/{hmm_latent_marginal_prob_test.cpp => hmm_hidden_state_prob_test.cpp} (78%)

diff --git a/stan/math/prim/prob.hpp b/stan/math/prim/prob.hpp
index 2ed44f0beeb..8d7190e0a15 100644
--- a/stan/math/prim/prob.hpp
+++ b/stan/math/prim/prob.hpp
@@ -134,7 +134,7 @@
 #include <stan/math/prim/prob/gumbel_log.hpp>
 #include <stan/math/prim/prob/gumbel_lpdf.hpp>
 #include <stan/math/prim/prob/gumbel_rng.hpp>
-#include <stan/math/prim/prob/hmm_latent_marginal_prob.hpp>
+#include <stan/math/prim/prob/hmm_hidden_state_prob.hpp>
 #include <stan/math/prim/prob/hmm_latent_rng.hpp>
 #include <stan/math/prim/prob/hmm_marginal.hpp>
 #include <stan/math/prim/prob/hypergeometric_log.hpp>
diff --git a/stan/math/prim/prob/hmm_latent_marginal_prob.hpp b/stan/math/prim/prob/hmm_hidden_state_prob.hpp
similarity index 93%
rename from stan/math/prim/prob/hmm_latent_marginal_prob.hpp
rename to stan/math/prim/prob/hmm_hidden_state_prob.hpp
index 60b43d69332..27fc1e0baa8 100644
--- a/stan/math/prim/prob/hmm_latent_marginal_prob.hpp
+++ b/stan/math/prim/prob/hmm_hidden_state_prob.hpp
@@ -1,5 +1,5 @@
-#ifndef STAN_MATH_PRIM_PROB_HMM_LATENT_MARGINAL_PROB_HPP
-#define STAN_MATH_PRIM_PROB_HMM_LATENT_MARGINAL_PROB_HPP
+#ifndef STAN_MATH_PRIM_PROB_HMM_HIDDEN_STATE_PROB_HPP
+#define STAN_MATH_PRIM_PROB_HMM_HIDDEN_STATE_PROB_HPP
 
 #include <stan/math/prim/core.hpp>
 #include <stan/math/prim/meta.hpp>
@@ -43,14 +43,14 @@ namespace math {
  *         of Gamma are not a simplex
  */
 template <typename T_omega, typename T_Gamma, typename T_rho>
-inline Eigen::MatrixXd hmm_latent_marginal_prob(
+inline Eigen::MatrixXd hmm_hidden_state_prob(
     const Eigen::Matrix<T_omega, Eigen::Dynamic, Eigen::Dynamic>& log_omegas,
     const Eigen::Matrix<T_Gamma, Eigen::Dynamic, Eigen::Dynamic>& Gamma,
     const Eigen::Matrix<T_rho, Eigen::Dynamic, 1>& rho) {
   int n_states = log_omegas.rows();
   int n_transitions = log_omegas.cols() - 1;
 
-  hmm_check(log_omegas, Gamma, rho, "hmm_latent_marginal_prob");
+  hmm_check(log_omegas, Gamma, rho, "hmm_hidden_state_prob");
 
   Eigen::MatrixXd omegas = value_of(log_omegas).array().exp();
   Eigen::VectorXd rho_dbl = value_of(rho);
diff --git a/test/unit/math/prim/prob/hmm_latent_marginal_prob_test.cpp b/test/unit/math/prim/prob/hmm_hidden_state_prob_test.cpp
similarity index 78%
rename from test/unit/math/prim/prob/hmm_latent_marginal_prob_test.cpp
rename to test/unit/math/prim/prob/hmm_hidden_state_prob_test.cpp
index 1a1be382921..f126854b231 100644
--- a/test/unit/math/prim/prob/hmm_latent_marginal_prob_test.cpp
+++ b/test/unit/math/prim/prob/hmm_hidden_state_prob_test.cpp
@@ -1,5 +1,5 @@
 #include <test/unit/math/prim/prob/hmm_marginal_test.cpp>
-#include <stan/math/prim/prob/hmm_latent_marginal_prob.hpp>
+#include <stan/math/prim/prob/hmm_hidden_state_prob.hpp>
 #include <boost/math/distributions.hpp>
 #include <boost/random.hpp>
 #include <test/unit/math/test_ad.hpp>
@@ -9,7 +9,7 @@
 #include <vector>
 
 TEST_F(hmm_test, latent_prob_single_outcome) {
-  using stan::math::hmm_latent_marginal_prob;
+  using stan::math::hmm_hidden_state_prob;
 
   int n_states = 2;
   Eigen::MatrixXd Gamma(n_states, n_states);
@@ -17,7 +17,7 @@ TEST_F(hmm_test, latent_prob_single_outcome) {
   Eigen::VectorXd rho(n_states);
   rho << 1, 0;
 
-  Eigen::MatrixXd prob = hmm_latent_marginal_prob(log_omegas_, Gamma, rho);
+  Eigen::MatrixXd prob = hmm_hidden_state_prob(log_omegas_, Gamma, rho);
 
   for (int i = 0; i < n_transitions_; i++) {
     EXPECT_EQ(prob(0, i), 1);
@@ -29,13 +29,13 @@ TEST_F(hmm_test, latent_prob_identity_transition) {
   // With an identity transition matrix, all latent probabilities
   // are equal. Setting the log density to 1 for all states makes
   // the initial prob drive the subsequent probabilities.
-  using stan::math::hmm_latent_marginal_prob;
+  using stan::math::hmm_hidden_state_prob;
   int n_states = 2;
   Eigen::MatrixXd Gamma = Eigen::MatrixXd::Identity(n_states, n_states);
   Eigen::MatrixXd log_omegas
       = Eigen::MatrixXd::Ones(n_states, n_transitions_ + 1);
 
-  Eigen::MatrixXd prob = hmm_latent_marginal_prob(log_omegas, Gamma, rho_);
+  Eigen::MatrixXd prob = hmm_hidden_state_prob(log_omegas, Gamma, rho_);
 
   for (int i = 0; i < n_transitions_; i++) {
     EXPECT_FLOAT_EQ(prob(0, i), rho_(0));

From 0d2572e0508e6cb9dc8eeb5fb09f159beb0d9957 Mon Sep 17 00:00:00 2001
From: Charles Margossian <charlesm@metrumrg.com>
Date: Mon, 18 May 2020 14:26:39 -0400
Subject: [PATCH 10/21] Add unit test for hmm_hidden_state_prob.

---
 .../prim/prob/hmm_hidden_state_prob_test.cpp  | 32 +++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/test/unit/math/prim/prob/hmm_hidden_state_prob_test.cpp b/test/unit/math/prim/prob/hmm_hidden_state_prob_test.cpp
index f126854b231..bf47683125f 100644
--- a/test/unit/math/prim/prob/hmm_hidden_state_prob_test.cpp
+++ b/test/unit/math/prim/prob/hmm_hidden_state_prob_test.cpp
@@ -8,7 +8,7 @@
 #include <limits>
 #include <vector>
 
-TEST_F(hmm_test, latent_prob_single_outcome) {
+TEST_F(hmm_test, hidden_state_single_outcome) {
   using stan::math::hmm_hidden_state_prob;
 
   int n_states = 2;
@@ -25,7 +25,7 @@ TEST_F(hmm_test, latent_prob_single_outcome) {
   }
 }
 
-TEST_F(hmm_test, latent_prob_identity_transition) {
+TEST_F(hmm_test, hidden_state_identity_transition) {
   // With an identity transition matrix, all latent probabilities
   // are equal. Setting the log density to 1 for all states makes
   // the initial prob drive the subsequent probabilities.
@@ -42,3 +42,31 @@ TEST_F(hmm_test, latent_prob_identity_transition) {
     EXPECT_FLOAT_EQ(prob(1, i), rho_(1));
   }
 }
+
+TEST(hmm_test_nonstandard, hidden_state_symmetry) {
+  // In this two states situation, the latent states are
+  // symmetric, based on the observational log density,
+  // and transition matrix.
+  // The initial conditions introduces an asymmetry in the first
+  // state.
+  // Therefore the hidden states all have probability 0.5.
+  using stan::math::hmm_hidden_state_prob;
+  int n_states = 2;
+  int n_transitions = 2;
+  Eigen::MatrixXd Gamma(n_states, n_states);
+  Gamma << 0.5, 0.5, 0.5, 0.5;
+  Eigen::VectorXd rho(n_states);
+  rho << 0.3, 0.7;
+  Eigen::MatrixXd log_omegas
+    = Eigen::MatrixXd::Ones(n_states, n_transitions + 1);
+
+  Eigen::MatrixXd prob = hmm_hidden_state_prob(log_omegas, Gamma, rho);
+
+  EXPECT_FLOAT_EQ(prob(0, 0), 0.3);
+  EXPECT_FLOAT_EQ(prob(1, 0), 0.7);
+
+  for (int i = 1; i < n_transitions; i++) {
+    EXPECT_FLOAT_EQ(prob(0, i), 0.5);
+    EXPECT_FLOAT_EQ(prob(1, i), 0.5);
+  }
+}

From e5dc5474257696853798bff6d3c574ee989990a4 Mon Sep 17 00:00:00 2001
From: Charles Margossian <charlesm@metrumrg.com>
Date: Tue, 19 May 2020 09:40:43 -0400
Subject: [PATCH 11/21] Add independence check and util header file for hmm
 tests.

---
 .../prim/prob/hmm_hidden_state_prob_test.cpp  |   5 +-
 .../math/prim/prob/hmm_latent_rng_test.cpp    |  74 +++++++++++-
 .../unit/math/prim/prob/hmm_marginal_test.cpp | 105 +---------------
 test/unit/math/prim/prob/hmm_util.hpp         | 112 ++++++++++++++++++
 4 files changed, 188 insertions(+), 108 deletions(-)
 create mode 100644 test/unit/math/prim/prob/hmm_util.hpp

diff --git a/test/unit/math/prim/prob/hmm_hidden_state_prob_test.cpp b/test/unit/math/prim/prob/hmm_hidden_state_prob_test.cpp
index bf47683125f..85cd346c31b 100644
--- a/test/unit/math/prim/prob/hmm_hidden_state_prob_test.cpp
+++ b/test/unit/math/prim/prob/hmm_hidden_state_prob_test.cpp
@@ -1,4 +1,4 @@
-#include <test/unit/math/prim/prob/hmm_marginal_test.cpp>
+#include <test/unit/math/prim/prob/hmm_util.hpp>
 #include <stan/math/prim/prob/hmm_hidden_state_prob.hpp>
 #include <boost/math/distributions.hpp>
 #include <boost/random.hpp>
@@ -48,8 +48,7 @@ TEST(hmm_test_nonstandard, hidden_state_symmetry) {
   // symmetric, based on the observational log density,
   // and transition matrix.
   // The initial conditions introduces an asymmetry in the first
-  // state.
-  // Therefore the hidden states all have probability 0.5.
+  // state. The other hidden states all have probability 0.5.
   using stan::math::hmm_hidden_state_prob;
   int n_states = 2;
   int n_transitions = 2;
diff --git a/test/unit/math/prim/prob/hmm_latent_rng_test.cpp b/test/unit/math/prim/prob/hmm_latent_rng_test.cpp
index 34f94a24f85..fa66d00f5f0 100644
--- a/test/unit/math/prim/prob/hmm_latent_rng_test.cpp
+++ b/test/unit/math/prim/prob/hmm_latent_rng_test.cpp
@@ -1,6 +1,7 @@
-#include <test/unit/math/prim/prob/hmm_marginal_test.cpp>
+#include <test/unit/math/prim/prob/hmm_util.hpp>
 #include <test/unit/math/prim/prob/vector_rng_test_helper.hpp>
 #include <stan/math/prim/prob/hmm_latent_rng.hpp>
+#include <stan/math/prim/prob/chi_square_lcdf.hpp>
 #include <boost/math/distributions.hpp>
 #include <boost/random.hpp>
 #include <test/unit/math/test_ad.hpp>
@@ -41,3 +42,74 @@ TEST(hmm_rng_test, chiSquareGoodnessFitTest) {
 
   assert_chi_squared(counts, expected, 1e-6);
 }
+
+TEST(hmm_rng_test, chiSquareGoodnessFitTest_symmetric) {
+  // In this two states situation, the latent states are
+  // symmetric, based on the observational log density,
+  // and transition matrix.
+  // The initial conditions introduces an asymmetry in the first
+  // state. The other hidden states all have probability 0.5.
+  // Note that the hidden states are also uncorrelated.
+  using stan::math::hmm_latent_rng;
+
+  int n_states = 2;
+  int n_transitions = 1;
+  Eigen::MatrixXd Gamma(n_states, n_states);
+  Gamma << 0.5, 0.5, 0.5, 0.5;
+  Eigen::VectorXd rho(n_states);
+  rho << 0.3, 0.7;
+  Eigen::MatrixXd log_omegas
+    = Eigen::MatrixXd::Ones(n_states, n_transitions + 1);
+
+  boost::random::mt19937 rng;
+  int N = 10000;
+
+  std::vector<double> expected_0;
+  expected_0.push_back(N * rho(0));
+  expected_0.push_back(N * rho(1));
+
+  std::vector<double> expected_1;
+  expected_1.push_back(N * 0.5);
+  expected_1.push_back(N * 0.5);
+
+  std::vector<int> counts_0(2);
+  std::vector<int> counts_1(2);
+  // int product = 0;
+  std::vector<int> states;
+  int a = 0, b = 0, c = 0, d = 0;
+  for (int i = 0; i < N; ++i) {
+    states = hmm_latent_rng(log_omegas, Gamma, rho, rng);
+    ++counts_0[states[0]];
+    ++counts_1[states[1]];
+    // product += states[0] * states[1];
+    a += (states[0] == 0 && states[1] == 0);
+    b += (states[0] == 0 && states[1] == 1);
+    c += (states[0] == 1 && states[1] == 0);
+    d += (states[0] == 1 && states[1] == 1);
+  }
+
+  // Test the marginal probabilities of each variable
+  assert_chi_squared(counts_0, expected_0, 1e-6);
+  assert_chi_squared(counts_1, expected_1, 1e-6);
+
+  // Test that the two states are independent, using a chi squared
+  // test for independence.
+  Eigen::MatrixXd Expected(n_states, (n_transitions + 1));
+  Expected << (a + b) * (a + c), (a + b) * (b + d),
+              (c + d) * (a + c), (c + d) * (b + d);
+  Expected = Expected / N;
+
+  Eigen::MatrixXd Observed(n_states, (n_transitions + 1));
+  Observed << a, b, c, d;
+  double chi = 0;
+
+  for (int i = 0; i < n_states; ++i)
+    for (int j = 0; j < n_transitions + 1; ++j)
+      chi += (Observed(i, j) - Expected(i, j))
+              * (Observed(i, j) - Expected(i, j)) / Expected(i, j);
+
+  int nu = 1;
+  double p_value = exp(stan::math::chi_square_lcdf(chi, nu));
+  double threshold = 0.1;  // CHECK -- what is an appropriate threshold?
+  EXPECT_TRUE(p_value > threshold);
+}
diff --git a/test/unit/math/prim/prob/hmm_marginal_test.cpp b/test/unit/math/prim/prob/hmm_marginal_test.cpp
index e1f901c1739..7aad4865149 100644
--- a/test/unit/math/prim/prob/hmm_marginal_test.cpp
+++ b/test/unit/math/prim/prob/hmm_marginal_test.cpp
@@ -1,3 +1,4 @@
+#include <test/unit/math/prim/prob/hmm_util.hpp>
 #include <stan/math/prim/prob/hmm_marginal.hpp>
 #include <boost/math/distributions.hpp>
 #include <boost/random.hpp>
@@ -7,110 +8,6 @@
 #include <limits>
 #include <vector>
 
-/**
- * Wrapper around hmm_marginal_density which passes rho and
- * Gamma without the last element of each column. We recover
- * the last element using the fact each column sums to 1.
- * The purpose of this function is to do finite diff benchmarking,
- * without breaking the simplex constraint.
- */
-template <typename T_omega, typename T_Gamma, typename T_rho>
-inline stan::return_type_t<T_omega, T_Gamma, T_rho> hmm_marginal_test_wrapper(
-    const Eigen::Matrix<T_omega, Eigen::Dynamic, Eigen::Dynamic>& log_omegas,
-    const Eigen::Matrix<T_Gamma, Eigen::Dynamic, Eigen::Dynamic>&
-        Gamma_unconstrained,
-    const std::vector<T_rho>& rho_unconstrained) {
-  using stan::math::row;
-  using stan::math::sum;
-  int n_states = log_omegas.rows();
-
-  Eigen::Matrix<T_Gamma, Eigen::Dynamic, Eigen::Dynamic> Gamma(n_states,
-                                                               n_states);
-  for (int i = 0; i < n_states; i++) {
-    Gamma(i, n_states - 1) = 1 - sum(row(Gamma_unconstrained, i + 1));
-    for (int j = 0; j < n_states - 1; j++) {
-      Gamma(i, j) = Gamma_unconstrained(i, j);
-    }
-  }
-
-  Eigen::Matrix<T_rho, Eigen::Dynamic, 1> rho(n_states);
-  rho(1) = 1 - sum(rho_unconstrained);
-  for (int i = 0; i < n_states - 1; i++)
-    rho(i) = rho_unconstrained[i];
-
-  return stan::math::hmm_marginal(log_omegas, Gamma, rho);
-}
-
-/**
- * In the proposed example, the latent state x determines
- * the observational distribution:
- *  0: normal(mu, sigma)
- *  1: normal(-mu, sigma)
- */
-double state_lpdf(double y, double abs_mu, double sigma, int state) {
-  int x = state == 0 ? 1 : -1;
-  double chi = (y - x * abs_mu) / sigma;
-  return -0.5 * chi * chi - 0.5 * std::log(2 * M_PI) - std::log(sigma);
-}
-
-class hmm_test : public ::testing::Test {
- protected:
-  void SetUp() override {
-    n_states_ = 2;
-    p1_init_ = 0.65;
-    gamma1_ = 0.7;
-    gamma2_ = 0.45;
-    n_transitions_ = 10;
-    abs_mu_ = 1;
-    sigma_ = 1;
-
-    Eigen::VectorXd rho(n_states_);
-    rho << p1_init_, 1 - p1_init_;
-    rho_ = rho;
-
-    Eigen::MatrixXd Gamma(n_states_, n_states_);
-    Gamma << gamma1_, 1 - gamma1_, gamma2_, 1 - gamma2_;
-    Gamma_ = Gamma;
-
-    Eigen::VectorXd obs_data(n_transitions_ + 1);
-    obs_data << -0.3315914, -0.1655340, -0.7984021, 0.2364608, -0.4489722,
-        2.1831438, -1.4778675, 0.8717423, -1.0370874, 0.1370296, 1.9786208;
-    obs_data_ = obs_data;
-
-    Eigen::MatrixXd log_omegas(n_states_, n_transitions_ + 1);
-    for (int n = 0; n < n_transitions_ + 1; n++) {
-      log_omegas.col(n)[0] = state_lpdf(obs_data[n], abs_mu_, sigma_, 0);
-      log_omegas.col(n)[1] = state_lpdf(obs_data[n], abs_mu_, sigma_, 1);
-    }
-    log_omegas_ = log_omegas;
-    log_omegas_zero_ = log_omegas.block(0, 0, n_states_, 1);
-
-    std::vector<double> rho_unconstrained(n_states_ - 1);
-    for (int i = 0; i < rho.size() - 1; i++)
-      rho_unconstrained[i] = rho(i);
-    rho_unconstrained_ = rho_unconstrained;
-
-    Gamma_unconstrained_ = Gamma.block(0, 0, n_states_, n_states_ - 1);
-  }
-
-  int n_states_, n_transitions_;
-  double abs_mu_, sigma_, p1_init_, gamma1_, gamma2_;
-
-  Eigen::VectorXd rho_;
-  Eigen::MatrixXd Gamma_;
-  Eigen::VectorXd obs_data_;
-  Eigen::MatrixXd log_omegas_;
-  Eigen::MatrixXd log_omegas_zero_;
-
-  // Construct "unconstrained" versions of rho and Gamma, without
-  // the final element which can be determnied using the fact
-  // the columns sum to 1. This allows us to do finite diff tests,
-  // without violating the simplex constraint of rho and Gamma.
-  std::vector<double> rho_unconstrained_;
-  Eigen::MatrixXd Gamma_unconstrained_;
-  stan::test::ad_tolerances tols_;
-};
-
 // For evaluation of the density, the C++ code is benchmarked against
 // a forward algorithm written in R.
 // TODO(charlesm93): Add public repo link with R script.
diff --git a/test/unit/math/prim/prob/hmm_util.hpp b/test/unit/math/prim/prob/hmm_util.hpp
new file mode 100644
index 00000000000..e2274076e21
--- /dev/null
+++ b/test/unit/math/prim/prob/hmm_util.hpp
@@ -0,0 +1,112 @@
+#include <stan/math/prim/prob/hmm_marginal.hpp>
+#include <boost/math/distributions.hpp>
+#include <boost/random.hpp>
+#include <test/unit/math/test_ad.hpp>
+#include <test/unit/util.hpp>
+#include <gtest/gtest.h>
+#include <limits>
+#include <vector>
+
+/**
+ * Wrapper around hmm_marginal_density which passes rho and
+ * Gamma without the last element of each column. We recover
+ * the last element using the fact each column sums to 1.
+ * The purpose of this function is to do finite diff benchmarking,
+ * without breaking the simplex constraint.
+ */
+template <typename T_omega, typename T_Gamma, typename T_rho>
+inline stan::return_type_t<T_omega, T_Gamma, T_rho> hmm_marginal_test_wrapper(
+    const Eigen::Matrix<T_omega, Eigen::Dynamic, Eigen::Dynamic>& log_omegas,
+    const Eigen::Matrix<T_Gamma, Eigen::Dynamic, Eigen::Dynamic>&
+        Gamma_unconstrained,
+    const std::vector<T_rho>& rho_unconstrained) {
+  using stan::math::row;
+  using stan::math::sum;
+  int n_states = log_omegas.rows();
+
+  Eigen::Matrix<T_Gamma, Eigen::Dynamic, Eigen::Dynamic> Gamma(n_states,
+                                                               n_states);
+  for (int i = 0; i < n_states; i++) {
+    Gamma(i, n_states - 1) = 1 - sum(row(Gamma_unconstrained, i + 1));
+    for (int j = 0; j < n_states - 1; j++) {
+      Gamma(i, j) = Gamma_unconstrained(i, j);
+    }
+  }
+
+  Eigen::Matrix<T_rho, Eigen::Dynamic, 1> rho(n_states);
+  rho(1) = 1 - sum(rho_unconstrained);
+  for (int i = 0; i < n_states - 1; i++)
+    rho(i) = rho_unconstrained[i];
+
+  return stan::math::hmm_marginal(log_omegas, Gamma, rho);
+}
+
+/**
+ * In the proposed example, the latent state x determines
+ * the observational distribution:
+ *  0: normal(mu, sigma)
+ *  1: normal(-mu, sigma)
+ */
+double state_lpdf(double y, double abs_mu, double sigma, int state) {
+  int x = state == 0 ? 1 : -1;
+  double chi = (y - x * abs_mu) / sigma;
+  return -0.5 * chi * chi - 0.5 * std::log(2 * M_PI) - std::log(sigma);
+}
+
+class hmm_test : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    n_states_ = 2;
+    p1_init_ = 0.65;
+    gamma1_ = 0.7;
+    gamma2_ = 0.45;
+    n_transitions_ = 10;
+    abs_mu_ = 1;
+    sigma_ = 1;
+
+    Eigen::VectorXd rho(n_states_);
+    rho << p1_init_, 1 - p1_init_;
+    rho_ = rho;
+
+    Eigen::MatrixXd Gamma(n_states_, n_states_);
+    Gamma << gamma1_, 1 - gamma1_, gamma2_, 1 - gamma2_;
+    Gamma_ = Gamma;
+
+    Eigen::VectorXd obs_data(n_transitions_ + 1);
+    obs_data << -0.3315914, -0.1655340, -0.7984021, 0.2364608, -0.4489722,
+        2.1831438, -1.4778675, 0.8717423, -1.0370874, 0.1370296, 1.9786208;
+    obs_data_ = obs_data;
+
+    Eigen::MatrixXd log_omegas(n_states_, n_transitions_ + 1);
+    for (int n = 0; n < n_transitions_ + 1; n++) {
+      log_omegas.col(n)[0] = state_lpdf(obs_data[n], abs_mu_, sigma_, 0);
+      log_omegas.col(n)[1] = state_lpdf(obs_data[n], abs_mu_, sigma_, 1);
+    }
+    log_omegas_ = log_omegas;
+    log_omegas_zero_ = log_omegas.block(0, 0, n_states_, 1);
+
+    std::vector<double> rho_unconstrained(n_states_ - 1);
+    for (int i = 0; i < rho.size() - 1; i++)
+      rho_unconstrained[i] = rho(i);
+    rho_unconstrained_ = rho_unconstrained;
+
+    Gamma_unconstrained_ = Gamma.block(0, 0, n_states_, n_states_ - 1);
+  }
+
+  int n_states_, n_transitions_;
+  double abs_mu_, sigma_, p1_init_, gamma1_, gamma2_;
+
+  Eigen::VectorXd rho_;
+  Eigen::MatrixXd Gamma_;
+  Eigen::VectorXd obs_data_;
+  Eigen::MatrixXd log_omegas_;
+  Eigen::MatrixXd log_omegas_zero_;
+
+  // Construct "unconstrained" versions of rho and Gamma, without
+  // the final element which can be determnied using the fact
+  // the columns sum to 1. This allows us to do finite diff tests,
+  // without violating the simplex constraint of rho and Gamma.
+  std::vector<double> rho_unconstrained_;
+  Eigen::MatrixXd Gamma_unconstrained_;
+  stan::test::ad_tolerances tols_;
+};

From cff697d053a337cd6d5e8ca7e4cf3b9c9b851759 Mon Sep 17 00:00:00 2001
From: Charles Margossian <charlesm@metrumrg.com>
Date: Tue, 19 May 2020 10:30:16 -0400
Subject: [PATCH 12/21] Add simpler unit test.

---
 .../math/prim/prob/hmm_latent_rng_test.cpp    | 49 ++++++++++++-------
 1 file changed, 31 insertions(+), 18 deletions(-)

diff --git a/test/unit/math/prim/prob/hmm_latent_rng_test.cpp b/test/unit/math/prim/prob/hmm_latent_rng_test.cpp
index fa66d00f5f0..fe2a8090fab 100644
--- a/test/unit/math/prim/prob/hmm_latent_rng_test.cpp
+++ b/test/unit/math/prim/prob/hmm_latent_rng_test.cpp
@@ -92,24 +92,37 @@ TEST(hmm_rng_test, chiSquareGoodnessFitTest_symmetric) {
   assert_chi_squared(counts_0, expected_0, 1e-6);
   assert_chi_squared(counts_1, expected_1, 1e-6);
 
+  // Test for independence (0 correlation by construction).
+  // By independence E(XY) = E(X)E(Y). We compute the R.H.S
+  // analytically and the L.H.S numerically.
+  std::vector<int> counts_xy(2);
+  counts_xy[0] = a;
+  counts_xy[1] = c;
+  std::vector<double> expected_xy;
+  expected_xy.push_back(N * rho(0) * 0.5);
+  expected_xy.push_back(N * rho(1) * 0.5);
+  assert_chi_squared(counts_xy, expected_xy, 1e-6);
+
+  // DRAFT -- code for chi-squared independence test.
+  // (overkill, since we have analytical prob for each cell)
   // Test that the two states are independent, using a chi squared
   // test for independence.
-  Eigen::MatrixXd Expected(n_states, (n_transitions + 1));
-  Expected << (a + b) * (a + c), (a + b) * (b + d),
-              (c + d) * (a + c), (c + d) * (b + d);
-  Expected = Expected / N;
-
-  Eigen::MatrixXd Observed(n_states, (n_transitions + 1));
-  Observed << a, b, c, d;
-  double chi = 0;
-
-  for (int i = 0; i < n_states; ++i)
-    for (int j = 0; j < n_transitions + 1; ++j)
-      chi += (Observed(i, j) - Expected(i, j))
-              * (Observed(i, j) - Expected(i, j)) / Expected(i, j);
-
-  int nu = 1;
-  double p_value = exp(stan::math::chi_square_lcdf(chi, nu));
-  double threshold = 0.1;  // CHECK -- what is an appropriate threshold?
-  EXPECT_TRUE(p_value > threshold);
+  // Eigen::MatrixXd Expected(n_states, (n_transitions + 1));
+  // Expected << (a + b) * (a + c), (a + b) * (b + d),
+  //             (c + d) * (a + c), (c + d) * (b + d);
+  // Expected = Expected / N;
+  //
+  // Eigen::MatrixXd Observed(n_states, (n_transitions + 1));
+  // Observed << a, b, c, d;
+  // double chi = 0;
+  //
+  // for (int i = 0; i < n_states; ++i)
+  //   for (int j = 0; j < n_transitions + 1; ++j)
+  //     chi += (Observed(i, j) - Expected(i, j))
+  //             * (Observed(i, j) - Expected(i, j)) / Expected(i, j);
+  //
+  // int nu = 1;
+  // double p_value = exp(stan::math::chi_square_lcdf(chi, nu));
+  // double threshold = 0.1;  // CHECK -- what is an appropriate threshold?
+  // EXPECT_TRUE(p_value > threshold);
 }

From 9f7645565edb992e9815546fb83291124dff8ad3 Mon Sep 17 00:00:00 2001
From: tadej <tadej.c@gmail.com>
Date: Thu, 21 May 2020 12:48:25 +0200
Subject: [PATCH 13/21] tmp

---
 stan/math/prim/prob/bernoulli_lpmf.hpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/stan/math/prim/prob/bernoulli_lpmf.hpp b/stan/math/prim/prob/bernoulli_lpmf.hpp
index 515c100b0a7..20f40d2fd29 100644
--- a/stan/math/prim/prob/bernoulli_lpmf.hpp
+++ b/stan/math/prim/prob/bernoulli_lpmf.hpp
@@ -31,11 +31,12 @@ return_type_t<T_prob> bernoulli_lpmf(const T_n& n, const T_prob& theta) {
   using T_partials_return = partials_return_t<T_n, T_prob>;
   using std::log;
   static const char* function = "bernoulli_lpmf";
-  check_bounded(function, "n", n, 0, 1);
-  check_finite(function, "Probability parameter", theta);
-  check_bounded(function, "Probability parameter", theta, 0.0, 1.0);
   check_consistent_sizes(function, "Random variable", n,
                          "Probability parameter", theta);
+  const auto& n_ref = to_ref(n);
+  const auto& theta_ref = to_ref(theta);
+  check_bounded(function, "n", n_ref, 0, 1);
+  check_bounded(function, "Probability parameter", theta_ref, 0.0, 1.0);
 
   if (size_zero(n, theta)) {
     return 0.0;
@@ -45,14 +46,13 @@ return_type_t<T_prob> bernoulli_lpmf(const T_n& n, const T_prob& theta) {
   }
 
   T_partials_return logp(0.0);
-  operands_and_partials<T_prob> ops_partials(theta);
+  operands_and_partials<decltype (theta_ref)> ops_partials(theta_ref);
 
-  scalar_seq_view<T_n> n_vec(n);
-  scalar_seq_view<T_prob> theta_vec(theta);
+  scalar_seq_view<T_n> n_vec(n_ref);
+  scalar_seq_view<T_prob> theta_vec(theta_ref);
   size_t N = max_size(n, theta);
 
   if (size(theta) == 1) {
-    size_t sum = 0;
     for (size_t n = 0; n < N; n++) {
       sum += value_of(n_vec[n]);
     }

From d4aeee01cceca02b7d39193e5b1fe1797c2f5e1e Mon Sep 17 00:00:00 2001
From: tadej <tadej.c@gmail.com>
Date: Thu, 21 May 2020 13:46:58 +0200
Subject: [PATCH 14/21] completed bernoulli (no logit)

---
 stan/math/prim/prob/bernoulli_cdf.hpp   |  9 +++++----
 stan/math/prim/prob/bernoulli_lccdf.hpp |  9 +++++----
 stan/math/prim/prob/bernoulli_lcdf.hpp  |  9 +++++----
 stan/math/prim/prob/bernoulli_lpmf.hpp  | 13 ++++++++-----
 4 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/stan/math/prim/prob/bernoulli_cdf.hpp b/stan/math/prim/prob/bernoulli_cdf.hpp
index d4decf90969..3006cefcc8f 100644
--- a/stan/math/prim/prob/bernoulli_cdf.hpp
+++ b/stan/math/prim/prob/bernoulli_cdf.hpp
@@ -27,21 +27,22 @@ namespace math {
 template <typename T_n, typename T_prob>
 return_type_t<T_prob> bernoulli_cdf(const T_n& n, const T_prob& theta) {
   using T_partials_return = partials_return_t<T_n, T_prob>;
+  using T_theta_ref = ref_type_t<T_prob>;
   static const char* function = "bernoulli_cdf";
-  check_finite(function, "Probability parameter", theta);
-  check_bounded(function, "Probability parameter", theta, 0.0, 1.0);
   check_consistent_sizes(function, "Random variable", n,
                          "Probability parameter", theta);
+  T_theta_ref theta_ref = to_ref(theta);
+  check_bounded(function, "Probability parameter", theta_ref, 0.0, 1.0);
 
   if (size_zero(n, theta)) {
     return 1.0;
   }
 
   T_partials_return P(1.0);
-  operands_and_partials<T_prob> ops_partials(theta);
+  operands_and_partials<T_theta_ref> ops_partials(theta_ref);
 
   scalar_seq_view<T_n> n_vec(n);
-  scalar_seq_view<T_prob> theta_vec(theta);
+  scalar_seq_view<T_theta_ref> theta_vec(theta_ref);
   size_t max_size_seq_view = max_size(n, theta);
 
   // Explicit return for extreme values
diff --git a/stan/math/prim/prob/bernoulli_lccdf.hpp b/stan/math/prim/prob/bernoulli_lccdf.hpp
index 4c05d70a90b..97dba0243ad 100644
--- a/stan/math/prim/prob/bernoulli_lccdf.hpp
+++ b/stan/math/prim/prob/bernoulli_lccdf.hpp
@@ -30,22 +30,23 @@ namespace math {
 template <typename T_n, typename T_prob>
 return_type_t<T_prob> bernoulli_lccdf(const T_n& n, const T_prob& theta) {
   using T_partials_return = partials_return_t<T_n, T_prob>;
+  using T_theta_ref = ref_type_t<T_prob>;
   using std::log;
   static const char* function = "bernoulli_lccdf";
-  check_finite(function, "Probability parameter", theta);
-  check_bounded(function, "Probability parameter", theta, 0.0, 1.0);
   check_consistent_sizes(function, "Random variable", n,
                          "Probability parameter", theta);
+  T_theta_ref theta_ref = to_ref(theta);
+  check_bounded(function, "Probability parameter", theta_ref, 0.0, 1.0);
 
   if (size_zero(n, theta)) {
     return 0.0;
   }
 
   T_partials_return P(0.0);
-  operands_and_partials<T_prob> ops_partials(theta);
+  operands_and_partials<T_theta_ref> ops_partials(theta_ref);
 
   scalar_seq_view<T_n> n_vec(n);
-  scalar_seq_view<T_prob> theta_vec(theta);
+  scalar_seq_view<T_theta_ref> theta_vec(theta_ref);
   size_t max_size_seq_view = max_size(n, theta);
 
   // Explicit return for extreme values
diff --git a/stan/math/prim/prob/bernoulli_lcdf.hpp b/stan/math/prim/prob/bernoulli_lcdf.hpp
index ac6327aeed4..5a6e2142890 100644
--- a/stan/math/prim/prob/bernoulli_lcdf.hpp
+++ b/stan/math/prim/prob/bernoulli_lcdf.hpp
@@ -30,22 +30,23 @@ namespace math {
 template <typename T_n, typename T_prob>
 return_type_t<T_prob> bernoulli_lcdf(const T_n& n, const T_prob& theta) {
   using T_partials_return = partials_return_t<T_n, T_prob>;
+  using T_theta_ref = ref_type_t<T_prob>;
   using std::log;
   static const char* function = "bernoulli_lcdf";
-  check_finite(function, "Probability parameter", theta);
-  check_bounded(function, "Probability parameter", theta, 0.0, 1.0);
   check_consistent_sizes(function, "Random variable", n,
                          "Probability parameter", theta);
+  T_theta_ref theta_ref = to_ref(theta);
+  check_bounded(function, "Probability parameter", theta, 0.0, 1.0);
 
   if (size_zero(n, theta)) {
     return 0.0;
   }
 
   T_partials_return P(0.0);
-  operands_and_partials<T_prob> ops_partials(theta);
+  operands_and_partials<T_theta_ref> ops_partials(theta_ref);
 
   scalar_seq_view<T_n> n_vec(n);
-  scalar_seq_view<T_prob> theta_vec(theta);
+  scalar_seq_view<T_theta_ref> theta_vec(theta);
   size_t max_size_seq_view = max_size(n, theta);
 
   // Explicit return for extreme values
diff --git a/stan/math/prim/prob/bernoulli_lpmf.hpp b/stan/math/prim/prob/bernoulli_lpmf.hpp
index 20f40d2fd29..7e749f579a4 100644
--- a/stan/math/prim/prob/bernoulli_lpmf.hpp
+++ b/stan/math/prim/prob/bernoulli_lpmf.hpp
@@ -29,12 +29,14 @@ namespace math {
 template <bool propto, typename T_n, typename T_prob>
 return_type_t<T_prob> bernoulli_lpmf(const T_n& n, const T_prob& theta) {
   using T_partials_return = partials_return_t<T_n, T_prob>;
+  using T_theta_ref = ref_type_t<T_prob>;
+  using T_n_ref = ref_type_t<T_n>;
   using std::log;
   static const char* function = "bernoulli_lpmf";
   check_consistent_sizes(function, "Random variable", n,
                          "Probability parameter", theta);
-  const auto& n_ref = to_ref(n);
-  const auto& theta_ref = to_ref(theta);
+  const T_n_ref n_ref = to_ref(n);
+  const T_theta_ref theta_ref = to_ref(theta);
   check_bounded(function, "n", n_ref, 0, 1);
   check_bounded(function, "Probability parameter", theta_ref, 0.0, 1.0);
 
@@ -46,13 +48,14 @@ return_type_t<T_prob> bernoulli_lpmf(const T_n& n, const T_prob& theta) {
   }
 
   T_partials_return logp(0.0);
-  operands_and_partials<decltype (theta_ref)> ops_partials(theta_ref);
+  operands_and_partials<T_theta_ref> ops_partials(theta_ref);
 
-  scalar_seq_view<T_n> n_vec(n_ref);
-  scalar_seq_view<T_prob> theta_vec(theta_ref);
+  scalar_seq_view<T_n_ref> n_vec(n_ref);
+  scalar_seq_view<T_theta_ref> theta_vec(theta_ref);
   size_t N = max_size(n, theta);
 
   if (size(theta) == 1) {
+    size_t sum = 0;
     for (size_t n = 0; n < N; n++) {
       sum += value_of(n_vec[n]);
     }

From 7d43bf02a071ceac944a9d0e30b02af77495a88b Mon Sep 17 00:00:00 2001
From: Niko Huurre <nhuurre@protonmail.com>
Date: Thu, 21 May 2020 20:49:39 +0300
Subject: [PATCH 15/21] template hack

---
 stan/math/rev/functor/ode_store_sensitivities.hpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/stan/math/rev/functor/ode_store_sensitivities.hpp b/stan/math/rev/functor/ode_store_sensitivities.hpp
index e6e2b28245d..8f9439db08b 100644
--- a/stan/math/rev/functor/ode_store_sensitivities.hpp
+++ b/stan/math/rev/functor/ode_store_sensitivities.hpp
@@ -32,7 +32,9 @@ namespace math {
  */
 template <typename F, typename T_y0_t0, typename T_t0, typename T_t,
           typename... Args>
-Eigen::Matrix<var, Eigen::Dynamic, 1> ode_store_sensitivities(
+Eigen::Matrix<typename std::enable_if_t<is_var<
+    return_type_t<typename F::captured_scalar_t__, T_y0_t0, T_t0, T_t, Args...>
+  >::value, var>, Eigen::Dynamic, 1> ode_store_sensitivities(
     const F& f, const Eigen::VectorXd& coupled_state,
     const Eigen::Matrix<T_y0_t0, Eigen::Dynamic, 1>& y0, const T_t0& t0,
     const T_t& t, std::ostream* msgs, const Args&... args) {

From bd33828bdc474620efe9f9b24ed954a5ec37094c Mon Sep 17 00:00:00 2001
From: tadej <tadej.c@gmail.com>
Date: Mon, 25 May 2020 09:55:39 +0200
Subject: [PATCH 16/21] finished with bernoulli functions

---
 stan/math/prim/meta/as_array_or_scalar.hpp    |  4 +-
 stan/math/prim/prob/bernoulli_cdf.hpp         |  2 +-
 stan/math/prim/prob/bernoulli_lccdf.hpp       |  2 +-
 stan/math/prim/prob/bernoulli_lcdf.hpp        |  4 +-
 .../prim/prob/bernoulli_logit_glm_rng.hpp     | 37 ++++++-----
 stan/math/prim/prob/bernoulli_logit_lpmf.hpp  | 66 ++++++++++---------
 stan/math/prim/prob/bernoulli_logit_rng.hpp   |  5 +-
 stan/math/prim/prob/bernoulli_rng.hpp         |  6 +-
 8 files changed, 68 insertions(+), 58 deletions(-)

diff --git a/stan/math/prim/meta/as_array_or_scalar.hpp b/stan/math/prim/meta/as_array_or_scalar.hpp
index 38deb58f38b..6560b52804e 100644
--- a/stan/math/prim/meta/as_array_or_scalar.hpp
+++ b/stan/math/prim/meta/as_array_or_scalar.hpp
@@ -17,8 +17,8 @@ namespace math {
  * @return Same value.
  */
 template <typename T, typename = require_stan_scalar_t<T>>
-inline const T& as_array_or_scalar(const T& v) {
-  return v;
+inline T as_array_or_scalar(T&& v) {
+  return std::forward<T>(v);
 }
 
 /** \ingroup type_trait
diff --git a/stan/math/prim/prob/bernoulli_cdf.hpp b/stan/math/prim/prob/bernoulli_cdf.hpp
index 3006cefcc8f..0c36305f65b 100644
--- a/stan/math/prim/prob/bernoulli_cdf.hpp
+++ b/stan/math/prim/prob/bernoulli_cdf.hpp
@@ -31,7 +31,7 @@ return_type_t<T_prob> bernoulli_cdf(const T_n& n, const T_prob& theta) {
   static const char* function = "bernoulli_cdf";
   check_consistent_sizes(function, "Random variable", n,
                          "Probability parameter", theta);
-  T_theta_ref theta_ref = to_ref(theta);
+  T_theta_ref theta_ref = theta;
   check_bounded(function, "Probability parameter", theta_ref, 0.0, 1.0);
 
   if (size_zero(n, theta)) {
diff --git a/stan/math/prim/prob/bernoulli_lccdf.hpp b/stan/math/prim/prob/bernoulli_lccdf.hpp
index 97dba0243ad..0e681d1547d 100644
--- a/stan/math/prim/prob/bernoulli_lccdf.hpp
+++ b/stan/math/prim/prob/bernoulli_lccdf.hpp
@@ -35,7 +35,7 @@ return_type_t<T_prob> bernoulli_lccdf(const T_n& n, const T_prob& theta) {
   static const char* function = "bernoulli_lccdf";
   check_consistent_sizes(function, "Random variable", n,
                          "Probability parameter", theta);
-  T_theta_ref theta_ref = to_ref(theta);
+  T_theta_ref theta_ref = theta;
   check_bounded(function, "Probability parameter", theta_ref, 0.0, 1.0);
 
   if (size_zero(n, theta)) {
diff --git a/stan/math/prim/prob/bernoulli_lcdf.hpp b/stan/math/prim/prob/bernoulli_lcdf.hpp
index 5a6e2142890..d5423823389 100644
--- a/stan/math/prim/prob/bernoulli_lcdf.hpp
+++ b/stan/math/prim/prob/bernoulli_lcdf.hpp
@@ -35,7 +35,7 @@ return_type_t<T_prob> bernoulli_lcdf(const T_n& n, const T_prob& theta) {
   static const char* function = "bernoulli_lcdf";
   check_consistent_sizes(function, "Random variable", n,
                          "Probability parameter", theta);
-  T_theta_ref theta_ref = to_ref(theta);
+  T_theta_ref theta_ref = theta;
   check_bounded(function, "Probability parameter", theta, 0.0, 1.0);
 
   if (size_zero(n, theta)) {
@@ -46,7 +46,7 @@ return_type_t<T_prob> bernoulli_lcdf(const T_n& n, const T_prob& theta) {
   operands_and_partials<T_theta_ref> ops_partials(theta_ref);
 
   scalar_seq_view<T_n> n_vec(n);
-  scalar_seq_view<T_theta_ref> theta_vec(theta);
+  scalar_seq_view<T_theta_ref> theta_vec(theta_ref);
   size_t max_size_seq_view = max_size(n, theta);
 
   // Explicit return for extreme values
diff --git a/stan/math/prim/prob/bernoulli_logit_glm_rng.hpp b/stan/math/prim/prob/bernoulli_logit_glm_rng.hpp
index ed48b749be6..54377cad1f2 100644
--- a/stan/math/prim/prob/bernoulli_logit_glm_rng.hpp
+++ b/stan/math/prim/prob/bernoulli_logit_glm_rng.hpp
@@ -38,35 +38,42 @@ namespace math {
  */
 template <typename T_x, typename T_alpha, typename T_beta, class RNG>
 inline typename VectorBuilder<true, int, T_alpha>::type bernoulli_logit_glm_rng(
-    const T_x &x, const T_alpha &alpha, const T_beta &beta, RNG &rng) {
+    const T_x& x, const T_alpha& alpha, const T_beta& beta, RNG& rng) {
   using boost::bernoulli_distribution;
   using boost::variate_generator;
+  using T_x_ref = ref_type_t<T_x>;
+  using T_alpha_ref = ref_type_t<T_alpha>;
+  using T_beta_ref = ref_type_t<T_beta>;
 
-  const size_t N = x.row(0).size();
-  const size_t M = x.col(0).size();
+  const size_t N = x.cols();
+  const size_t M = x.rows();
 
-  static const char *function = "bernoulli_logit_glm_rng";
-  check_finite(function, "Matrix of independent variables", x);
-  check_finite(function, "Weight vector", beta);
-  check_finite(function, "Intercept", alpha);
+  static const char* function = "bernoulli_logit_glm_rng";
   check_consistent_size(function, "Weight vector", beta, N);
   check_consistent_size(function, "Vector of intercepts", alpha, M);
+  T_x_ref x_ref = x;
+  T_alpha_ref alpha_ref = alpha;
+  T_beta_ref beta_ref = beta;
+  check_finite(function, "Matrix of independent variables", x_ref);
+  check_finite(function, "Weight vector", beta_ref);
+  check_finite(function, "Intercept", alpha_ref);
 
-  scalar_seq_view<T_beta> beta_vec(beta);
-  Eigen::VectorXd beta_vector(N);
-  for (int i = 0; i < N; ++i) {
-    beta_vector[i] = beta_vec[i];
-  }
+  const auto& beta_vector = as_column_vector_or_scalar(beta_ref);
 
-  Eigen::VectorXd x_beta = x * beta_vector;
+  Eigen::VectorXd x_beta;
+  if (is_vector<T_beta>::value) {
+    x_beta = x_ref * beta_vector;
+  } else {
+    x_beta = (x_ref.array() * forward_as<double>(beta_vector)).rowwise().sum();
+  }
 
-  scalar_seq_view<T_alpha> alpha_vec(alpha);
+  scalar_seq_view<T_alpha> alpha_vec(alpha_ref);
 
   VectorBuilder<true, int, T_alpha> output(M);
 
   for (size_t m = 0; m < M; ++m) {
     double theta_m = alpha_vec[m] + x_beta(m);
-    variate_generator<RNG &, bernoulli_distribution<>> bernoulli_rng(
+    variate_generator<RNG&, bernoulli_distribution<>> bernoulli_rng(
         rng, bernoulli_distribution<>(inv_logit(theta_m)));
     output[m] = bernoulli_rng();
   }
diff --git a/stan/math/prim/prob/bernoulli_logit_lpmf.hpp b/stan/math/prim/prob/bernoulli_logit_lpmf.hpp
index 418aacb23ef..3e55e02000b 100644
--- a/stan/math/prim/prob/bernoulli_logit_lpmf.hpp
+++ b/stan/math/prim/prob/bernoulli_logit_lpmf.hpp
@@ -8,6 +8,7 @@
 #include <stan/math/prim/fun/max_size.hpp>
 #include <stan/math/prim/fun/size_zero.hpp>
 #include <stan/math/prim/fun/value_of.hpp>
+#include <stan/math/prim/fun/value_of_rec.hpp>
 #include <cmath>
 
 namespace stan {
@@ -28,12 +29,17 @@ namespace math {
 template <bool propto, typename T_n, typename T_prob>
 return_type_t<T_prob> bernoulli_logit_lpmf(const T_n& n, const T_prob& theta) {
   using T_partials_return = partials_return_t<T_n, T_prob>;
+  using T_partials_array = Eigen::Array<T_partials_return, Eigen::Dynamic, 1>;
   using std::exp;
+  using T_n_ref = ref_type_t<T_n>;
+  using T_theta_ref = ref_type_t<T_prob>;
   static const char* function = "bernoulli_logit_lpmf";
-  check_bounded(function, "n", n, 0, 1);
-  check_not_nan(function, "Logit transformed probability parameter", theta);
   check_consistent_sizes(function, "Random variable", n,
                          "Probability parameter", theta);
+  T_n_ref n_ref = n;
+  T_theta_ref theta_ref = theta;
+  check_bounded(function, "n", n_ref, 0, 1);
+  check_not_nan(function, "Logit transformed probability parameter", theta_ref);
 
   if (size_zero(n, theta)) {
     return 0.0;
@@ -43,39 +49,35 @@ return_type_t<T_prob> bernoulli_logit_lpmf(const T_n& n, const T_prob& theta) {
   }
 
   T_partials_return logp(0.0);
-  operands_and_partials<T_prob> ops_partials(theta);
+  operands_and_partials<T_theta_ref> ops_partials(theta_ref);
 
-  scalar_seq_view<T_n> n_vec(n);
-  scalar_seq_view<T_prob> theta_vec(theta);
-  size_t N = max_size(n, theta);
+  const auto& theta_val = as_array_or_scalar(value_of(theta_ref));
 
-  for (size_t n = 0; n < N; n++) {
-    const T_partials_return theta_dbl = value_of(theta_vec[n]);
-
-    const int sign = 2 * n_vec[n] - 1;
-    const T_partials_return ntheta = sign * theta_dbl;
-    const T_partials_return exp_m_ntheta = exp(-ntheta);
-
-    // Handle extreme values gracefully using Taylor approximations.
-    static const double cutoff = 20.0;
-    if (ntheta > cutoff) {
-      logp -= exp_m_ntheta;
-    } else if (ntheta < -cutoff) {
-      logp += ntheta;
-    } else {
-      logp -= log1p(exp_m_ntheta);
-    }
+  auto signs = to_ref_if<!is_constant<T_prob>::value>(
+      (2 * as_array_or_scalar(value_of_rec(n_ref)) - 1));
+  T_partials_array ntheta;
+  if (is_vector<T_n>::value || is_vector<T_prob>::value) {
+    ntheta = forward_as<T_partials_array>(signs * theta_val);
+  } else {
+    T_partials_return theta_val2 = theta_val;
+    T_partials_return ntheta_s = forward_as<T_partials_return>(signs * theta_val);
+    ntheta = T_partials_array::Constant(1, 1,
+                                        ntheta_s);
+  }
+  T_partials_array exp_m_ntheta = exp(-ntheta);
+  static const double cutoff = 20.0;
+  logp += sum(
+      (ntheta > cutoff)
+          .select(-exp_m_ntheta,
+                  (ntheta < -cutoff).select(ntheta, -log1p(exp_m_ntheta))));
 
-    if (!is_constant_all<T_prob>::value) {
-      if (ntheta > cutoff) {
-        ops_partials.edge1_.partials_[n] -= exp_m_ntheta;
-      } else if (ntheta < -cutoff) {
-        ops_partials.edge1_.partials_[n] += sign;
-      } else {
-        ops_partials.edge1_.partials_[n]
-            += sign * exp_m_ntheta / (exp_m_ntheta + 1);
-      }
-    }
+  if (!is_constant_all<T_prob>::value) {
+    ops_partials.edge1_.partials_
+        = (ntheta > cutoff)
+              .select(-exp_m_ntheta,
+                      (ntheta < -cutoff)
+                          .select(signs,
+                                  signs * exp_m_ntheta / (exp_m_ntheta + 1)));
   }
   return ops_partials.build(logp);
 }
diff --git a/stan/math/prim/prob/bernoulli_logit_rng.hpp b/stan/math/prim/prob/bernoulli_logit_rng.hpp
index 5f3ae9903df..7cc493ef018 100644
--- a/stan/math/prim/prob/bernoulli_logit_rng.hpp
+++ b/stan/math/prim/prob/bernoulli_logit_rng.hpp
@@ -30,10 +30,11 @@ inline typename VectorBuilder<true, int, T_t>::type bernoulli_logit_rng(
     const T_t& t, RNG& rng) {
   using boost::bernoulli_distribution;
   using boost::variate_generator;
+  ref_type_t<T_t> t_ref = t;
   check_finite("bernoulli_logit_rng", "Logit transformed probability parameter",
-               t);
+               t_ref);
 
-  scalar_seq_view<T_t> t_vec(t);
+  scalar_seq_view<T_t> t_vec(t_ref);
   size_t N = stan::math::size(t);
   VectorBuilder<true, int, T_t> output(N);
 
diff --git a/stan/math/prim/prob/bernoulli_rng.hpp b/stan/math/prim/prob/bernoulli_rng.hpp
index 8de2f400bda..9e588953712 100644
--- a/stan/math/prim/prob/bernoulli_rng.hpp
+++ b/stan/math/prim/prob/bernoulli_rng.hpp
@@ -30,10 +30,10 @@ inline typename VectorBuilder<true, int, T_theta>::type bernoulli_rng(
   using boost::bernoulli_distribution;
   using boost::variate_generator;
   static const char* function = "bernoulli_rng";
-  check_finite(function, "Probability parameter", theta);
-  check_bounded(function, "Probability parameter", theta, 0.0, 1.0);
+  ref_type_t<T_theta> theta_ref = theta;
+  check_bounded(function, "Probability parameter", theta_ref, 0.0, 1.0);
 
-  scalar_seq_view<T_theta> theta_vec(theta);
+  scalar_seq_view<T_theta> theta_vec(theta_ref);
   size_t N = stan::math::size(theta);
   VectorBuilder<true, int, T_theta> output(N);
 

From aea8e523a6e8c5dfd028336fb40599721e4f744b Mon Sep 17 00:00:00 2001
From: tadej <tadej.c@gmail.com>
Date: Wed, 27 May 2020 12:02:27 +0200
Subject: [PATCH 17/21] bugfixes

---
 stan/math/fwd/meta/operands_and_partials.hpp | 10 +++---
 stan/math/prim/meta/broadcast_array.hpp      |  4 ++-
 stan/math/prim/prob/bernoulli_lcdf.hpp       |  2 +-
 stan/math/prim/prob/bernoulli_logit_lpmf.hpp | 34 +++++++++++++-------
 4 files changed, 31 insertions(+), 19 deletions(-)

diff --git a/stan/math/fwd/meta/operands_and_partials.hpp b/stan/math/fwd/meta/operands_and_partials.hpp
index e622d24def8..c771de79287 100644
--- a/stan/math/fwd/meta/operands_and_partials.hpp
+++ b/stan/math/fwd/meta/operands_and_partials.hpp
@@ -70,11 +70,11 @@ template <typename Op1, typename Op2, typename Op3, typename Op4, typename Op5,
           typename Dx>
 class operands_and_partials<Op1, Op2, Op3, Op4, Op5, fvar<Dx>> {
  public:
-  internal::ops_partials_edge<Dx, Op1> edge1_;
-  internal::ops_partials_edge<Dx, Op2> edge2_;
-  internal::ops_partials_edge<Dx, Op3> edge3_;
-  internal::ops_partials_edge<Dx, Op4> edge4_;
-  internal::ops_partials_edge<Dx, Op5> edge5_;
+  internal::ops_partials_edge<Dx, std::decay_t<Op1>> edge1_;
+  internal::ops_partials_edge<Dx, std::decay_t<Op2>> edge2_;
+  internal::ops_partials_edge<Dx, std::decay_t<Op3>> edge3_;
+  internal::ops_partials_edge<Dx, std::decay_t<Op4>> edge4_;
+  internal::ops_partials_edge<Dx, std::decay_t<Op5>> edge5_;
   using T_return_type = fvar<Dx>;
   explicit operands_and_partials(const Op1& o1) : edge1_(o1) {}
   operands_and_partials(const Op1& o1, const Op2& o2)
diff --git a/stan/math/prim/meta/broadcast_array.hpp b/stan/math/prim/meta/broadcast_array.hpp
index 3c92f4e139b..d7d5c05e3f1 100644
--- a/stan/math/prim/meta/broadcast_array.hpp
+++ b/stan/math/prim/meta/broadcast_array.hpp
@@ -3,6 +3,7 @@
 
 #include <stan/math/prim/meta/is_eigen.hpp>
 #include <stan/math/prim/meta/promote_scalar_type.hpp>
+#include <stan/math/prim/meta/ref_type.hpp>
 #include <stan/math/prim/fun/Eigen.hpp>
 #include <stdexcept>
 
@@ -28,7 +29,8 @@ class broadcast_array {
    */
   template <typename Y>
   void operator=(const Y& m) {
-    prim_ = m[0];
+    ref_type_t<Y> m_ref = m;
+    prim_ = m_ref[0];
   }
 };
 
diff --git a/stan/math/prim/prob/bernoulli_lcdf.hpp b/stan/math/prim/prob/bernoulli_lcdf.hpp
index d5423823389..0d57760837f 100644
--- a/stan/math/prim/prob/bernoulli_lcdf.hpp
+++ b/stan/math/prim/prob/bernoulli_lcdf.hpp
@@ -36,7 +36,7 @@ return_type_t<T_prob> bernoulli_lcdf(const T_n& n, const T_prob& theta) {
   check_consistent_sizes(function, "Random variable", n,
                          "Probability parameter", theta);
   T_theta_ref theta_ref = theta;
-  check_bounded(function, "Probability parameter", theta, 0.0, 1.0);
+  check_bounded(function, "Probability parameter", theta_ref, 0.0, 1.0);
 
   if (size_zero(n, theta)) {
     return 0.0;
diff --git a/stan/math/prim/prob/bernoulli_logit_lpmf.hpp b/stan/math/prim/prob/bernoulli_logit_lpmf.hpp
index 3e55e02000b..39ae5379195 100644
--- a/stan/math/prim/prob/bernoulli_logit_lpmf.hpp
+++ b/stan/math/prim/prob/bernoulli_logit_lpmf.hpp
@@ -51,18 +51,19 @@ return_type_t<T_prob> bernoulli_logit_lpmf(const T_n& n, const T_prob& theta) {
   T_partials_return logp(0.0);
   operands_and_partials<T_theta_ref> ops_partials(theta_ref);
 
-  const auto& theta_val = as_array_or_scalar(value_of(theta_ref));
+  const auto& theta_val = value_of(theta_ref);
+  const auto& theta_arr = as_array_or_scalar(theta_val);
+  const auto& n_double = value_of_rec(n_ref);
 
   auto signs = to_ref_if<!is_constant<T_prob>::value>(
-      (2 * as_array_or_scalar(value_of_rec(n_ref)) - 1));
+      (2 * as_array_or_scalar(n_double) - 1));
   T_partials_array ntheta;
   if (is_vector<T_n>::value || is_vector<T_prob>::value) {
-    ntheta = forward_as<T_partials_array>(signs * theta_val);
+    ntheta = forward_as<T_partials_array>(signs * theta_arr);
   } else {
-    T_partials_return theta_val2 = theta_val;
-    T_partials_return ntheta_s = forward_as<T_partials_return>(signs * theta_val);
-    ntheta = T_partials_array::Constant(1, 1,
-                                        ntheta_s);
+    T_partials_return ntheta_s
+        = forward_as<T_partials_return>(signs * theta_arr);
+    ntheta = T_partials_array::Constant(1, 1, ntheta_s);
   }
   T_partials_array exp_m_ntheta = exp(-ntheta);
   static const double cutoff = 20.0;
@@ -72,12 +73,21 @@ return_type_t<T_prob> bernoulli_logit_lpmf(const T_n& n, const T_prob& theta) {
                   (ntheta < -cutoff).select(ntheta, -log1p(exp_m_ntheta))));
 
   if (!is_constant_all<T_prob>::value) {
-    ops_partials.edge1_.partials_
-        = (ntheta > cutoff)
+    if (is_vector<T_prob>::value) {
+      ops_partials.edge1_.partials_ = forward_as<T_partials_array>(
+          (ntheta > cutoff)
               .select(-exp_m_ntheta,
-                      (ntheta < -cutoff)
-                          .select(signs,
-                                  signs * exp_m_ntheta / (exp_m_ntheta + 1)));
+                      (ntheta >= -cutoff)
+                          .select(signs * exp_m_ntheta / (exp_m_ntheta + 1),
+                                  signs)));
+    } else {
+      ops_partials.edge1_.partials_[0]
+          = sum((ntheta > cutoff)
+                    .select(-exp_m_ntheta, (ntheta >= -cutoff)
+                                               .select(signs * exp_m_ntheta
+                                                           / (exp_m_ntheta + 1),
+                                                       signs)));
+    }
   }
   return ops_partials.build(logp);
 }

From 5f243cd619c8be8a3936816e939da6c764532b4e Mon Sep 17 00:00:00 2001
From: Stan Jenkins <mc.stanislaw@gmail.com>
Date: Mon, 1 Jun 2020 15:30:14 +0000
Subject: [PATCH 18/21] [Jenkins] auto-formatting by clang-format version
 6.0.0-1ubuntu2~16.04.1 (tags/RELEASE_600/final)

---
 .../prim/prob/hmm_hidden_state_prob_test.cpp    |  2 +-
 .../unit/math/prim/prob/hmm_latent_rng_test.cpp |  2 +-
 test/unit/math/prim/prob/hmm_marginal_test.cpp  | 17 +++++++----------
 3 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/test/unit/math/prim/prob/hmm_hidden_state_prob_test.cpp b/test/unit/math/prim/prob/hmm_hidden_state_prob_test.cpp
index 85cd346c31b..747784ab5b9 100644
--- a/test/unit/math/prim/prob/hmm_hidden_state_prob_test.cpp
+++ b/test/unit/math/prim/prob/hmm_hidden_state_prob_test.cpp
@@ -57,7 +57,7 @@ TEST(hmm_test_nonstandard, hidden_state_symmetry) {
   Eigen::VectorXd rho(n_states);
   rho << 0.3, 0.7;
   Eigen::MatrixXd log_omegas
-    = Eigen::MatrixXd::Ones(n_states, n_transitions + 1);
+      = Eigen::MatrixXd::Ones(n_states, n_transitions + 1);
 
   Eigen::MatrixXd prob = hmm_hidden_state_prob(log_omegas, Gamma, rho);
 
diff --git a/test/unit/math/prim/prob/hmm_latent_rng_test.cpp b/test/unit/math/prim/prob/hmm_latent_rng_test.cpp
index fe2a8090fab..9b88ae09c00 100644
--- a/test/unit/math/prim/prob/hmm_latent_rng_test.cpp
+++ b/test/unit/math/prim/prob/hmm_latent_rng_test.cpp
@@ -59,7 +59,7 @@ TEST(hmm_rng_test, chiSquareGoodnessFitTest_symmetric) {
   Eigen::VectorXd rho(n_states);
   rho << 0.3, 0.7;
   Eigen::MatrixXd log_omegas
-    = Eigen::MatrixXd::Ones(n_states, n_transitions + 1);
+      = Eigen::MatrixXd::Ones(n_states, n_transitions + 1);
 
   boost::random::mt19937 rng;
   int N = 10000;
diff --git a/test/unit/math/prim/prob/hmm_marginal_test.cpp b/test/unit/math/prim/prob/hmm_marginal_test.cpp
index 7aad4865149..3f9b653eb31 100644
--- a/test/unit/math/prim/prob/hmm_marginal_test.cpp
+++ b/test/unit/math/prim/prob/hmm_marginal_test.cpp
@@ -97,16 +97,15 @@ TEST(hmm_marginal, exceptions) {
 
   // Gamma is not square.
   MatrixXd Gamma_rec(n_states, n_states + 1);
-  EXPECT_THROW_MSG(
-      hmm_marginal(log_omegas, Gamma_rec, rho), std::invalid_argument,
-      "hmm_marginal: Expecting a square matrix; rows of Gamma (2) "
-      "and columns of Gamma (3) must match in size");
+  EXPECT_THROW_MSG(hmm_marginal(log_omegas, Gamma_rec, rho),
+                   std::invalid_argument,
+                   "hmm_marginal: Expecting a square matrix; rows of Gamma (2) "
+                   "and columns of Gamma (3) must match in size");
 
   // Gamma has a column that is not a simplex.
   MatrixXd Gamma_bad = Gamma;
   Gamma_bad(0, 0) = Gamma(0, 0) + 1;
-  EXPECT_THROW_MSG(hmm_marginal(log_omegas, Gamma_bad, rho),
-                   std::domain_error,
+  EXPECT_THROW_MSG(hmm_marginal(log_omegas, Gamma_bad, rho), std::domain_error,
                    "hmm_marginal: Gamma[i, ] is not a valid simplex. "
                    "sum(Gamma[i, ]) = 2, but should be 1")
 
@@ -127,16 +126,14 @@ TEST(hmm_marginal, exceptions) {
   // rho is not a simplex.
   VectorXd rho_bad = rho;
   rho_bad(0) = rho(0) + 1;
-  EXPECT_THROW_MSG(hmm_marginal(log_omegas, Gamma, rho_bad),
-                   std::domain_error,
+  EXPECT_THROW_MSG(hmm_marginal(log_omegas, Gamma, rho_bad), std::domain_error,
                    "hmm_marginal: rho is not a valid simplex. "
                    "sum(rho) = 2, but should be 1")
 
   // The size of rho is inconsistent with that of log_omega
   VectorXd rho_wrong_size(n_states + 1);
   EXPECT_THROW_MSG(
-      hmm_marginal(log_omegas, Gamma, rho_wrong_size),
-      std::invalid_argument,
+      hmm_marginal(log_omegas, Gamma, rho_wrong_size), std::invalid_argument,
       "hmm_marginal: rho has dimension = 3, expecting dimension = 2;"
       " a function was called with arguments of different scalar,"
       " array, vector, or matrix types, and they were not consistently sized;"

From 9564e2a96ce28ab71534a434563f3ace8732e215 Mon Sep 17 00:00:00 2001
From: tadej <tadej.c@gmail.com>
Date: Tue, 2 Jun 2020 19:43:06 +0200
Subject: [PATCH 19/21] added missing include

---
 stan/math/prim/prob/bernoulli_logit_lpmf.hpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/stan/math/prim/prob/bernoulli_logit_lpmf.hpp b/stan/math/prim/prob/bernoulli_logit_lpmf.hpp
index 39ae5379195..589955fbc97 100644
--- a/stan/math/prim/prob/bernoulli_logit_lpmf.hpp
+++ b/stan/math/prim/prob/bernoulli_logit_lpmf.hpp
@@ -7,6 +7,7 @@
 #include <stan/math/prim/fun/log1p.hpp>
 #include <stan/math/prim/fun/max_size.hpp>
 #include <stan/math/prim/fun/size_zero.hpp>
+#include <stan/math/prim/fun/to_ref.hpp>
 #include <stan/math/prim/fun/value_of.hpp>
 #include <stan/math/prim/fun/value_of_rec.hpp>
 #include <cmath>

From 7db8ce52988f4045b1f786d97cff61c7b55da0e8 Mon Sep 17 00:00:00 2001
From: Niko Huurre <nhuurre@protonmail.com>
Date: Fri, 5 Jun 2020 11:15:22 +0300
Subject: [PATCH 20/21] fix odes

---
 stan/math/prim/functor/integrate_ode_rk45.hpp |  6 ++++--
 stan/math/prim/functor/ode_rk45.hpp           |  9 ++++++---
 stan/math/rev/functor/integrate_ode_adams.hpp |  6 ++++--
 stan/math/rev/functor/ode_adams.hpp           | 12 ++++++++----
 4 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/stan/math/prim/functor/integrate_ode_rk45.hpp b/stan/math/prim/functor/integrate_ode_rk45.hpp
index 9e3f755bebe..4a298b6c44f 100644
--- a/stan/math/prim/functor/integrate_ode_rk45.hpp
+++ b/stan/math/prim/functor/integrate_ode_rk45.hpp
@@ -15,7 +15,8 @@ namespace math {
  */
 template <typename F, typename T1, typename T_param, typename T_t0,
           typename T_ts>
-std::vector<std::vector<return_type_t<T1, T_t0, T_ts, T_param>>>
+std::vector<std::vector<return_type_t<typename F::captured_scalar_t__,
+                                      T1, T_t0, T_ts, T_param>>>
 integrate_ode_rk45(const F& f, const std::vector<T1>& y0, const T_t0& t0,
                    const std::vector<T_ts>& ts,
                    const std::vector<T_param>& theta,
@@ -28,7 +29,8 @@ integrate_ode_rk45(const F& f, const std::vector<T1>& y0, const T_t0& t0,
       = ode_rk45_tol(f_adapted, to_vector(y0), t0, ts, relative_tolerance,
                      absolute_tolerance, max_num_steps, msgs, theta, x, x_int);
 
-  std::vector<std::vector<return_type_t<T1, T_param, T_t0, T_ts>>> y_converted;
+  std::vector<std::vector<return_type_t<typename F::captured_scalar_t__,
+                                        T1, T_param, T_t0, T_ts>>> y_converted;
   for (size_t i = 0; i < y.size(); ++i)
     y_converted.push_back(to_array_1d(y[i]));
 
diff --git a/stan/math/prim/functor/ode_rk45.hpp b/stan/math/prim/functor/ode_rk45.hpp
index 9ceb84d8d9d..c33d171dfa5 100644
--- a/stan/math/prim/functor/ode_rk45.hpp
+++ b/stan/math/prim/functor/ode_rk45.hpp
@@ -51,7 +51,8 @@ namespace math {
  */
 template <typename F, typename T_initial, typename T_t0, typename T_ts,
           typename... Args>
-std::vector<Eigen::Matrix<stan::return_type_t<T_initial, T_t0, T_ts, Args...>,
+std::vector<Eigen::Matrix<stan::return_type_t<typename F::captured_scalar_t__,
+                                              T_initial, T_t0, T_ts, Args...>,
                           Eigen::Dynamic, 1>>
 ode_rk45_tol(const F& f,
              const Eigen::Matrix<T_initial, Eigen::Dynamic, 1>& y0_arg, T_t0 t0,
@@ -89,7 +90,8 @@ ode_rk45_tol(const F& f,
                         absolute_tolerance);
   check_positive("integrate_ode_rk45", "max_num_steps", max_num_steps);
 
-  using return_t = return_type_t<T_initial, T_t0, T_ts, Args...>;
+  using return_t = return_type_t<typename F::captured_scalar_t__,
+                                 T_initial, T_t0, T_ts, Args...>;
   // creates basic or coupled system by template specializations
   coupled_ode_system<F, T_initial_or_t0, Args...> coupled_system(f, y0, msgs,
                                                                  args...);
@@ -173,7 +175,8 @@ ode_rk45_tol(const F& f,
 template <typename F, typename T_initial, typename T_t0, typename T_ts,
           typename... Args>
 std::vector<
-    Eigen::Matrix<stan::return_type_t<T_initial, Args...>, Eigen::Dynamic, 1>>
+    Eigen::Matrix<stan::return_type_t<typename F::captured_scalar_t__,
+                                      T_initial, Args...>, Eigen::Dynamic, 1>>
 ode_rk45(const F& f, const Eigen::Matrix<T_initial, Eigen::Dynamic, 1>& y0,
          T_t0 t0, const std::vector<T_ts>& ts, std::ostream* msgs,
          const Args&... args) {
diff --git a/stan/math/rev/functor/integrate_ode_adams.hpp b/stan/math/rev/functor/integrate_ode_adams.hpp
index f108ef9a7e0..863f3a9b12c 100644
--- a/stan/math/rev/functor/integrate_ode_adams.hpp
+++ b/stan/math/rev/functor/integrate_ode_adams.hpp
@@ -14,7 +14,8 @@ namespace math {
  */
 template <typename F, typename T_initial, typename T_param, typename T_t0,
           typename T_ts>
-std::vector<std::vector<return_type_t<T_initial, T_param, T_t0, T_ts>>>
+std::vector<std::vector<return_type_t<typename F::captured_scalar_t__,
+                                      T_initial, T_param, T_t0, T_ts>>>
 integrate_ode_adams(const F& f, const std::vector<T_initial>& y0,
                     const T_t0& t0, const std::vector<T_ts>& ts,
                     const std::vector<T_param>& theta,
@@ -28,7 +29,8 @@ integrate_ode_adams(const F& f, const std::vector<T_initial>& y0,
       = ode_adams_tol(f_adapted, to_vector(y0), t0, ts, relative_tolerance,
                       absolute_tolerance, max_num_steps, msgs, theta, x, x_int);
 
-  std::vector<std::vector<return_type_t<T_initial, T_param, T_t0, T_ts>>>
+  std::vector<std::vector<return_type_t<typename F::captured_scalar_t__,
+                                        T_initial, T_param, T_t0, T_ts>>>
       y_converted;
   for (size_t i = 0; i < y.size(); ++i)
     y_converted.push_back(to_array_1d(y[i]));
diff --git a/stan/math/rev/functor/ode_adams.hpp b/stan/math/rev/functor/ode_adams.hpp
index 15beb0a11af..0126c19dfa4 100644
--- a/stan/math/rev/functor/ode_adams.hpp
+++ b/stan/math/rev/functor/ode_adams.hpp
@@ -10,7 +10,8 @@ namespace stan {
 namespace math {
 template <typename F, typename T_initial, typename T_t0, typename T_ts,
           typename... T_Args>
-std::vector<Eigen::Matrix<stan::return_type_t<T_initial, T_t0, T_ts, T_Args...>,
+std::vector<Eigen::Matrix<stan::return_type_t<typename F::captured_scalar_t__,
+                                              T_initial, T_t0, T_ts, T_Args...>,
                           Eigen::Dynamic, 1>>
 ode_adams_tol_sens_error(const F& f,
                          const Eigen::Matrix<T_initial, Eigen::Dynamic, 1>& y0,
@@ -27,7 +28,8 @@ ode_adams_tol_sens_error(const F& f,
 
 template <typename F, typename T_initial, typename T_t0, typename T_ts,
           typename... T_Args>
-std::vector<Eigen::Matrix<stan::return_type_t<T_initial, T_t0, T_ts, T_Args...>,
+std::vector<Eigen::Matrix<stan::return_type_t<typename F::captured_scalar_t__,
+                                              T_initial, T_t0, T_ts, T_Args...>,
                           Eigen::Dynamic, 1>>
 ode_adams_tol_error(const F& f,
                     const Eigen::Matrix<T_initial, Eigen::Dynamic, 1>& y0,
@@ -77,7 +79,8 @@ ode_adams_tol_error(const F& f,
  */
 template <typename F, typename T_initial, typename T_t0, typename T_ts,
           typename... T_Args>
-std::vector<Eigen::Matrix<stan::return_type_t<T_initial, T_t0, T_ts, T_Args...>,
+std::vector<Eigen::Matrix<stan::return_type_t<typename F::captured_scalar_t__,
+                                              T_initial, T_t0, T_ts, T_Args...>,
                           Eigen::Dynamic, 1>>
 ode_adams_tol(const F& f, const Eigen::Matrix<T_initial, Eigen::Dynamic, 1>& y0,
               const T_t0& t0, const std::vector<T_ts>& ts,
@@ -127,7 +130,8 @@ ode_adams_tol(const F& f, const Eigen::Matrix<T_initial, Eigen::Dynamic, 1>& y0,
  */
 template <typename F, typename T_initial, typename T_t0, typename T_ts,
           typename... T_Args>
-std::vector<Eigen::Matrix<stan::return_type_t<T_initial, T_t0, T_ts, T_Args...>,
+std::vector<Eigen::Matrix<stan::return_type_t<typename F::captured_scalar_t__,
+                                              T_initial, T_t0, T_ts, T_Args...>,
                           Eigen::Dynamic, 1>>
 ode_adams(const F& f, const Eigen::Matrix<T_initial, Eigen::Dynamic, 1>& y0,
           const T_t0& t0, const std::vector<T_ts>& ts, std::ostream* msgs,

From 6010b55c384358b4b27d61751195b65d91c8864d Mon Sep 17 00:00:00 2001
From: Stan Jenkins <mc.stanislaw@gmail.com>
Date: Fri, 5 Jun 2020 11:50:21 -0400
Subject: [PATCH 21/21] [Jenkins] auto-formatting by clang-format version
 6.0.0-1ubuntu2 (tags/RELEASE_600/final)

---
 stan/math/prim/functor/coupled_ode_system.hpp |  6 ++--
 stan/math/prim/functor/integrate_ode_rk45.hpp |  9 +++---
 ...grate_ode_std_vector_interface_adapter.hpp | 30 ++++++++++++-------
 stan/math/prim/functor/ode_rk45.hpp           | 10 +++----
 .../prim/functor/ode_store_sensitivities.hpp  | 10 +++----
 stan/math/rev/functor/coupled_ode_system.hpp  | 13 ++++----
 stan/math/rev/functor/cvodes_integrator.hpp   | 22 +++++++-------
 stan/math/rev/functor/ode_bdf.hpp             | 14 +++++----
 .../rev/functor/ode_store_sensitivities.hpp   | 15 +++++-----
 9 files changed, 74 insertions(+), 55 deletions(-)

diff --git a/stan/math/prim/functor/coupled_ode_system.hpp b/stan/math/prim/functor/coupled_ode_system.hpp
index 3e32517153e..7971074bdb0 100644
--- a/stan/math/prim/functor/coupled_ode_system.hpp
+++ b/stan/math/prim/functor/coupled_ode_system.hpp
@@ -134,9 +134,9 @@ struct coupled_ode_system
                      const Eigen::Matrix<T_initial, Eigen::Dynamic, 1>& y0,
                      std::ostream* msgs, const Args&... args)
       : coupled_ode_system_impl<
-          std::is_arithmetic<return_type_t<typename F::captured_scalar_t__,
-                                           T_initial, Args...>>::value,
-          F, T_initial, Args...>(f, y0, msgs, args...) {}
+            std::is_arithmetic<return_type_t<typename F::captured_scalar_t__,
+                                             T_initial, Args...>>::value,
+            F, T_initial, Args...>(f, y0, msgs, args...) {}
 };
 
 }  // namespace math
diff --git a/stan/math/prim/functor/integrate_ode_rk45.hpp b/stan/math/prim/functor/integrate_ode_rk45.hpp
index 4a298b6c44f..38d7ed19ace 100644
--- a/stan/math/prim/functor/integrate_ode_rk45.hpp
+++ b/stan/math/prim/functor/integrate_ode_rk45.hpp
@@ -15,8 +15,8 @@ namespace math {
  */
 template <typename F, typename T1, typename T_param, typename T_t0,
           typename T_ts>
-std::vector<std::vector<return_type_t<typename F::captured_scalar_t__,
-                                      T1, T_t0, T_ts, T_param>>>
+std::vector<std::vector<
+    return_type_t<typename F::captured_scalar_t__, T1, T_t0, T_ts, T_param>>>
 integrate_ode_rk45(const F& f, const std::vector<T1>& y0, const T_t0& t0,
                    const std::vector<T_ts>& ts,
                    const std::vector<T_param>& theta,
@@ -29,8 +29,9 @@ integrate_ode_rk45(const F& f, const std::vector<T1>& y0, const T_t0& t0,
       = ode_rk45_tol(f_adapted, to_vector(y0), t0, ts, relative_tolerance,
                      absolute_tolerance, max_num_steps, msgs, theta, x, x_int);
 
-  std::vector<std::vector<return_type_t<typename F::captured_scalar_t__,
-                                        T1, T_param, T_t0, T_ts>>> y_converted;
+  std::vector<std::vector<
+      return_type_t<typename F::captured_scalar_t__, T1, T_param, T_t0, T_ts>>>
+      y_converted;
   for (size_t i = 0; i < y.size(); ++i)
     y_converted.push_back(to_array_1d(y[i]));
 
diff --git a/stan/math/prim/functor/integrate_ode_std_vector_interface_adapter.hpp b/stan/math/prim/functor/integrate_ode_std_vector_interface_adapter.hpp
index bdb7da0a4c4..ee64d97ddb6 100644
--- a/stan/math/prim/functor/integrate_ode_std_vector_interface_adapter.hpp
+++ b/stan/math/prim/functor/integrate_ode_std_vector_interface_adapter.hpp
@@ -24,7 +24,8 @@ struct integrate_ode_std_vector_interface_adapter {
   const F f_;
   const int num_vars__;
 
-  integrate_ode_std_vector_interface_adapter(const F& f) : f_(f), num_vars__(f.num_vars__) {}
+  integrate_ode_std_vector_interface_adapter(const F& f)
+      : f_(f), num_vars__(f.num_vars__) {}
 
   template <typename T0, typename T1, typename T2>
   auto operator()(const T0& t, const Eigen::Matrix<T1, Eigen::Dynamic, 1>& y,
@@ -33,8 +34,10 @@ struct integrate_ode_std_vector_interface_adapter {
                   const std::vector<int>& x_int) const {
     return to_vector(f_(t, to_array_1d(y), msgs, theta, x, x_int));
   }
-  template<typename v>
-  void save_varis(v** p) const { f_.save_varis(p); }
+  template <typename v>
+  void save_varis(v** p) const {
+    f_.save_varis(p);
+  }
   void accumulate_adjoints(double* p) const { f_.accumulate_adjoints(p); }
   void set_zer_adjoints() const { f_.set_zero_adjoints(); }
 
@@ -44,24 +47,29 @@ struct integrate_ode_std_vector_interface_adapter {
     struct ValueOf_cl__ {
       const typename F::ValueOf__ f_;
       const static int num_vars__ = 0;
-      ValueOf_cl__(const integrate_ode_std_vector_interface_adapter<F>& f) : f_(f.f_) {}
+      ValueOf_cl__(const integrate_ode_std_vector_interface_adapter<F>& f)
+          : f_(f.f_) {}
       ValueOf_cl__(const DeepCopy_cl__& f) : f_(f.f_) {}
       template <typename T0, typename T1, typename T2>
-      auto operator()(const T0& t, const Eigen::Matrix<T1, Eigen::Dynamic, 1>& y,
+      auto operator()(const T0& t,
+                      const Eigen::Matrix<T1, Eigen::Dynamic, 1>& y,
                       std::ostream* msgs, const std::vector<T2>& theta,
                       const std::vector<double>& x,
                       const std::vector<int>& x_int) const {
         return to_vector(f_(t, to_array_1d(y), msgs, theta, x, x_int));
       }
-      template<typename v>
-      void save_varis(v** p) const { f_.save_varis(p); }
+      template <typename v>
+      void save_varis(v** p) const {
+        f_.save_varis(p);
+      }
       void accumulate_adjoints(double* p) const { f_.accumulate_adjoints(p); }
       void set_zer_adjoints() const { f_.set_zero_adjoints(); }
       using captured_scalar_t__ = double;
       using ValueOf__ = ValueOf_cl__;
       using DeepCopy__ = ValueOf_cl__;
     };
-    DeepCopy_cl__(const integrate_ode_std_vector_interface_adapter<F>& f) : f_(f.f_), num_vars__(f.num_vars__) {}
+    DeepCopy_cl__(const integrate_ode_std_vector_interface_adapter<F>& f)
+        : f_(f.f_), num_vars__(f.num_vars__) {}
     template <typename T0, typename T1, typename T2>
     auto operator()(const T0& t, const Eigen::Matrix<T1, Eigen::Dynamic, 1>& y,
                     std::ostream* msgs, const std::vector<T2>& theta,
@@ -69,8 +77,10 @@ struct integrate_ode_std_vector_interface_adapter {
                     const std::vector<int>& x_int) const {
       return to_vector(f_(t, to_array_1d(y), msgs, theta, x, x_int));
     }
-    template<typename v>
-    void save_varis(v** p) const { f_.save_varis(p); }
+    template <typename v>
+    void save_varis(v** p) const {
+      f_.save_varis(p);
+    }
     void accumulate_adjoints(double* p) const { f_.accumulate_adjoints(p); }
     void set_zer_adjoints() const { f_.set_zero_adjoints(); }
     using captured_scalar_t__ = typename F::captured_scalar_t__;
diff --git a/stan/math/prim/functor/ode_rk45.hpp b/stan/math/prim/functor/ode_rk45.hpp
index c33d171dfa5..7fb1cbe1006 100644
--- a/stan/math/prim/functor/ode_rk45.hpp
+++ b/stan/math/prim/functor/ode_rk45.hpp
@@ -90,8 +90,8 @@ ode_rk45_tol(const F& f,
                         absolute_tolerance);
   check_positive("integrate_ode_rk45", "max_num_steps", max_num_steps);
 
-  using return_t = return_type_t<typename F::captured_scalar_t__,
-                                 T_initial, T_t0, T_ts, Args...>;
+  using return_t = return_type_t<typename F::captured_scalar_t__, T_initial,
+                                 T_t0, T_ts, Args...>;
   // creates basic or coupled system by template specializations
   coupled_ode_system<F, T_initial_or_t0, Args...> coupled_system(f, y0, msgs,
                                                                  args...);
@@ -174,9 +174,9 @@ ode_rk45_tol(const F& f,
  */
 template <typename F, typename T_initial, typename T_t0, typename T_ts,
           typename... Args>
-std::vector<
-    Eigen::Matrix<stan::return_type_t<typename F::captured_scalar_t__,
-                                      T_initial, Args...>, Eigen::Dynamic, 1>>
+std::vector<Eigen::Matrix<
+    stan::return_type_t<typename F::captured_scalar_t__, T_initial, Args...>,
+    Eigen::Dynamic, 1>>
 ode_rk45(const F& f, const Eigen::Matrix<T_initial, Eigen::Dynamic, 1>& y0,
          T_t0 t0, const std::vector<T_ts>& ts, std::ostream* msgs,
          const Args&... args) {
diff --git a/stan/math/prim/functor/ode_store_sensitivities.hpp b/stan/math/prim/functor/ode_store_sensitivities.hpp
index 402df8bada2..68c3b00886a 100644
--- a/stan/math/prim/functor/ode_store_sensitivities.hpp
+++ b/stan/math/prim/functor/ode_store_sensitivities.hpp
@@ -23,11 +23,11 @@ namespace math {
  * @param args Extra arguments passed unmodified through to ODE right hand side
  * @return ODE state
  */
-template <
-    typename F, typename T_y0_t0, typename T_t0, typename T_t, typename... Args,
-    typename
-    = require_all_arithmetic_t<typename F::captured_scalar_t__,
-                               T_y0_t0, T_t0, T_t, scalar_type_t<Args>...>>
+template <typename F, typename T_y0_t0, typename T_t0, typename T_t,
+          typename... Args,
+          typename
+          = require_all_arithmetic_t<typename F::captured_scalar_t__, T_y0_t0,
+                                     T_t0, T_t, scalar_type_t<Args>...>>
 Eigen::VectorXd ode_store_sensitivities(
     const F& f, const Eigen::VectorXd& coupled_state,
     const Eigen::Matrix<T_y0_t0, Eigen::Dynamic, 1>& y0, T_t0 t0, T_t t,
diff --git a/stan/math/rev/functor/coupled_ode_system.hpp b/stan/math/rev/functor/coupled_ode_system.hpp
index 7a030bbb566..c74f1a098b7 100644
--- a/stan/math/rev/functor/coupled_ode_system.hpp
+++ b/stan/math/rev/functor/coupled_ode_system.hpp
@@ -134,9 +134,9 @@ struct coupled_ode_system_impl<false, F, T_initial, Args...> {
         },
         args_tuple_);
 
-    Eigen::Matrix<var, Eigen::Dynamic, 1> f_y_t_vars
-        = apply([&](auto&&... args) { return f_vars_(t, y_vars, msgs_, args...); },
-                local_args_tuple);
+    Eigen::Matrix<var, Eigen::Dynamic, 1> f_y_t_vars = apply(
+        [&](auto&&... args) { return f_vars_(t, y_vars, msgs_, args...); },
+        local_args_tuple);
 
     check_size_match("coupled_ode_system", "dy_dt", f_y_t_vars.size(), "states",
                      N_);
@@ -180,7 +180,8 @@ struct coupled_ode_system_impl<false, F, T_initial, Args...> {
       for (size_t j = 0; j < f_.num_vars__; j++) {
         double temp_deriv = f_adjoints(j);
         for (size_t k = 0; k < N_; k++) {
-          temp_deriv += z[N_ + N_ * y0_vars_ + N_ * args_vars_ + N_ * j + k] * y_vars[k].adj();
+          temp_deriv += z[N_ + N_ * y0_vars_ + N_ * args_vars_ + N_ * j + k]
+                        * y_vars[k].adj();
         }
 
         dz_dt[N_ + N_ * y0_vars_ + N_ * args_vars_ + N_ * j + i] = temp_deriv;
@@ -195,7 +196,9 @@ struct coupled_ode_system_impl<false, F, T_initial, Args...> {
    *
    * @return size of the coupled system.
    */
-  size_t size() const { return N_ + N_ * y0_vars_ + N_ * args_vars_ + N_ * f_.num_vars__; }
+  size_t size() const {
+    return N_ + N_ * y0_vars_ + N_ * args_vars_ + N_ * f_.num_vars__;
+  }
 
   /**
    * Returns the initial state of the coupled system.
diff --git a/stan/math/rev/functor/cvodes_integrator.hpp b/stan/math/rev/functor/cvodes_integrator.hpp
index 94b9c253324..82fa63bc91e 100644
--- a/stan/math/rev/functor/cvodes_integrator.hpp
+++ b/stan/math/rev/functor/cvodes_integrator.hpp
@@ -107,9 +107,9 @@ class cvodes_integrator {
   inline void rhs(double t, const double y[], double dy_dt[]) const {
     const Eigen::VectorXd y_vec = Eigen::Map<const Eigen::VectorXd>(y, N_);
 
-    Eigen::VectorXd dy_dt_vec
-        = apply([&](auto&&... args) { return f_dbl_(t, y_vec, msgs_, args...); },
-                value_of_args_tuple_);
+    Eigen::VectorXd dy_dt_vec = apply(
+        [&](auto&&... args) { return f_dbl_(t, y_vec, msgs_, args...); },
+        value_of_args_tuple_);
 
     check_size_match("cvodes_integrator", "dy_dt", dy_dt_vec.size(), "states",
                      N_);
@@ -234,8 +234,8 @@ class cvodes_integrator {
     LS_ = SUNDenseLinearSolver(nv_state_, A_);
 
     if (y0_vars_ + args_vars_ + f_.num_vars__ > 0) {
-      nv_state_sens_
-          = N_VCloneVectorArrayEmpty_Serial(y0_vars_ + args_vars_ + f_.num_vars__, nv_state_);
+      nv_state_sens_ = N_VCloneVectorArrayEmpty_Serial(
+          y0_vars_ + args_vars_ + f_.num_vars__, nv_state_);
       for (std::size_t i = 0; i < y0_vars_ + args_vars_ + f_.num_vars__; i++) {
         NV_DATA_S(nv_state_sens_[i]) = &coupled_state_[N_] + i * N_;
       }
@@ -246,8 +246,9 @@ class cvodes_integrator {
     SUNLinSolFree(LS_);
     SUNMatDestroy(A_);
     N_VDestroy_Serial(nv_state_);
-    if (y0_vars_ + args_vars_ + f_.num_vars__> 0) {
-      N_VDestroyVectorArray_Serial(nv_state_sens_, y0_vars_ + args_vars_ + f_.num_vars__);
+    if (y0_vars_ + args_vars_ + f_.num_vars__ > 0) {
+      N_VDestroyVectorArray_Serial(nv_state_sens_,
+                                   y0_vars_ + args_vars_ + f_.num_vars__);
     }
   }
 
@@ -298,9 +299,10 @@ class cvodes_integrator {
       // initialize forward sensitivity system of CVODES as needed
       if (y0_vars_ + args_vars_ + f_.num_vars__ > 0) {
         check_flag_sundials(
-            CVodeSensInit(cvodes_mem, static_cast<int>(y0_vars_ + args_vars_ + f_.num_vars__),
-                          CV_STAGGERED, &cvodes_integrator::cv_rhs_sens,
-                          nv_state_sens_),
+            CVodeSensInit(
+                cvodes_mem,
+                static_cast<int>(y0_vars_ + args_vars_ + f_.num_vars__),
+                CV_STAGGERED, &cvodes_integrator::cv_rhs_sens, nv_state_sens_),
             "CVodeSensInit");
 
         if (include_sensitivities_in_errors_) {
diff --git a/stan/math/rev/functor/ode_bdf.hpp b/stan/math/rev/functor/ode_bdf.hpp
index e124c3af4c4..d7cb40c3450 100644
--- a/stan/math/rev/functor/ode_bdf.hpp
+++ b/stan/math/rev/functor/ode_bdf.hpp
@@ -77,9 +77,10 @@ ode_bdf_tol_error(const F& f,
  */
 template <typename F, typename T_initial, typename T_t0, typename T_ts,
           typename... T_Args>
-std::vector<Eigen::Matrix<stan::return_type_t<T_initial, T_t0, T_ts,
-                          typename F::captured_scalar_t__, T_Args...>,
-                          Eigen::Dynamic, 1>>
+std::vector<Eigen::Matrix<
+    stan::return_type_t<T_initial, T_t0, T_ts, typename F::captured_scalar_t__,
+                        T_Args...>,
+    Eigen::Dynamic, 1>>
 ode_bdf_tol(const F& f, const Eigen::Matrix<T_initial, Eigen::Dynamic, 1>& y0,
             const T_t0& t0, const std::vector<T_ts>& ts,
             double relative_tolerance, double absolute_tolerance,
@@ -127,9 +128,10 @@ ode_bdf_tol(const F& f, const Eigen::Matrix<T_initial, Eigen::Dynamic, 1>& y0,
  */
 template <typename F, typename T_initial, typename T_t0, typename T_ts,
           typename... T_Args>
-std::vector<Eigen::Matrix<stan::return_type_t<T_initial, T_t0, T_ts,
-                          typename F::captured_scalar_t__, T_Args...>,
-                          Eigen::Dynamic, 1>>
+std::vector<Eigen::Matrix<
+    stan::return_type_t<T_initial, T_t0, T_ts, typename F::captured_scalar_t__,
+                        T_Args...>,
+    Eigen::Dynamic, 1>>
 ode_bdf(const F& f, const Eigen::Matrix<T_initial, Eigen::Dynamic, 1>& y0,
         const T_t0& t0, const std::vector<T_ts>& ts, std::ostream* msgs,
         const T_Args&... args) {
diff --git a/stan/math/rev/functor/ode_store_sensitivities.hpp b/stan/math/rev/functor/ode_store_sensitivities.hpp
index 6c897ac7853..1b119c9b5ca 100644
--- a/stan/math/rev/functor/ode_store_sensitivities.hpp
+++ b/stan/math/rev/functor/ode_store_sensitivities.hpp
@@ -30,11 +30,10 @@ namespace math {
  * @param args Extra arguments passed unmodified through to ODE right hand side
  * @return ODE state with scalar type var
  */
-template <typename F, typename T_y0_t0, typename T_t0, typename T_t,
-          typename... Args,
-          typename
-          = require_any_autodiff_t<typename F::captured_scalar_t__, T_y0_t0,
-                                   T_t0, T_t, scalar_type_t<Args>...>>
+template <
+    typename F, typename T_y0_t0, typename T_t0, typename T_t, typename... Args,
+    typename = require_any_autodiff_t<typename F::captured_scalar_t__, T_y0_t0,
+                                      T_t0, T_t, scalar_type_t<Args>...>>
 Eigen::Matrix<var, Eigen::Dynamic, 1> ode_store_sensitivities(
     const F& f, const Eigen::VectorXd& coupled_state,
     const Eigen::Matrix<T_y0_t0, Eigen::Dynamic, 1>& y0, const T_t0& t0,
@@ -64,7 +63,8 @@ Eigen::Matrix<var, Eigen::Dynamic, 1> ode_store_sensitivities(
   }
 
   for (size_t j = 0; j < N; j++) {
-    const size_t total_vars = y0_vars + args_vars + t0_vars + t_vars + f.num_vars__;
+    const size_t total_vars
+        = y0_vars + args_vars + t0_vars + t_vars + f.num_vars__;
 
     vari** varis
         = ChainableStack::instance_->memalloc_.alloc_array<vari*>(total_vars);
@@ -93,7 +93,8 @@ Eigen::Matrix<var, Eigen::Dynamic, 1> ode_store_sensitivities(
     f.save_varis(varis_ptr);
     varis_ptr += f.num_vars__;
     for (std::size_t k = 0; k < f.num_vars__; ++k) {
-      *partials_ptr = coupled_state(N + N * y0_vars + N * args_vars + N * k + j);
+      *partials_ptr
+          = coupled_state(N + N * y0_vars + N * args_vars + N * k + j);
       partials_ptr++;
     }