Ensure Jacobian calculations for polynomial features work when T=0

kbattocchi · kbattocchi · commit 1907ad64efc0 · 2025-11-18T22:38:36.000-05:00
Signed-off-by: Keith Battocchi <kebatt@microsoft.com>
diff --git a/econml/iv/sieve/_tsls.py b/econml/iv/sieve/_tsls.py
@@ -151,7 +151,7 @@ def transform(self, X):
         for i in range(X.shape[1]):
             p = powers.copy()
             c = powers[:, i]
-            p[:, i] -= 1
+            p[p[:, i] > 0, i] -= 1
             M = np.float_power(X[:, np.newaxis, :], p[np.newaxis, :, :])
             result[:, i, :] = c[np.newaxis, :] * np.prod(M, axis=-1)
         return result
diff --git a/econml/tests/test_treatment_featurization.py b/econml/tests/test_treatment_featurization.py
@@ -486,20 +486,21 @@ def test_featurization(self):
     def test_jac(self):
         def func_transform(x):
             x = x.reshape(-1, 1)
-            return np.hstack([x, x**2])
+            return np.hstack([np.ones_like(x), x, x**2])
 
         def calc_expected_jacobian(T):
-            jac = DPolynomialFeatures(degree=2, include_bias=False).fit_transform(T)
+            jac = DPolynomialFeatures(degree=2, include_bias=True).fit_transform(T)
             return jac
 
         treatment_featurizers = [
-            PolynomialFeatures(degree=2, include_bias=False),
+            PolynomialFeatures(degree=2, include_bias=True),
             FunctionTransformer(func=func_transform)
         ]
 
         n = 10000
         d_t = 1
         T = np.random.normal(size=(n, d_t))
+        T[0, 0] = 0  # hardcode one value of exactly zero to test that we don't generate nan
 
         for treatment_featurizer in treatment_featurizers:
             # fit a dummy estimator first so the featurizer can be fit to the treatment
diff --git a/econml/utilities.py b/econml/utilities.py
@@ -1460,9 +1460,15 @@ def jac(self, X, epsilon=0.001):
             powers = self.featurizer.powers_
             result = np.zeros(X.shape + (self.featurizer.n_output_features_,))
             for i in range(X.shape[1]):
-                p = powers.copy()
+                # d/dx_i of x_1^p_1 * x_2^p_2 * ... x_i^p_i * ... x_n^p_n
+                # = p_i * x_1^p_1 * x_2^p_2 * ... x_i^(p_i-1) * ... x_n^p_n
+                # store the coefficient in c, and the updated powers in p
                 c = powers[:, i]
-                p[:, i] -= 1
+                p = powers.copy()
+                # decrement p[:, i], but only where it is already greater than 0
+                # (where it is 0, c is also 0, so the result is correctly 0 whatever the other entries of p are;
+                #  decrementing to -1 would make float_power return inf where X is 0, and 0 * inf is nan)
+                p[p[:, i] > 0, i] -= 1
                 M = np.float_power(X[:, np.newaxis, :], p[np.newaxis, :, :])
                 result[:, i, :] = c[np.newaxis, :] * np.prod(M, axis=-1)
             return result