From ee192d958364ee6ad837228fd3c6b7626ab3e6de Mon Sep 17 00:00:00 2001
From: whning
Date: Fri, 5 Jun 2026 01:40:51 +0800
Subject: [PATCH] Fix WMA weighted_mean NaN handling (#1993)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The weighted_mean function in WMA._load_internal uses np.nanmean(w * x),
which produces incorrect results when NaN values are present in the
input.

The problem is that np.nanmean renormalizes the product (w * x) by
counting how many non-NaN products there are, but it doesn't renormalize
the weights themselves — so the weight denominator still sums over all
elements including the NaN positions, leading to a biased result.
Moreover, because w is already normalized to sum to 1, taking the mean
of w * x divides the weighted sum by the element count a second time, so
the old result was also scaled down for NaN-free windows longer than one
element; this patch therefore changes those outputs as well.

For example, with x = [1, NaN, 3] and w = [1/6, 2/6, 3/6]:
  Old: np.nanmean(w * x) = (1/6 + 9/6) / 2 = 0.833
  Correct weighted avg: (1*1/4 + 3*3/4) = 2.5

The fix filters out NaN positions from both x and w before computing the
weighted sum, then renormalizes w so it sums to 1 over the valid
elements.
---
 qlib/data/ops.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/qlib/data/ops.py b/qlib/data/ops.py
index d9a2ffbb3e3..8da9584ccda 100644
--- a/qlib/data/ops.py
+++ b/qlib/data/ops.py
@@ -1335,9 +1335,13 @@ def _load_internal(self, instrument, start_index, end_index, *args):
         # TODO: implement in Cython
 
         def weighted_mean(x):
+            mask = ~np.isnan(x)
             w = np.arange(len(x)) + 1
+            w = w[mask]
+            if len(w) == 0:
+                return np.nan
             w = w / w.sum()
-            return np.nanmean(w * x)
+            return np.sum(w * x[mask])
 
         if self.N == 0:
             series = series.expanding(min_periods=1).apply(weighted_mean, raw=True)