From ee192d958364ee6ad837228fd3c6b7626ab3e6de Mon Sep 17 00:00:00 2001
From: whning <whning@zju.edu.cn>
Date: Fri, 5 Jun 2026 01:40:51 +0800
Subject: [PATCH] Fix WMA weighted_mean NaN handling (#1993)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The weighted_mean function in WMA._load_internal uses np.nanmean(w * x),
which produces incorrect results when NaN values are present in the input.
np.nanmean sums the non-NaN products (w * x) and divides by their count, but
it does not renormalize the weights themselves: w is normalized to sum to 1
over all elements, including the NaN positions, so the weights of the valid
elements no longer sum to 1 and the result is biased.

For example, with x = [1, NaN, 3] and w = [1/6, 2/6, 3/6]:
  Old: np.nanmean(w * x) = (1/6 + 9/6) / 2 ≈ 0.833
  Correct weighted avg (w renormalized to [1/4, 3/4]): 1*1/4 + 3*3/4 = 2.5

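The fix drops the NaN positions from both x and w, renormalizes the remaining
weights so they sum to 1, and returns np.sum(w * x[mask]). If every element
is NaN, it returns NaN. Because the final reduction is now np.sum rather than
np.nanmean, the result is no longer divided by the number of valid elements,
so NaN-free windows change as well (previously they returned the weighted
average divided by len(x)).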
---
 qlib/data/ops.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/qlib/data/ops.py b/qlib/data/ops.py
index d9a2ffbb3e3..8da9584ccda 100644
--- a/qlib/data/ops.py
+++ b/qlib/data/ops.py
@@ -1335,9 +1335,13 @@ def _load_internal(self, instrument, start_index, end_index, *args):
         # TODO: implement in Cython
 
         def weighted_mean(x):
+            mask = ~np.isnan(x)
             w = np.arange(len(x)) + 1
+            w = w[mask]
+            if len(w) == 0:
+                return np.nan
             w = w / w.sum()
-            return np.nanmean(w * x)
+            return np.sum(w * x[mask])
 
         if self.N == 0:
             series = series.expanding(min_periods=1).apply(weighted_mean, raw=True)