From 374160e5c4c0f7ea630e823a3a8496767ccf0d3d Mon Sep 17 00:00:00 2001 From: whning Date: Fri, 5 Jun 2026 01:41:08 +0800 Subject: [PATCH] Fix Yahoo date parsing with mixed timezone formats (#2014) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit YahooCollectorCN1d.download_index_data calls pd.to_datetime(df['date']) on timestamps from East Money API that may include timezone offsets (e.g. ' 09:30:00+08:00'). Similarly, YahooNormalize.normalize_yahoo calls pd.to_datetime(df.index) on date strings from Yahoo Finance. Without specifying format='mixed', pd.to_datetime raises when it encounters mixed formats in the same column — some entries are date-only strings while others carry timezone info. Adding format='mixed' lets pandas infer the format per-entry and parse them correctly. --- scripts/data_collector/yahoo/collector.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/data_collector/yahoo/collector.py b/scripts/data_collector/yahoo/collector.py index 82660f1112b..1dc4f07ed89 100644 --- a/scripts/data_collector/yahoo/collector.py +++ b/scripts/data_collector/yahoo/collector.py @@ -240,7 +240,7 @@ def download_index_data(self): logger.warning(f"get {_index_name} error: {e}") continue df.columns = ["date", "open", "close", "high", "low", "volume", "money", "change"] - df["date"] = pd.to_datetime(df["date"]) + df["date"] = pd.to_datetime(df["date"], format="mixed") df = df.astype(float, errors="ignore") df["adjclose"] = df["close"] df["symbol"] = f"sh{_index_code}" @@ -392,7 +392,7 @@ def normalize_yahoo( columns = copy.deepcopy(YahooNormalize.COLUMNS) df = df.copy() df.set_index(date_field_name, inplace=True) - df.index = pd.to_datetime(df.index) + df.index = pd.to_datetime(df.index, format="mixed") df.index = df.index.tz_localize(None) df = df[~df.index.duplicated(keep="first")] if calendar_list is not None: