From ce4ad1d465e1813e9bed8ddb6808ee56d4b3fb86 Mon Sep 17 00:00:00 2001 From: s0wa48 Date: Wed, 4 Mar 2026 11:02:42 +0100 Subject: [PATCH] fix: resolve dump_update failures due to DataFrame empty check, date filter, and NaT handling --- scripts/dump_bin.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/dump_bin.py b/scripts/dump_bin.py index cb8ed72dabb..dfea59f4249 100644 --- a/scripts/dump_bin.py +++ b/scripts/dump_bin.py @@ -170,6 +170,7 @@ def _get_source_data(self, file_path: Path) -> pd.DataFrame: df = read_as_df(file_path, low_memory=False) if self.date_field_name in df.columns: df[self.date_field_name] = pd.to_datetime(df[self.date_field_name]) + df.dropna(subset=[self.date_field_name], inplace=True) # df.drop_duplicates([self.date_field_name], inplace=True) return df @@ -240,7 +241,10 @@ def data_merge_calendar(self, df: pd.DataFrame, calendars_list: List[pd.Timestam @staticmethod def get_datetime_index(df: pd.DataFrame, calendar_list: List[pd.Timestamp]) -> int: - return calendar_list.index(df.index.min()) + min_index = df.index.min() + if pd.isnull(min_index): + raise ValueError("DataFrame index minimum value is NaT, cannot find in calendar_list") + return calendar_list.index(min_index) def _data_to_bin(self, df: pd.DataFrame, calendar_list: List[pd.Timestamp], features_dir: Path): if df.empty: