microsoft · Whning0513 · Jun 4, 2026 · Jun 4, 2026 · Jun 4, 2026
diff --git a/qlib/data/data.py b/qlib/data/data.py
@@ -779,8 +779,8 @@ def period_feature(self, instrument, field, start_index, end_index, cur_time, pe
         if not field.endswith("_q") and not field.endswith("_a"):
             raise ValueError("period field must ends with '_q' or '_a'")
         quarterly = field.endswith("_q")
-        index_path = C.dpm.get_data_uri() / "financial" / instrument.lower() / f"{field}.index"
-        data_path = C.dpm.get_data_uri() / "financial" / instrument.lower() / f"{field}.data"
+        index_path = C.dpm.get_data_uri() / "financial" / instrument / f"{field}.index"
+        data_path = C.dpm.get_data_uri() / "financial" / instrument / f"{field}.data"
         if not (index_path.exists() and data_path.exists()):
             raise FileNotFoundError("No file is found.")
         # NOTE: The most significant performance loss is here.

diff --git a/qlib/data/storage/file_storage.py b/qlib/data/storage/file_storage.py
@@ -286,7 +286,9 @@ class FileFeatureStorage(FileStorageMixin, FeatureStorage):
     def __init__(self, instrument: str, field: str, freq: str, provider_uri: dict = None, **kwargs):
         super(FileFeatureStorage, self).__init__(instrument, field, freq, **kwargs)
         self._provider_uri = None if provider_uri is None else C.DataPathManager.format_provider_uri(provider_uri)
-        self.file_name = f"{instrument.lower()}/{field.lower()}.{freq.lower()}.bin"
+        # NOTE: instrument is expected to arrive already lowercased by code_to_fname();
+        # field and freq are lowercased here so the whole path is consistently lowercase.
+        self.file_name = f"{instrument}/{field.lower()}.{freq.lower()}.bin"
 
     def clear(self):
         with self.uri.open("wb") as _:

diff --git a/qlib/utils/__init__.py b/qlib/utils/__init__.py
@@ -916,10 +916,16 @@ def code_to_fname(code: str):
     replace_names += [f"LPT{i}" for i in range(10)]
 
     prefix = "_qlib_"
-    if str(code).upper() in replace_names:
-        code = prefix + str(code)
-
-    return code
+    code = str(code)
+    if code.upper() in replace_names:
+        code = prefix + code
+
+    # Normalize to lowercase so that file paths do not depend on the caller's casing.
+    # All file-based storage (FileFeatureStorage, FileInstrumentStorage, etc.)
+    # assumes lowercase paths internally, but not all callers pre-normalize.
+    # Centralizing the normalization here prevents path mismatches on
+    # case-sensitive filesystems (e.g., Linux ext4).
+    return code.lower()
 
 
 def fname_to_code(fname: str):

diff --git a/qlib/utils/paral.py b/qlib/utils/paral.py
@@ -6,7 +6,6 @@
 from threading import Thread
 from typing import Callable, Text, Union
 
-import joblib
 from joblib import Parallel, delayed
 from joblib._parallel_backends import MultiprocessingBackend
 import pandas as pd
@@ -22,12 +21,15 @@ def __init__(self, *args, **kwargs):
         maxtasksperchild = kwargs.pop("maxtasksperchild", None)
         super(ParallelExt, self).__init__(*args, **kwargs)
         if isinstance(self._backend, MultiprocessingBackend):
-            # 2025-05-04 joblib released version 1.5.0, in which _backend_args was removed and replaced by _backend_kwargs.
+            # 2025-05-04 joblib released version 1.5.0, in which _backend_args was
+            # removed and replaced by _backend_kwargs.
             # Ref: https://github.com/joblib/joblib/pull/1525/files#diff-e4dff8042ce45b443faf49605b75a58df35b8c195978d4a57f4afa695b406bdc
-            if joblib.__version__ < "1.5.0":
-                self._backend_args["maxtasksperchild"] = maxtasksperchild  # pylint: disable=E1101
-            else:
+            # Probe with hasattr rather than comparing version strings (lexicographic
+            # order puts "1.10.0" before "1.5.0"); if neither attribute exists, skip.
+            if hasattr(self, "_backend_kwargs"):
                 self._backend_kwargs["maxtasksperchild"] = maxtasksperchild  # pylint: disable=E1101
+            elif hasattr(self, "_backend_args"):
+                self._backend_args["maxtasksperchild"] = maxtasksperchild  # pylint: disable=E1101
 
 
 def datetime_groupby_apply(

diff --git a/scripts/data_collector/yahoo/collector.py b/scripts/data_collector/yahoo/collector.py
@@ -240,7 +240,7 @@ def download_index_data(self):
                 logger.warning(f"get {_index_name} error: {e}")
                 continue
             df.columns = ["date", "open", "close", "high", "low", "volume", "money", "change"]
-            df["date"] = pd.to_datetime(df["date"])
+            df["date"] = pd.to_datetime(df["date"], format="mixed")
             df = df.astype(float, errors="ignore")
             df["adjclose"] = df["close"]
             df["symbol"] = f"sh{_index_code}"
@@ -392,7 +392,7 @@ def normalize_yahoo(
         columns = copy.deepcopy(YahooNormalize.COLUMNS)
         df = df.copy()
         df.set_index(date_field_name, inplace=True)
-        df.index = pd.to_datetime(df.index)
+        df.index = pd.to_datetime(df.index, format="mixed")
         df.index = df.index.tz_localize(None)
         df = df[~df.index.duplicated(keep="first")]
         if calendar_list is not None: