Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions qlib/data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -779,8 +779,8 @@ def period_feature(self, instrument, field, start_index, end_index, cur_time, pe
if not field.endswith("_q") and not field.endswith("_a"):
raise ValueError("period field must ends with '_q' or '_a'")
quarterly = field.endswith("_q")
index_path = C.dpm.get_data_uri() / "financial" / instrument.lower() / f"{field}.index"
data_path = C.dpm.get_data_uri() / "financial" / instrument.lower() / f"{field}.data"
index_path = C.dpm.get_data_uri() / "financial" / instrument / f"{field}.index"
data_path = C.dpm.get_data_uri() / "financial" / instrument / f"{field}.data"
if not (index_path.exists() and data_path.exists()):
raise FileNotFoundError("No file is found.")
# NOTE: The most significant performance loss is here.
Expand Down
4 changes: 3 additions & 1 deletion qlib/data/storage/file_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,9 @@ class FileFeatureStorage(FileStorageMixin, FeatureStorage):
def __init__(self, instrument: str, field: str, freq: str, provider_uri: dict = None, **kwargs):
super(FileFeatureStorage, self).__init__(instrument, field, freq, **kwargs)
self._provider_uri = None if provider_uri is None else C.DataPathManager.format_provider_uri(provider_uri)
self.file_name = f"{instrument.lower()}/{field.lower()}.{freq.lower()}.bin"
# NOTE: instrument case is normalized by code_to_fname() before reaching here.
# freq/field are also normalized to lowercase for path consistency.
self.file_name = f"{instrument}/{field.lower()}.{freq.lower()}.bin"

def clear(self):
with self.uri.open("wb") as _:
Expand Down
14 changes: 10 additions & 4 deletions qlib/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -916,10 +916,16 @@ def code_to_fname(code: str):
replace_names += [f"LPT{i}" for i in range(10)]

prefix = "_qlib_"
if str(code).upper() in replace_names:
code = prefix + str(code)

return code
code = str(code)
if code.upper() in replace_names:
code = prefix + code

# Normalize to lowercase so that path lookups do not depend on the case of the input code.
# All file-based storage (FileFeatureStorage, FileInstrumentStorage, etc.)
# assumes lowercase paths internally, but not all callers pre-normalize.
# Centralizing the normalization here prevents path mismatches on
# case-sensitive filesystems (e.g., Linux ext4).
return code.lower()


def fname_to_code(fname: str):
Expand Down
12 changes: 7 additions & 5 deletions qlib/utils/paral.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from threading import Thread
from typing import Callable, Text, Union

import joblib
from joblib import Parallel, delayed
from joblib._parallel_backends import MultiprocessingBackend
import pandas as pd
Expand All @@ -22,12 +21,15 @@ def __init__(self, *args, **kwargs):
maxtasksperchild = kwargs.pop("maxtasksperchild", None)
super(ParallelExt, self).__init__(*args, **kwargs)
if isinstance(self._backend, MultiprocessingBackend):
# 2025-05-04 joblib released version 1.5.0, in which _backend_args was removed and replaced by _backend_kwargs.
# 2025-05-04 joblib released version 1.5.0, in which _backend_args was
# removed and replaced by _backend_kwargs.
# Ref: https://github.com/joblib/joblib/pull/1525/files#diff-e4dff8042ce45b443faf49605b75a58df35b8c195978d4a57f4afa695b406bdc
if joblib.__version__ < "1.5.0":
self._backend_args["maxtasksperchild"] = maxtasksperchild # pylint: disable=E1101
else:
# Use hasattr checks for robustness: in some joblib versions the
# attribute may not exist yet during __init__.
if hasattr(self, "_backend_kwargs"):
self._backend_kwargs["maxtasksperchild"] = maxtasksperchild # pylint: disable=E1101
elif hasattr(self, "_backend_args"):
self._backend_args["maxtasksperchild"] = maxtasksperchild # pylint: disable=E1101


def datetime_groupby_apply(
Expand Down
4 changes: 2 additions & 2 deletions scripts/data_collector/yahoo/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ def download_index_data(self):
logger.warning(f"get {_index_name} error: {e}")
continue
df.columns = ["date", "open", "close", "high", "low", "volume", "money", "change"]
df["date"] = pd.to_datetime(df["date"])
df["date"] = pd.to_datetime(df["date"], format="mixed")
df = df.astype(float, errors="ignore")
df["adjclose"] = df["close"]
df["symbol"] = f"sh{_index_code}"
Expand Down Expand Up @@ -392,7 +392,7 @@ def normalize_yahoo(
columns = copy.deepcopy(YahooNormalize.COLUMNS)
df = df.copy()
df.set_index(date_field_name, inplace=True)
df.index = pd.to_datetime(df.index)
df.index = pd.to_datetime(df.index, format="mixed")
df.index = df.index.tz_localize(None)
df = df[~df.index.duplicated(keep="first")]
if calendar_list is not None:
Expand Down