Fix fname_to_code prefix stripping; add a round-trip regression test.

fname_to_code() used str.lstrip(prefix), which strips any leading characters
found in the prefix rather than the prefix itself; it now slices the prefix
off. The new FileNameUtils test is appended after test_guess_horizon instead
of being spliced into the middle of that method.

NOTE(review): the last hunk adds FileNameUtils after the final visible
statement of test_guess_horizon and therefore carries no trailing context.
If tests/misc/test_utils.py has further content below that line, regenerate
the hunk with `git diff` so it picks up real trailing context (and a fresh
post-image hash on the index line) -- TODO confirm against the working tree.

diff --git a/qlib/utils/__init__.py b/qlib/utils/__init__.py
index 2a94ebd555b..0f4d7b10876 100644
--- a/qlib/utils/__init__.py
+++ b/qlib/utils/__init__.py
@@ -932,7 +932,10 @@ def fname_to_code(fname: str):
 
     prefix = "_qlib_"
     if fname.startswith(prefix):
-        fname = fname.lstrip(prefix)
+        # NOTE: use slicing rather than ``lstrip(prefix)``; ``str.lstrip`` strips
+        # any leading characters contained in ``prefix`` (e.g. it would turn
+        # "_qlib_lpt1" into "pt1"), not the prefix as a whole.
+        fname = fname[len(prefix) :]
     return fname
 
 
diff --git a/tests/misc/test_utils.py b/tests/misc/test_utils.py
index db5b0724886..0b0a01c2573 100644
--- a/tests/misc/test_utils.py
+++ b/tests/misc/test_utils.py
@@ -10,6 +10,7 @@
 from qlib.constant import REG_CN, REG_US, REG_TW
 from qlib.utils.time import cal_sam_minute as cal_sam_minute_new, get_min_cal, CN_TIME, US_TIME, TW_TIME
 from qlib.utils.data import guess_horizon
+from qlib.utils import code_to_fname, fname_to_code
 
 REG_MAP = {REG_CN: CN_TIME, REG_US: US_TIME, REG_TW: TW_TIME}
 
@@ -127,6 +128,22 @@ def test_guess_horizon(self):
         result = guess_horizon(label)
         assert result == 5
 
         label = ["Ref($close, -1) / Ref($close, -1) - 1"]
         result = guess_horizon(label)
         assert result == 1
+
+
+class FileNameUtils(TestCase):
+    def test_fname_code_round_trip(self):
+        # code_to_fname only prefixes reserved Windows device names; fname_to_code
+        # must strip the whole "_qlib_" prefix, not individual characters.
+        # exists_qlib_data() lowercases the directory name before converting back,
+        # and "lpt*" begins with characters that are also in the prefix.
+        for code in ["CON", "PRN", "AUX", "NUL", "COM1", "LPT1", "LPT9"]:
+            fname = code_to_fname(code)
+            self.assertEqual(fname_to_code(fname.lower()), code.lower())
+
+        # a name whose body consists only of prefix characters must survive
+        self.assertEqual(fname_to_code("_qlib_lll"), "lll")
+        # plain codes pass through unchanged
+        self.assertEqual(fname_to_code("AAPL"), "AAPL")