microsoft · kaish114 · Mar 14, 2026 · Mar 14, 2026 · Mar 14, 2026 · Mar 14, 2026
diff --git a/scripts/data_collector/README.md b/scripts/data_collector/README.md
@@ -4,7 +4,7 @@
 
 Scripts for data collection
 
-- yahoo: get *US/CN* stock data from *Yahoo Finance*
+- yahoo: get *CN/US/IN/BR/GB* stock data from *Yahoo Finance*
 - fund: get fund data from *http://fund.eastmoney.com*
 - cn_index: get *CN index* from *http://www.csindex.com.cn*, *CSI300*/*CSI100*
 - us_index: get *US index* from *https://en.wikipedia.org/wiki*, *SP500*/*NASDAQ100*/*DJIA*/*SP400*

diff --git a/scripts/data_collector/utils.py b/scripts/data_collector/utils.py
@@ -38,6 +38,7 @@
     "US_ALL": "^GSPC",
     "IN_ALL": "^NSEI",
     "BR_ALL": "^BVSP",
+    "GB_ALL": "^FTSE",
 }
 
 _BENCH_CALENDAR_LIST = None
@@ -46,6 +47,7 @@
 _US_SYMBOLS = None
 _IN_SYMBOLS = None
 _BR_SYMBOLS = None
+_GB_SYMBOLS = None  # cache of GB symbols, filled on the first get_gb_stock_symbols() call
 _EN_FUND_SYMBOLS = None
 _CALENDAR_MAP = {}
 
@@ -74,7 +76,12 @@ def _get_calendar(url):
 
     calendar = _CALENDAR_MAP.get(bench_code, None)
     if calendar is None:
-        if bench_code.startswith("US_") or bench_code.startswith("IN_") or bench_code.startswith("BR_"):
+        if (
+            bench_code.startswith("US_")
+            or bench_code.startswith("IN_")
+            or bench_code.startswith("BR_")
+            or bench_code.startswith("GB_")
+        ):
             print(Ticker(CALENDAR_BENCH_URL_MAP[bench_code]))
             print(Ticker(CALENDAR_BENCH_URL_MAP[bench_code]).history(interval="1d", period="max"))
             df = Ticker(CALENDAR_BENCH_URL_MAP[bench_code]).history(interval="1d", period="max")
@@ -450,6 +457,81 @@ def _format(s_):
     return _BR_SYMBOLS
 
 
+def get_gb_stock_symbols(qlib_data_path: [str, Path] = None) -> list:
+    """Get GB (London Stock Exchange) stock symbols via the Yahoo Finance screener API.
+
+    Parameters
+    ----------
+    qlib_data_path : str or Path, optional
+        Path to a local qlib data directory.  When given, the symbols listed in
+        ``instruments/ftse100.txt`` and ``instruments/ftse250.txt`` (if those
+        files exist) are merged in unfiltered.  Only consulted on the first
+        call, because the result is cached.  By default None.
+
+    Returns
+    -------
+    list
+        Sorted, deduplicated ticker symbols.  Screener symbols are kept only
+        when they end with ``.L`` (e.g. ``"AZN.L"``, ``"BP.L"``, ``"HSBA.L"``).
+
+    Notes
+    -----
+    Symbols come from Yahoo Finance's predefined ``most_actives_gb`` screener,
+    fetched page by page until an empty or short page is returned.  A response
+    without usable ``finance.result[0].quotes`` data is treated as an empty page.
+    The list is cached in the module-level ``_GB_SYMBOLS`` variable after the
+    first call.
+    """
+    global _GB_SYMBOLS  # pylint: disable=W0603
+
+    page_size = 250  # drives both the ``count`` query parameter and the stop test
+    screener_url = (
+        "https://query1.finance.yahoo.com/v1/finance/screener/predefined/saved"
+        "?scrIds=most_actives_gb&count={count}&start={start}"
+    )
+    headers = {
+        "User-Agent": "Mozilla/5.0",
+        "Accept": "application/json",
+    }
+
+    @deco_retry
+    def _fetch_page(start: int) -> list:
+        url = screener_url.format(count=page_size, start=start)
+        resp = requests.get(url, headers=headers, timeout=30)
+        resp.raise_for_status()
+        # The ``or`` fallbacks also cover keys that are present but null/empty,
+        # which would otherwise fail as AttributeError/TypeError/IndexError.
+        finance = resp.json().get("finance") or {}
+        results = finance.get("result") or [{}]
+        return results[0].get("quotes") or []
+
+    if _GB_SYMBOLS is None:
+        _all_symbols = []
+        start = 0
+        while True:
+            quotes = _fetch_page(start)
+            if not quotes:
+                break
+            # ``or ""`` tolerates quotes whose "symbol" is missing or null
+            tickers = (q.get("symbol") or "" for q in quotes)
+            _all_symbols.extend(t for t in tickers if t.endswith(".L"))
+            if len(quotes) < page_size:
+                break
+            start += page_size
+            time.sleep(1)  # avoid triggering Yahoo Finance screener rate limits between pages
+
+        if qlib_data_path is not None:
+            for _index in ["ftse100", "ftse250"]:
+                _ins_path = Path(qlib_data_path).joinpath(f"instruments/{_index}.txt")
+                if _ins_path.exists():
+                    ins_df = pd.read_csv(_ins_path, sep="\t", names=["symbol", "start_date", "end_date"])
+                    _all_symbols += ins_df["symbol"].unique().tolist()
+
+        _GB_SYMBOLS = sorted(set(_all_symbols))
+
+    return _GB_SYMBOLS
+
+
 def get_en_fund_symbols(qlib_data_path: [str, Path] = None) -> list:
     """get en fund symbols
 

diff --git a/scripts/data_collector/yahoo/README.md b/scripts/data_collector/yahoo/README.md
@@ -63,7 +63,7 @@ pip install -r requirements.txt
           - `source_dir`: save the directory
           - `interval`: `1d` or `1min`, by default `1d`
             > **due to the limitation of the *YahooFinance API*, only the last month's data is available in `1min`**
-          - `region`: `CN` or `US` or `IN` or `BR`, by default `CN`
+          - `region`: `CN` or `US` or `IN` or `BR` or `GB`, by default `CN`
           - `delay`: `time.sleep(delay)`, by default *0.5*
           - `start`: start datetime, by default *"2000-01-01"*; *closed interval(including start)*
           - `end`: end datetime, by default `pd.Timestamp(datetime.datetime.now() + pd.Timedelta(days=1))`; *open interval(excluding end)*
@@ -92,6 +92,11 @@ pip install -r requirements.txt
           python collector.py download_data --source_dir ~/.qlib/stock_data/source/br_data --start 2003-01-03 --end 2022-03-01 --delay 1 --interval 1d --region BR
           # br 1min data
           python collector.py download_data --source_dir ~/.qlib/stock_data/source/br_data_1min --delay 1 --interval 1min --region BR
+
+          # gb 1d data
+          python collector.py download_data --source_dir ~/.qlib/stock_data/source/gb_data --start 2000-01-04 --end 2025-12-31 --delay 1 --interval 1d --region GB
+          # gb 1min data
+          python collector.py download_data --source_dir ~/.qlib/stock_data/source/gb_data_1min --delay 1 --interval 1min --region GB
           ```
   2. normalize data: `python scripts/data_collector/yahoo/collector.py normalize_data`
 
@@ -105,7 +110,7 @@ pip install -r requirements.txt
           - `max_workers`: number of concurrent, by default *1*
           - `interval`: `1d` or `1min`, by default `1d`
             > if **`interval == 1min`**, `qlib_data_1d_dir` cannot be `None`
-          - `region`: `CN` or `US` or `IN`, by default `CN`
+          - `region`: `CN` or `US` or `IN` or `BR` or `GB`, by default `CN`
           - `date_field_name`: column *name* identifying time in csv files, by default `date`
           - `symbol_field_name`: column *name* identifying symbol in csv files, by default `symbol`
           - `end_date`: if not `None`, normalize the last date saved (*including end_date*); if `None`, it will ignore this parameter; by default `None`
@@ -133,6 +138,12 @@ pip install -r requirements.txt
 
         # normalize 1min br
         python collector.py normalize_data --qlib_data_1d_dir ~/.qlib/qlib_data/br_data --source_dir ~/.qlib/stock_data/source/br_data_1min --normalize_dir ~/.qlib/stock_data/source/br_1min_nor --region BR --interval 1min
+
+        # normalize 1d gb
+        python collector.py normalize_data --source_dir ~/.qlib/stock_data/source/gb_data --normalize_dir ~/.qlib/stock_data/source/gb_1d_nor --region GB --interval 1d
+
+        # normalize 1min gb
+        python collector.py normalize_data --qlib_data_1d_dir ~/.qlib/qlib_data/gb_data --source_dir ~/.qlib/stock_data/source/gb_data_1min --normalize_dir ~/.qlib/stock_data/source/gb_1min_nor --region GB --interval 1min
         ```
   3. dump data: `python scripts/dump_bin.py dump_all`
 
@@ -221,5 +232,16 @@ pip install -r requirements.txt
   df = D.features(inst[:100], ["$close"], freq="1min")
   # get all symbol data
   # df = D.features(D.instruments("all"), ["$close"], freq="1min")
+
+  # 1d data gb
+  # NOTE: Yahoo Finance quotes GB (.L) equities in GBp (pence), not GBP pounds.
+  # e.g. HSBA.L price ~1200 means 1200p = £12. No scaling is applied by the normaliser.
+  qlib.init(provider_uri="~/.qlib/qlib_data/gb_data", region="gb")
+  df = D.features(D.instruments("all"), ["$close"], freq="day")
+
+  # 1min data gb
+  qlib.init(provider_uri="~/.qlib/qlib_data/gb_data_1min", region="gb")
+  inst = D.list_instruments(D.instruments("all"), freq="1min", as_list=True)
+  df = D.features(inst[:100], ["$close"], freq="1min")
   ```
 
diff --git a/scripts/data_collector/yahoo/collector.py b/scripts/data_collector/yahoo/collector.py
@@ -38,6 +38,7 @@
     get_us_stock_symbols,
     get_in_stock_symbols,
     get_br_stock_symbols,
+    get_gb_stock_symbols,
     generate_minutes_calendar_from_daily,
     calc_adjusted_price,
 )
@@ -364,6 +365,40 @@ class YahooCollectorBR1min(YahooCollectorBR):
     retry = 2
 
 
+class YahooCollectorGB(YahooCollector, ABC):
+    """Yahoo Finance collector for GB (London Stock Exchange) equities.
+
+    Instruments are the ``.L``-suffixed tickers from ``get_gb_stock_symbols``
+    (e.g. ``HSBA.L``, ``AZN.L``); the collector timezone is ``Europe/London``.
+    Yahoo Finance quotes these prices in GBp (pence), not pounds.
+    """
+
+    def get_instrument_list(self):
+        logger.info("get GB (LSE) stock symbols......")
+        symbols = get_gb_stock_symbols()
+        logger.info(f"get {len(symbols)} symbols.")
+        return symbols
+
+    def download_index_data(self):
+        """No index data is downloaded for GB; this hook does nothing."""
+
+    def normalize_symbol(self, symbol):
+        fname = code_to_fname(symbol)
+        return fname.upper()
+
+    @property
+    def _timezone(self):
+        return "Europe/London"
+
+
+class YahooCollectorGB1d(YahooCollectorGB):
+    """GB collector class for ``1d`` data; adds nothing to ``YahooCollectorGB``."""
+
+
+class YahooCollectorGB1min(YahooCollectorGB):
+    """GB collector class for ``1min`` data; adds nothing to ``YahooCollectorGB``."""
+
+
 class YahooNormalize(BaseNormalize):
     COLUMNS = ["open", "close", "high", "low", "volume"]
     DAILY_FORMAT = "%Y-%m-%d"
@@ -720,6 +755,49 @@ def symbol_to_yahoo(self, symbol):
         return fname_to_code(symbol)
 
 
+class YahooNormalizeGB:
+    """Mixin that supplies the GB (London Stock Exchange) trading calendar.
+
+    The calendar is whatever :func:`get_calendar_list` returns for ``"GB_ALL"``.
+    """
+
+    def _get_calendar_list(self) -> Iterable[pd.Timestamp]:
+        gb_calendar = get_calendar_list("GB_ALL")
+        return gb_calendar
+
+
+class YahooNormalizeGB1d(YahooNormalizeGB, YahooNormalize1d):
+    """``YahooNormalize1d`` combined with the GB calendar mixin; adds nothing else."""
+
+
+class YahooNormalizeGB1dExtend(YahooNormalizeGB, YahooNormalize1dExtend):
+    """``YahooNormalize1dExtend`` combined with the GB calendar mixin; adds nothing else."""
+
+
+class YahooNormalizeGB1min(YahooNormalizeGB, YahooNormalize1min):
+    """Normaliser for 1-minute GB (London Stock Exchange) bars.
+
+    The LSE session (08:00-16:30 Europe/London) has no midday break, so
+    ``AM_RANGE`` spans the whole day.  ``PM_RANGE`` begins and ends on the last
+    ``AM_RANGE`` minute; NOTE(review): this presumably relies on the parent
+    de-duplicating that minute -- confirm.  ``CALC_PAUSED_NUM`` is disabled.
+    """
+
+    AM_RANGE = ("08:00:00", "16:29:00")
+    PM_RANGE = ("16:29:00", "16:29:00")
+    CALC_PAUSED_NUM = False
+
+    def symbol_to_yahoo(self, symbol):
+        return fname_to_code(symbol)
+
+    def _get_1d_calendar_list(self):
+        return get_calendar_list("GB_ALL")
+
+    def _get_calendar_list(self) -> Iterable[pd.Timestamp]:
+        # TODO: support 1min
+        raise ValueError("Does not support 1min")
+
+
 class Run(BaseRun):
     def __init__(self, source_dir=None, normalize_dir=None, max_workers=1, interval="1d", region=REGION_CN):
         """