geopython · KoalaGeo · May 19, 2026 · May 19, 2026 · May 19, 2026 · May 19, 2026
diff --git a/docs/source/publishing/ogcapi-features.rst b/docs/source/publishing/ogcapi-features.rst
@@ -703,11 +703,27 @@ These are optional and if not specified, the default from the engine will be use
              # Number of seconds after which a TCP keepalive message that is not
              # acknowledged by the server should be retransmitted.
              keepalives_interval: 1
+             # SQLAlchemy connection-pool tuning (optional). Defaults preserve
+             # previous behaviour: QueuePool defaults, with pool_pre_ping on.
+             # Persistent connections held open per worker process.
+             pool_size: 5
+             # Extra short-lived connections allowed above pool_size.
+             max_overflow: 10
+             # Recreate connections older than this many seconds. -1 (the
+             # default) never recycles; set a finite value (e.g. 300) so
+             # pooled connections cannot sit IDLE on the server indefinitely.
+             pool_recycle: -1
+             # Seconds to wait for a connection from the pool before erroring.
+             pool_timeout: 30
+             # Test connections with a lightweight ping before use.
+             pool_pre_ping: true
          id_field: osm_id
          table: hotosm_bdi_waterways
          geom_field: foo_geom
          count: true # Optional; Default true; Enable/disable count for improved performance.
 
+``get_engine()`` is cached per worker process, so providers that share the same database connection must use identical pool options to keep sharing a single engine; providers with differing pool options intentionally get separate engines.
+
 The PostgreSQL provider is also able to connect to Cloud SQL databases.
 
 .. code-block:: yaml

diff --git a/pygeoapi/process/manager/postgresql.py b/pygeoapi/process/manager/postgresql.py
@@ -95,6 +95,7 @@ def __init__(self, manager_def: dict):
             self.db_user,
             self._db_password,
             self.db_conn,
+            self.db_pool_options,
             **self.db_options
         )
         self.table_output = self.output_dir is None

diff --git a/pygeoapi/provider/sql.py b/pygeoapi/provider/sql.py
@@ -134,6 +134,7 @@ def __init__(
             self.db_user,
             self._db_password,
             self.db_conn,
+            self.db_pool_options,
-            self.db_pool_options,
+            tuple(self.db_pool_options.items()),  # convert to hashable type, for using with functools.cache
-            self.db_pool_options,
+            tuple(self.db_pool_options.items()),  # convert to hashable type, for using with functools.cache
             **self.db_options
         )
         self.table_model = get_table_model(
@@ -615,6 +616,24 @@ def store_db_parameters(
         connection_data.get('search_path') or
         options.pop('search_path', ['public'])
     )
+    # Connection-pool tuning. These are popped out of ``options`` so they
+    # are NOT passed to the DBAPI as connect_args, and are coerced to a
+    # hashable form so get_engine() can stay functools.cache()-able.
+    # Defaults mirror SQLAlchemy's QueuePool (pool_recycle=-1 never recycles)
+    # and keep pool_pre_ping on, preserving previous behaviour; configure a
+    # finite pool_recycle to stop connections sitting IDLE indefinitely.
+    pool_defaults = {
+        'pool_size': 5,
+        'max_overflow': 10,
+        'pool_recycle': -1,   # SQLAlchemy default; preserves current behaviour
+        'pool_timeout': 30,
+        'pool_pre_ping': True,
+    }
+    self.db_pool_options = tuple(sorted(
+        (key, type(default)(options.pop(key, default)))
+        for key, default in pool_defaults.items()
+    ))
+
     self.db_options = {
         k: v
         for k, v in options.items()
@@ -631,6 +650,7 @@ def get_engine(
     user: str,
     password: str,
     conn_str: Optional[str] = None,
+    pool_options: tuple[tuple[str, Any], ...] = (),
     **connect_args
 ) -> Engine:
     """
@@ -643,6 +663,11 @@ def get_engine(
     :param user: database user
     :param password: database password
     :param conn_str: optional connection URL
+    :param pool_options: hashable tuple of (key, value) pairs controlling
+                         the connection pool (pool_size, max_overflow,
+                         pool_recycle, pool_timeout, pool_pre_ping). Passed
+                         as a tuple rather than a dict so this function can
+                         remain functools.cache()-able.
     :param connect_args: custom connection arguments to pass to create_engine()
 
     :returns: SQL Alchemy engine
@@ -658,10 +683,16 @@ def get_engine(
         )
 
     engine = create_engine(
-        conn_str, connect_args=connect_args, pool_pre_ping=True
+        conn_str,
+        connect_args=connect_args,
+        **dict(pool_options)
+    )
+
+    LOGGER.debug(
+        f'Created engine for {repr(engine.url)} '
+        f'with pool options {dict(pool_options)}.'
     )
 
-    LOGGER.debug(f'Created engine for {repr(engine.url)}.')
     return engine
 
 

diff --git a/tests/provider/test_sql_pool_options.py b/tests/provider/test_sql_pool_options.py
@@ -0,0 +1,99 @@
+# =================================================================
+# Tests for configurable SQLAlchemy connection-pool options on the
+# SQL provider. These exercise store_db_parameters() directly and do
+# not require a live database, so they run in standard CI.
+# =================================================================
+
+import pytest
+
+from pygeoapi.provider.sql import store_db_parameters
+
+
+class _Dummy:
+    """Minimal stand-in for a provider/manager instance."""
+    default_port = 5432
+
+
+CONN = {'host': 'h', 'dbname': 'd', 'user': 'u', 'password': 'p'}
+
+
+def test_pool_options_defaults_preserve_current_behaviour():
+    obj = _Dummy()
+    store_db_parameters(obj, dict(CONN), {})
+    pool = dict(obj.db_pool_options)
+    # Defaults must match pre-existing effective behaviour:
+    # pool_pre_ping was hardcoded True; pool_recycle was unset (-1).
+    assert pool['pool_size'] == 5
+    assert pool['max_overflow'] == 10
+    assert pool['pool_timeout'] == 30
+    assert pool['pool_pre_ping'] is True
+    assert pool['pool_recycle'] == -1
+
+
+def test_pool_options_are_overridable_and_typed():
+    obj = _Dummy()
+    store_db_parameters(
+        obj, dict(CONN),
+        {'pool_size': 2, 'max_overflow': 3, 'pool_recycle': 300},
+    )
+    pool = dict(obj.db_pool_options)
+    assert pool['pool_size'] == 2 and isinstance(pool['pool_size'], int)
+    assert pool['max_overflow'] == 3
+    assert pool['pool_recycle'] == 300
+    # untouched keys keep defaults
+    assert pool['pool_timeout'] == 30
+    assert pool['pool_pre_ping'] is True
+
+
+def test_pool_options_not_leaked_to_dbapi_connect_args():
+    obj = _Dummy()
+    store_db_parameters(
+        obj, dict(CONN),
+        {'connect_timeout': 10, 'pool_size': 2, 'pool_recycle': 300},
+    )
+    for k in ('pool_size', 'max_overflow', 'pool_recycle',
+              'pool_timeout', 'pool_pre_ping'):
+        assert k not in obj.db_options
+    # genuine DBAPI connect args still pass through
+    assert obj.db_options['connect_timeout'] == 10
+
+
+def test_dict_valued_options_still_filtered():
+    obj = _Dummy()
+    store_db_parameters(
+        obj, dict(CONN),
+        {'pool_size': 2, 'zoom': {'min': 0, 'max': 22}},
+    )
+    assert 'zoom' not in obj.db_options
+    assert dict(obj.db_pool_options)['pool_size'] == 2
+
+
+def test_pool_options_hashable_and_deterministic():
+    a, b = _Dummy(), _Dummy()
+    store_db_parameters(a, dict(CONN), {'pool_size': 2})
+    store_db_parameters(b, dict(CONN), {'pool_size': 2})
+    # identical config -> identical key -> shared engine via functools.cache
+    assert a.db_pool_options == b.db_pool_options
+    assert hash(a.db_pool_options) == hash(b.db_pool_options)
+
+    c = _Dummy()
+    store_db_parameters(c, dict(CONN), {'pool_size': 9})
+    # differing pool config -> distinct key (separate engine, by design)
+    assert c.db_pool_options != a.db_pool_options
+
+
+def test_pool_options_coexist_with_search_path():
+    obj = _Dummy()
+    store_db_parameters(
+        obj, dict(CONN),
+        {'search_path': ['published', 'public'], 'pool_size': 4},
+    )
+    assert obj.db_search_path == ('published', 'public')
+    assert dict(obj.db_pool_options)['pool_size'] == 4
+
+
+@pytest.mark.parametrize('bad', [{'pool_size': 'two'}])
+def test_non_integer_pool_value_raises(bad):
+    # type coercion surfaces bad config loudly rather than silently
+    with pytest.raises(ValueError):
+        store_db_parameters(_Dummy(), dict(CONN), bad)