From 37f428c4e80ec6255aef057828add6cd7ac1b17f Mon Sep 17 00:00:00 2001 From: KoalaGeo Date: Tue, 19 May 2026 14:56:07 +0100 Subject: [PATCH 1/5] Enhance SQL Alchemy engine with connection pool options Added connection pool options for SQL Alchemy engine. --- pygeoapi/provider/sql.py | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/pygeoapi/provider/sql.py b/pygeoapi/provider/sql.py index 868ee4f88..c38c34f2b 100644 --- a/pygeoapi/provider/sql.py +++ b/pygeoapi/provider/sql.py @@ -134,6 +134,7 @@ def __init__( self.db_user, self._db_password, self.db_conn, + self.db_pool_options, **self.db_options ) self.table_model = get_table_model( @@ -615,6 +616,24 @@ def store_db_parameters( connection_data.get('search_path') or options.pop('search_path', ['public']) ) + # Connection-pool tuning. These are popped out of ``options`` so they + # are NOT passed to the DBAPI as connect_args, and are coerced to a + # hashable form so get_engine() can stay functools.cache()-able. + # Defaults keep SQLAlchemy's QueuePool sizing but, unlike SQLAlchemy's + # default of -1, recycle connections after an hour so that pooled + # connections cannot sit IDLE on the server indefinitely. + pool_defaults = { + 'pool_size': 5, + 'max_overflow': 10, + 'pool_recycle': 3600, + 'pool_timeout': 30, + 'pool_pre_ping': True, + } + self.db_pool_options = tuple(sorted( + (key, type(default)(options.pop(key, default))) + for key, default in pool_defaults.items() + )) + self.db_options = { k: v for k, v in options.items() @@ -631,6 +650,7 @@ def get_engine( user: str, password: str, conn_str: Optional[str] = None, + pool_options: tuple[tuple[str, Any], ...] 
= (), **connect_args ) -> Engine: """ @@ -643,6 +663,11 @@ def get_engine( :param user: database user :param password: database password :param conn_str: optional connection URL + :param pool_options: hashable tuple of (key, value) pairs controlling + the connection pool (pool_size, max_overflow, + pool_recycle, pool_timeout, pool_pre_ping). Passed + as a tuple rather than a dict so this function can + remain functools.cache()-able. :param connect_args: custom connection arguments to pass to create_engine() :returns: SQL Alchemy engine @@ -658,10 +683,16 @@ def get_engine( ) engine = create_engine( - conn_str, connect_args=connect_args, pool_pre_ping=True + conn_str, + connect_args=connect_args, + **dict(pool_options) + ) + + LOGGER.debug( + f'Created engine for {repr(engine.url)} ' + f'with pool options {dict(pool_options)}.' ) - LOGGER.debug(f'Created engine for {repr(engine.url)}.') return engine From 5841ed9de034438826bb5438ff03671976a1d234 Mon Sep 17 00:00:00 2001 From: KoalaGeo Date: Tue, 19 May 2026 14:56:48 +0100 Subject: [PATCH 2/5] Add db_pool_options to PostgreSQL connection --- pygeoapi/process/manager/postgresql.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pygeoapi/process/manager/postgresql.py b/pygeoapi/process/manager/postgresql.py index 7a2adc559..5f1c9eeeb 100644 --- a/pygeoapi/process/manager/postgresql.py +++ b/pygeoapi/process/manager/postgresql.py @@ -95,6 +95,7 @@ def __init__(self, manager_def: dict): self.db_user, self._db_password, self.db_conn, + self.db_pool_options, **self.db_options ) self.table_output = self.output_dir is None From bc68af4209dff101a2b7b35d09e67718206a6c47 Mon Sep 17 00:00:00 2001 From: KoalaGeo Date: Tue, 19 May 2026 15:08:23 +0100 Subject: [PATCH 3/5] Update pool_recycle to SQLAlchemy default value Change pool_recycle to -1 to preserve current behavior. 
--- pygeoapi/provider/sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygeoapi/provider/sql.py b/pygeoapi/provider/sql.py index c38c34f2b..1b970a03e 100644 --- a/pygeoapi/provider/sql.py +++ b/pygeoapi/provider/sql.py @@ -625,7 +625,7 @@ def store_db_parameters( pool_defaults = { 'pool_size': 5, 'max_overflow': 10, - 'pool_recycle': 3600, + 'pool_recycle': -1, # SQLAlchemy default; preserves current behaviour 'pool_timeout': 30, 'pool_pre_ping': True, } From cd9c836e074456da925b17d7280d9cc583d3d817 Mon Sep 17 00:00:00 2001 From: KoalaGeo Date: Tue, 19 May 2026 15:12:28 +0100 Subject: [PATCH 4/5] Enhance SQLAlchemy connection pooling settings Added SQLAlchemy connection-pool tuning options to configuration. --- docs/source/publishing/ogcapi-features.rst | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/docs/source/publishing/ogcapi-features.rst b/docs/source/publishing/ogcapi-features.rst index 162f1921f..b95158917 100644 --- a/docs/source/publishing/ogcapi-features.rst +++ b/docs/source/publishing/ogcapi-features.rst @@ -703,11 +703,27 @@ These are optional and if not specified, the default from the engine will be use # Number of seconds after which a TCP keepalive message that is not # acknowledged by the server should be retransmitted. keepalives_interval: 1 + # SQLAlchemy connection-pool tuning (optional). Defaults match + # SQLAlchemy's QueuePool and preserve previous behaviour. + # Persistent connections held open per worker process. + pool_size: 5 + # Extra short-lived connections allowed above pool_size. + max_overflow: 10 + # Recreate connections older than this many seconds. -1 (the + # default) never recycles; set a finite value (e.g. 300) so + # pooled connections cannot sit IDLE on the server indefinitely. + pool_recycle: -1 + # Seconds to wait for a connection from the pool before erroring. + pool_timeout: 30 + # Test connections with a lightweight ping before use. 
+ pool_pre_ping: true id_field: osm_id table: hotosm_bdi_waterways geom_field: foo_geom count: true # Optional; Default true; Enable/disable count for improved performance. +``get_engine()`` is cached per worker process, so providers that share the same database connection should use identical pool options to keep sharing a single engine; differing pool options intentionally create separate engines. + The PostgreSQL provider is also able to connect to Cloud SQL databases. .. code-block:: yaml From 9838aa97756cf42d29de74ec1d42827715084e39 Mon Sep 17 00:00:00 2001 From: KoalaGeo Date: Tue, 19 May 2026 15:14:22 +0100 Subject: [PATCH 5/5] Add files via upload test_sql_pool_options.py exercises `store_db_parameters()` directly, requires no database, and runs in standard CI. It asserts the zero-behaviour-change defaults, override + typing, no DBAPI leakage, the existing dict-filtering, hashable/deterministic cache keys, and coexistence with search_path. --- tests/provider/test_sql_pool_options.py | 99 +++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 tests/provider/test_sql_pool_options.py diff --git a/tests/provider/test_sql_pool_options.py b/tests/provider/test_sql_pool_options.py new file mode 100644 index 000000000..ec2c6f5ee --- /dev/null +++ b/tests/provider/test_sql_pool_options.py @@ -0,0 +1,99 @@ +# ================================================================= +# Tests for configurable SQLAlchemy connection-pool options on the +# SQL provider. These exercise store_db_parameters() directly and do +# not require a live database, so they run in standard CI.
+# ================================================================= + +import pytest + +from pygeoapi.provider.sql import store_db_parameters + + +class _Dummy: + """Minimal stand-in for a provider/manager instance.""" + default_port = 5432 + + +CONN = {'host': 'h', 'dbname': 'd', 'user': 'u', 'password': 'p'} + + +def test_pool_options_defaults_preserve_current_behaviour(): + obj = _Dummy() + store_db_parameters(obj, dict(CONN), {}) + pool = dict(obj.db_pool_options) + # Defaults must match pre-existing effective behaviour: + # pool_pre_ping was hardcoded True; pool_recycle was unset (-1). + assert pool['pool_size'] == 5 + assert pool['max_overflow'] == 10 + assert pool['pool_timeout'] == 30 + assert pool['pool_pre_ping'] is True + assert pool['pool_recycle'] == -1 + + +def test_pool_options_are_overridable_and_typed(): + obj = _Dummy() + store_db_parameters( + obj, dict(CONN), + {'pool_size': 2, 'max_overflow': 3, 'pool_recycle': 300}, + ) + pool = dict(obj.db_pool_options) + assert pool['pool_size'] == 2 and isinstance(pool['pool_size'], int) + assert pool['max_overflow'] == 3 + assert pool['pool_recycle'] == 300 + # untouched keys keep defaults + assert pool['pool_timeout'] == 30 + assert pool['pool_pre_ping'] is True + + +def test_pool_options_not_leaked_to_dbapi_connect_args(): + obj = _Dummy() + store_db_parameters( + obj, dict(CONN), + {'connect_timeout': 10, 'pool_size': 2, 'pool_recycle': 300}, + ) + for k in ('pool_size', 'max_overflow', 'pool_recycle', + 'pool_timeout', 'pool_pre_ping'): + assert k not in obj.db_options + # genuine DBAPI connect args still pass through + assert obj.db_options['connect_timeout'] == 10 + + +def test_dict_valued_options_still_filtered(): + obj = _Dummy() + store_db_parameters( + obj, dict(CONN), + {'pool_size': 2, 'zoom': {'min': 0, 'max': 22}}, + ) + assert 'zoom' not in obj.db_options + assert dict(obj.db_pool_options)['pool_size'] == 2 + + +def test_pool_options_hashable_and_deterministic(): + a, b = _Dummy(), _Dummy() 
+ store_db_parameters(a, dict(CONN), {'pool_size': 2}) + store_db_parameters(b, dict(CONN), {'pool_size': 2}) + # identical config -> identical key -> shared engine via functools.cache + assert a.db_pool_options == b.db_pool_options + assert hash(a.db_pool_options) == hash(b.db_pool_options) + + c = _Dummy() + store_db_parameters(c, dict(CONN), {'pool_size': 9}) + # differing pool config -> distinct key (separate engine, by design) + assert c.db_pool_options != a.db_pool_options + + +def test_pool_options_coexist_with_search_path(): + obj = _Dummy() + store_db_parameters( + obj, dict(CONN), + {'search_path': ['published', 'public'], 'pool_size': 4}, + ) + assert obj.db_search_path == ('published', 'public') + assert dict(obj.db_pool_options)['pool_size'] == 4 + + +@pytest.mark.parametrize('bad', [{'pool_size': 'two'}]) +def test_non_integer_pool_value_raises(bad): + # type coercion surfaces bad config loudly rather than silently + with pytest.raises(ValueError): + store_db_parameters(_Dummy(), dict(CONN), bad)