-
-
Notifications
You must be signed in to change notification settings - Fork 316
Pooling #2345
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Pooling #2345
Changes from all commits
37f428c
5841ed9
bc68af4
cd9c836
9838aa9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -134,6 +134,7 @@ def __init__( | |||||
| self.db_user, | ||||||
| self._db_password, | ||||||
| self.db_conn, | ||||||
| self.db_pool_options, | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. as per my other comment, in my opinion it would be clearer if the tuple would be generated here, perhaps also accompanied with a comment mentioning that this is made as a way to enable making use of Also, in modern Python, a dict's insertion ordering is preserved, so I don't think sorting the tuple would be needed.
Suggested change
|
||||||
| **self.db_options | ||||||
| ) | ||||||
| self.table_model = get_table_model( | ||||||
|
|
@@ -615,6 +616,24 @@ def store_db_parameters( | |||||
| connection_data.get('search_path') or | ||||||
| options.pop('search_path', ['public']) | ||||||
| ) | ||||||
| # Connection-pool tuning. These are popped out of ``options`` so they | ||||||
| # are NOT passed to the DBAPI as connect_args, and are coerced to a | ||||||
| # hashable form so get_engine() can stay functools.cache()-able. | ||||||
| # Defaults keep SQLAlchemy's QueuePool sizing but, unlike SQLAlchemy's | ||||||
| # default of -1, recycle connections after an hour so that pooled | ||||||
| # connections cannot sit IDLE on the server indefinitely. | ||||||
|
Comment on lines
+622
to
+624
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This part of the comment seems to be outdated, as you end up setting the default value of |
||||||
| pool_defaults = { | ||||||
| 'pool_size': 5, | ||||||
| 'max_overflow': 10, | ||||||
| 'pool_recycle': -1, # SQLAlchemy default; preserves current behaviour | ||||||
| 'pool_timeout': 30, | ||||||
| 'pool_pre_ping': True, | ||||||
| } | ||||||
| self.db_pool_options = tuple(sorted( | ||||||
| (key, type(default)(options.pop(key, default))) | ||||||
| for key, default in pool_defaults.items() | ||||||
| )) | ||||||
|
|
||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In my opinion this could be made easier to read and less complex by:
Also, note that your implementation contains a subtle bug when trying to parse {'pool_pre_ping': 'False'} # I'm passing a string with the value of "False"then the outcome would be: # type(True)("False")
TrueIn other words, -pool_defaults = {
- 'pool_size': 5,
- 'max_overflow': 10,
- 'pool_recycle': -1, # SQLAlchemy default; preserves current behaviour
- 'pool_timeout': 30,
- 'pool_pre_ping': True,
-}
-self.db_pool_options = tuple(sorted(
- (key, type(default)(options.pop(key, default)))
- for key, default in pool_defaults.items()
-))
+self.pool_defaults = {
+ 'pool_size': options.pop('pool_size', 5),
+ 'max_overflow': options.pop('max_overflow', 10),
+ 'pool_recycle': options.pop('pool_recycle', -1), # SQLALchemy default - never release connections
+ 'pool_timeout': options.pop('pool_timeout', 30),
+ 'pool_pre_ping': options.pop('pool_pre_ping', True),
+} |
||||||
| self.db_options = { | ||||||
| k: v | ||||||
| for k, v in options.items() | ||||||
|
|
@@ -631,6 +650,7 @@ def get_engine( | |||||
| user: str, | ||||||
| password: str, | ||||||
| conn_str: Optional[str] = None, | ||||||
| pool_options: tuple[tuple[str, Any], ...] = (), | ||||||
| **connect_args | ||||||
| ) -> Engine: | ||||||
| """ | ||||||
|
|
@@ -643,6 +663,11 @@ def get_engine( | |||||
| :param user: database user | ||||||
| :param password: database password | ||||||
| :param conn_str: optional connection URL | ||||||
| :param pool_options: hashable tuple of (key, value) pairs controlling | ||||||
| the connection pool (pool_size, max_overflow, | ||||||
| pool_recycle, pool_timeout, pool_pre_ping). Passed | ||||||
| as a tuple rather than a dict so this function can | ||||||
| remain functools.cache()-able. | ||||||
| :param connect_args: custom connection arguments to pass to create_engine() | ||||||
|
|
||||||
| :returns: SQL Alchemy engine | ||||||
|
|
@@ -658,10 +683,16 @@ def get_engine( | |||||
| ) | ||||||
|
|
||||||
| engine = create_engine( | ||||||
| conn_str, connect_args=connect_args, pool_pre_ping=True | ||||||
| conn_str, | ||||||
| connect_args=connect_args, | ||||||
| **dict(pool_options) | ||||||
| ) | ||||||
|
|
||||||
| LOGGER.debug( | ||||||
| f'Created engine for {repr(engine.url)} ' | ||||||
| f'with pool options {dict(pool_options)}.' | ||||||
| ) | ||||||
|
|
||||||
| LOGGER.debug(f'Created engine for {repr(engine.url)}.') | ||||||
| return engine | ||||||
|
|
||||||
|
|
||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,99 @@ | ||
| # ================================================================= | ||
| # Tests for configurable SQLAlchemy connection-pool options on the | ||
| # SQL provider. These exercise store_db_parameters() directly and do | ||
| # not require a live database, so they run in standard CI. | ||
| # ================================================================= | ||
|
|
||
| import pytest | ||
|
|
||
| from pygeoapi.provider.sql import store_db_parameters | ||
|
|
||
|
|
||
| class _Dummy: | ||
| """Minimal stand-in for a provider/manager instance.""" | ||
| default_port = 5432 | ||
|
|
||
|
|
||
| CONN = {'host': 'h', 'dbname': 'd', 'user': 'u', 'password': 'p'} | ||
|
|
||
|
|
||
| def test_pool_options_defaults_preserve_current_behaviour(): | ||
| obj = _Dummy() | ||
| store_db_parameters(obj, dict(CONN), {}) | ||
| pool = dict(obj.db_pool_options) | ||
| # Defaults must match pre-existing effective behaviour: | ||
| # pool_pre_ping was hardcoded True; pool_recycle was unset (-1). | ||
| assert pool['pool_size'] == 5 | ||
| assert pool['max_overflow'] == 10 | ||
| assert pool['pool_timeout'] == 30 | ||
| assert pool['pool_pre_ping'] is True | ||
| assert pool['pool_recycle'] == -1 | ||
|
Comment on lines
+20
to
+30
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test seems unnecessary to me - when this PR gets merged, the behavior it implements will become the current behavior, so the test looses its relevancy. |
||
|
|
||
|
|
||
| def test_pool_options_are_overridable_and_typed(): | ||
| obj = _Dummy() | ||
| store_db_parameters( | ||
| obj, dict(CONN), | ||
| {'pool_size': 2, 'max_overflow': 3, 'pool_recycle': 300}, | ||
| ) | ||
| pool = dict(obj.db_pool_options) | ||
| assert pool['pool_size'] == 2 and isinstance(pool['pool_size'], int) | ||
| assert pool['max_overflow'] == 3 | ||
| assert pool['pool_recycle'] == 300 | ||
| # untouched keys keep defaults | ||
| assert pool['pool_timeout'] == 30 | ||
| assert pool['pool_pre_ping'] is True | ||
|
Comment on lines
+33
to
+45
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test would be unnecessary if you'd go with my suggestion above, of storing |
||
|
|
||
|
|
||
| def test_pool_options_not_leaked_to_dbapi_connect_args(): | ||
| obj = _Dummy() | ||
| store_db_parameters( | ||
| obj, dict(CONN), | ||
| {'connect_timeout': 10, 'pool_size': 2, 'pool_recycle': 300}, | ||
| ) | ||
| for k in ('pool_size', 'max_overflow', 'pool_recycle', | ||
| 'pool_timeout', 'pool_pre_ping'): | ||
| assert k not in obj.db_options | ||
| # genuine DBAPI connect args still pass through | ||
| assert obj.db_options['connect_timeout'] == 10 | ||
|
|
||
|
|
||
| def test_dict_valued_options_still_filtered(): | ||
| obj = _Dummy() | ||
| store_db_parameters( | ||
| obj, dict(CONN), | ||
| {'pool_size': 2, 'zoom': {'min': 0, 'max': 22}}, | ||
| ) | ||
| assert 'zoom' not in obj.db_options | ||
| assert dict(obj.db_pool_options)['pool_size'] == 2 | ||
|
|
||
|
Comment on lines
+61
to
+69
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test seems to be unnecessary, as it is not testing the changes you made in this PR. It verifies that the contents of IMO this PR does not make any changes that would warrant this verification, unless you would not trust the behavior of |
||
|
|
||
| def test_pool_options_hashable_and_deterministic(): | ||
| a, b = _Dummy(), _Dummy() | ||
| store_db_parameters(a, dict(CONN), {'pool_size': 2}) | ||
| store_db_parameters(b, dict(CONN), {'pool_size': 2}) | ||
| # identical config -> identical key -> shared engine via functools.cache | ||
| assert a.db_pool_options == b.db_pool_options | ||
| assert hash(a.db_pool_options) == hash(b.db_pool_options) | ||
|
|
||
| c = _Dummy() | ||
| store_db_parameters(c, dict(CONN), {'pool_size': 9}) | ||
| # differing pool config -> distinct key (separate engine, by design) | ||
| assert c.db_pool_options != a.db_pool_options | ||
|
|
||
|
Comment on lines
+71
to
+83
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is testing Python's own implementation of how tuples are hashed, so I don't think it is relevant to include in pygeoapi. |
||
|
|
||
| def test_pool_options_coexist_with_search_path(): | ||
| obj = _Dummy() | ||
| store_db_parameters( | ||
| obj, dict(CONN), | ||
| {'search_path': ['published', 'public'], 'pool_size': 4}, | ||
| ) | ||
| assert obj.db_search_path == ('published', 'public') | ||
| assert dict(obj.db_pool_options)['pool_size'] == 4 | ||
|
|
||
|
Comment on lines
+85
to
+93
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test seems unnecessary, as it does not test the functionality introduced in this PR |
||
|
|
||
| @pytest.mark.parametrize('bad', [{'pool_size': 'two'}]) | ||
| def test_non_integer_pool_value_raises(bad): | ||
| # type coercion surfaces bad config loudly rather than silently | ||
| with pytest.raises(ValueError): | ||
| store_db_parameters(_Dummy(), dict(CONN), bad) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
All of these new parameters need to be added to the config schema at
pygeoapi/resources/schemas/config/pygeoapi-config-0.x.ymlThis will make it possible to test a pygeoapi configuration for correctness even before starting up the server.