From f55e36c0d945d3718fbf31721de078a10de2268e Mon Sep 17 00:00:00 2001 From: Caralyn Reisle Date: Mon, 9 Feb 2026 12:17:41 -0800 Subject: [PATCH 01/15] Ignore more venv names --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index f634e823..f001284b 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ htmlcov # common virtual environment names venv* env +.venv # editors .idea From 94eba4b6c9e208cc5970e83ad23c7ca1c2dcb2b4 Mon Sep 17 00:00:00 2001 From: Caralyn Reisle Date: Mon, 9 Feb 2026 12:22:52 -0800 Subject: [PATCH 02/15] Replace custom caching with requests cache library --- pori_python/graphkb/match.py | 16 ------ pori_python/graphkb/util.py | 81 +++++++++++++++++++--------- tests/test_graphkb/test_statement.py | 2 +- 3 files changed, 56 insertions(+), 43 deletions(-) diff --git a/pori_python/graphkb/match.py b/pori_python/graphkb/match.py index 0c791383..d24bb06e 100644 --- a/pori_python/graphkb/match.py +++ b/pori_python/graphkb/match.py @@ -111,22 +111,6 @@ def get_equivalent_features( ) -def cache_missing_features(conn: GraphKBConnection) -> None: - """ - Create a cache of features that exist to avoid repeatedly querying - for missing features - """ - genes = cast( - List[Ontology], - conn.query({"target": "Feature", "returnProperties": ["name", "sourceId"], "neighbors": 0}), - ) - for gene in genes: - if gene["name"]: - FEATURES_CACHE.add(gene["name"].lower()) - if gene["sourceId"]: - FEATURES_CACHE.add(gene["sourceId"].lower()) - - def match_category_variant( conn: GraphKBConnection, reference_name: str, diff --git a/pori_python/graphkb/util.py b/pori_python/graphkb/util.py index 85bfa517..84ab0a6b 100644 --- a/pori_python/graphkb/util.py +++ b/pori_python/graphkb/util.py @@ -8,6 +8,7 @@ import re import time from datetime import datetime +from requests_cache import CacheMixin from typing import Any, Dict, Iterable, List, Optional, Union, cast from urllib3.util.retry import Retry from urllib.parse import 
urlsplit @@ -88,11 +89,8 @@ def millis_interval(start: datetime, end: datetime) -> int: return millis -def cache_key(request_body) -> str: - """Create a cache key for a query request to GraphKB.""" - body = json.dumps(request_body, sort_keys=True) - hash_code = hashlib.md5(f"/query{body}".encode("utf-8")).hexdigest() - return hash_code +class CustomSession(CacheMixin, requests.Session): + pass class GraphKBConnection: @@ -102,8 +100,32 @@ def __init__( username: str = "", password: str = "", use_global_cache: bool = True, + cache_name: str = "", ): - self.http = requests.Session() + """ + Docstring for __init__ + + Args: + - use_global_cache: cache requests across all requests to GKB + - cache_db: Path or connection URL to the database which stors the requests cache. see https://requests-cache.readthedocs.io/en/v0.6.4/user_guide.html#cache-name + """ + if use_global_cache: + if not cache_name: + self.http = CustomSession( + backend="memory", + cache_control=True, + allowable_methods=["GET", "POST"], + ignored_parameters=["Authorization"], + ) + else: + self.http = CustomSession( + cache_name, + cache_control=True, + allowable_methods=["GET", "POST"], + ignored_parameters=["Authorization"], + ) + else: + self.http = requests.Session() retries = Retry( total=100, connect=5, @@ -117,8 +139,10 @@ def __init__( self.url = url self.username = username self.password = password - self.headers = {"Accept": "application/json", "Content-Type": "application/json"} - self.cache: Dict[Any, Any] = {} if not use_global_cache else QUERY_CACHE + self.headers = { + "Accept": "application/json", + "Content-Type": "application/json", + } self.request_count = 0 self.first_request: Optional[datetime] = None self.last_request: Optional[datetime] = None @@ -137,7 +161,13 @@ def load(self) -> Optional[float]: return self.request_count * 1000 / msec return None - def request(self, endpoint: str, method: str = "GET", **kwargs) -> Dict: + def request( + self, + endpoint: str, + method: str = 
"GET", + headers: Optional[dict[str, str]] = None, + **kwargs, + ) -> Dict: """Request wrapper to handle adding common headers and logging. Args: @@ -158,6 +188,11 @@ def request(self, endpoint: str, method: str = "GET", **kwargs) -> Dict: if endpoint in ["query", "parse"]: timeout = (connect_timeout, read_timeout) + request_headers = {} + request_headers.update(self.headers) + if headers is not None: + request_headers.update(headers) + start_time = datetime.now() if not self.first_request: @@ -180,8 +215,8 @@ def request(self, endpoint: str, method: str = "GET", **kwargs) -> Dict: need_refresh_login = False self.request_count += 1 - resp = requests.request( - method, url, headers=self.headers, timeout=timeout, **kwargs + resp = self.http.request( + method, url, headers=request_headers, timeout=timeout, **kwargs ) if resp.status_code == 401 or resp.status_code == 403: logger.debug(f"/{endpoint} - {resp.status_code} - retrying") @@ -294,11 +329,6 @@ def login(self, username: str, password: str, pori_demo: bool = False) -> None: def refresh_login(self) -> None: self.login(self.username, self.password) - def set_cache_data(self, request_body: Dict, result: List[Record]) -> None: - """Explicitly add a query to the cache.""" - hash_code = cache_key(request_body) - self.cache[hash_code] = result - def query( self, request_body: Dict = {}, @@ -310,23 +340,22 @@ def query( """ Query GraphKB """ - result: List[Record] = [] - hash_code = "" - - if not ignore_cache and paginate: - hash_code = cache_key(request_body) - if hash_code in self.cache and not force_refresh: - return self.cache[hash_code] + headers = {} + if ignore_cache or force_refresh: + headers = {"Cache-Control": "no-cache"} + result: List[Record] = [] while True: - content = self.post("query", data={**request_body, "limit": limit, "skip": len(result)}) + content = self.post( + "query", + data={**request_body, "limit": limit, "skip": len(result)}, + headers=headers, + ) records = content["result"] 
result.extend(records) if len(records) < limit or not paginate: break - if not ignore_cache and paginate: - self.cache[hash_code] = result return result def parse(self, hgvs_string: str, requireFeatures: bool = False) -> ParsedVariant: diff --git a/tests/test_graphkb/test_statement.py b/tests/test_graphkb/test_statement.py index fcf0bef5..49b15a27 100644 --- a/tests/test_graphkb/test_statement.py +++ b/tests/test_graphkb/test_statement.py @@ -41,7 +41,7 @@ def term_tree_calls(*final_values): query_mock = Mock() query_mock.side_effect = return_values - return Mock(query=query_mock, cache={}) + return Mock(query=query_mock) class TestCategorizeRelevance: From 21a224bdde59e022a8ee409c214e84c0e51b5b69 Mon Sep 17 00:00:00 2001 From: Caralyn Reisle Date: Mon, 9 Feb 2026 12:36:53 -0800 Subject: [PATCH 03/15] Add requests cache pkg to requirements --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index b5d3df06..a22e99ce 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,6 +37,7 @@ install_requires = requests tqdm typing_extensions>=3.7.4.2,<5 + requests-cache[sqlite] [options.extras_require] deploy = twine; wheel; m2r From c327f6fafbf80a50a5b145a839efdb477280e8bf Mon Sep 17 00:00:00 2001 From: Caralyn Reisle Date: Mon, 9 Feb 2026 12:50:30 -0800 Subject: [PATCH 04/15] Remove leftover references to old cache --- pori_python/graphkb/vocab.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pori_python/graphkb/vocab.py b/pori_python/graphkb/vocab.py index 26033e75..3242a06c 100644 --- a/pori_python/graphkb/vocab.py +++ b/pori_python/graphkb/vocab.py @@ -181,9 +181,6 @@ def get_terms_set( ) -> Set[str]: """Get a set of vocabulary rids given some base/parent term names.""" base_terms = [base_terms] if isinstance(base_terms, str) else base_terms - cache_key = tuple(sorted(base_terms)) - if graphkb_conn.cache.get(cache_key, None) and not ignore_cache: - return graphkb_conn.cache[cache_key] terms = set() for base_term in base_terms: 
terms.update( @@ -193,6 +190,4 @@ def get_terms_set( ) ) ) - if not ignore_cache: - graphkb_conn.cache[cache_key] = terms return terms From d7db900468bdf09e84902fad551298747b81f93c Mon Sep 17 00:00:00 2001 From: Caralyn Reisle Date: Mon, 9 Feb 2026 13:07:04 -0800 Subject: [PATCH 05/15] Remove no longer relevant test --- tests/test_graphkb/test_match.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/tests/test_graphkb/test_match.py b/tests/test_graphkb/test_match.py index d4ee5679..00798143 100644 --- a/tests/test_graphkb/test_match.py +++ b/tests/test_graphkb/test_match.py @@ -563,22 +563,6 @@ def test_structural_variants(self, conn): assert type not in expected.get("does_not_matches", {}).get("type", []) -class TestCacheMissingFeatures: - def test_filling_cache(self): - mock_conn = MagicMock( - query=MagicMock( - return_value=[ - {"name": "bob", "sourceId": "alice"}, - {"name": "KRAS", "sourceId": "1234"}, - ] - ) - ) - match.cache_missing_features(mock_conn) - assert "kras" in match.FEATURES_CACHE - assert "alice" in match.FEATURES_CACHE - match.FEATURES_CACHE = None - - class TestTypeScreening: # Types as class variables default_type = DEFAULT_NON_STRUCTURAL_VARIANT_TYPE From 5f80d1ae25aeb87b81b055bff5b162e6def48d63 Mon Sep 17 00:00:00 2001 From: Caralyn Reisle Date: Wed, 11 Feb 2026 21:30:46 -0800 Subject: [PATCH 06/15] Fix missing merge conflict --- pori_python/graphkb/util.py | 48 ++++++++++--------------------------- 1 file changed, 12 insertions(+), 36 deletions(-) diff --git a/pori_python/graphkb/util.py b/pori_python/graphkb/util.py index 2c1e7e00..a0001347 100644 --- a/pori_python/graphkb/util.py +++ b/pori_python/graphkb/util.py @@ -100,7 +100,7 @@ def __init__( username: str = '', password: str = '', use_global_cache: bool = True, - cache_name: str = "", + cache_name: str = '', ): """ Docstring for __init__ @@ -112,17 +112,17 @@ def __init__( if use_global_cache: if not cache_name: self.http = CustomSession( - backend="memory", + 
backend='memory', cache_control=True, - allowable_methods=["GET", "POST"], - ignored_parameters=["Authorization"], + allowable_methods=['GET', 'POST'], + ignored_parameters=['Authorization'], ) else: self.http = CustomSession( cache_name, cache_control=True, - allowable_methods=["GET", "POST"], - ignored_parameters=["Authorization"], + allowable_methods=['GET', 'POST'], + ignored_parameters=['Authorization'], ) else: self.http = requests.Session() @@ -139,15 +139,10 @@ def __init__( self.url = url self.username = username self.password = password -<<<<<<< HEAD self.headers = { - "Accept": "application/json", - "Content-Type": "application/json", + 'Accept': 'application/json', + 'Content-Type': 'application/json', } -======= - self.headers = {'Accept': 'application/json', 'Content-Type': 'application/json'} - self.cache: Dict[Any, Any] = {} if not use_global_cache else QUERY_CACHE ->>>>>>> develop self.request_count = 0 self.first_request: Optional[datetime] = None self.last_request: Optional[datetime] = None @@ -166,17 +161,13 @@ def load(self) -> Optional[float]: return self.request_count * 1000 / msec return None -<<<<<<< HEAD def request( self, endpoint: str, - method: str = "GET", + method: str = 'GET', headers: Optional[dict[str, str]] = None, **kwargs, ) -> Dict: -======= - def request(self, endpoint: str, method: str = 'GET', **kwargs) -> Dict: ->>>>>>> develop """Request wrapper to handle adding common headers and logging. 
Args: @@ -348,33 +339,18 @@ def query( """ Query GraphKB """ -<<<<<<< HEAD headers = {} if ignore_cache or force_refresh: - headers = {"Cache-Control": "no-cache"} -======= - result: List[Record] = [] - hash_code = '' - - if not ignore_cache and paginate: - hash_code = cache_key(request_body) - if hash_code in self.cache and not force_refresh: - return self.cache[hash_code] ->>>>>>> develop + headers = {'Cache-Control': 'no-cache'} result: List[Record] = [] while True: -<<<<<<< HEAD content = self.post( - "query", - data={**request_body, "limit": limit, "skip": len(result)}, + 'query', + data={**request_body, 'limit': limit, 'skip': len(result)}, headers=headers, ) - records = content["result"] -======= - content = self.post('query', data={**request_body, 'limit': limit, 'skip': len(result)}) records = content['result'] ->>>>>>> develop result.extend(records) if len(records) < limit or not paginate: break From df2d899e6380d7568570734bfaa1fe8551f1a226 Mon Sep 17 00:00:00 2001 From: Caralyn Reisle Date: Wed, 11 Feb 2026 23:10:58 -0800 Subject: [PATCH 07/15] fix: due to change in caching, the mock now needs to be re setup between calls so I split the tests into two functions --- pori_python/graphkb/util.py | 2 -- tests/test_graphkb/test_statement.py | 15 ++++++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/pori_python/graphkb/util.py b/pori_python/graphkb/util.py index a0001347..ba54a07a 100644 --- a/pori_python/graphkb/util.py +++ b/pori_python/graphkb/util.py @@ -17,8 +17,6 @@ from .constants import DEFAULT_LIMIT, TYPES_TO_NOTATION, AA_3to1_MAPPING -QUERY_CACHE: Dict[Any, Any] = {} - # name the logger after the package to make it simple to disable for packages using this one as a dependency # https://stackoverflow.com/questions/11029717/how-do-i-disable-log-messages-from-the-requests-library diff --git a/tests/test_graphkb/test_statement.py b/tests/test_graphkb/test_statement.py index 0b4ccd22..00720e90 100644 --- 
a/tests/test_graphkb/test_statement.py +++ b/tests/test_graphkb/test_statement.py @@ -17,11 +17,20 @@ def conn() -> GraphKBConnection: @pytest.fixture() def graphkb_conn(): + """ + Mocks the query functionality required by the calls made for the categorize_relevance function + + categorize_relevance calls query twice for each term in the values of the category_base_terms object + + - get_terms_set ([term, term, term...]) + - get_term_tree(term) + - query(query(term)) + """ def make_rid_list(*values): return [{'@rid': v} for v in values] def term_tree_calls(*final_values): - # this function makes 2 calls to conn.query here + # this function makes 2 calls to conn.query here b/c the get_terms_set function will always call query twice sets = [['fake'], final_values] return [make_rid_list(*s) for s in sets] @@ -77,18 +86,18 @@ def test_no_match(self, graphkb_conn): category = statement.categorize_relevance(graphkb_conn, 'x') assert category == '' - def test_custom_categories(self, graphkb_conn): + def test_custom_categories_not_found(self, graphkb_conn): category = statement.categorize_relevance( graphkb_conn, 'x', [('blargh', ['some', 'blargh'])] ) assert category == '' + def test_custom_categories_match(self, graphkb_conn): category = statement.categorize_relevance( graphkb_conn, '1', [('blargh', ['some', 'blargh'])] ) assert category == 'blargh' - @pytest.mark.skipif( EXCLUDE_BCGSC_TESTS, reason='db-specific rid; requires Inferred Functional Annotation source' ) From 1107f68499787a236ec25afbddde306f111e0106 Mon Sep 17 00:00:00 2001 From: Caralyn Reisle Date: Wed, 11 Feb 2026 23:11:54 -0800 Subject: [PATCH 08/15] format with ruff --- tests/test_graphkb/test_statement.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_graphkb/test_statement.py b/tests/test_graphkb/test_statement.py index 00720e90..550191ed 100644 --- a/tests/test_graphkb/test_statement.py +++ b/tests/test_graphkb/test_statement.py @@ -26,6 +26,7 @@ def graphkb_conn(): - 
get_term_tree(term) - query(query(term)) """ + def make_rid_list(*values): return [{'@rid': v} for v in values] @@ -98,6 +99,7 @@ def test_custom_categories_match(self, graphkb_conn): ) assert category == 'blargh' + @pytest.mark.skipif( EXCLUDE_BCGSC_TESTS, reason='db-specific rid; requires Inferred Functional Annotation source' ) From 5e60ddc5c52c9991aa369aae0748b624c7a41382 Mon Sep 17 00:00:00 2001 From: Caralyn Reisle Date: Wed, 11 Feb 2026 23:29:59 -0800 Subject: [PATCH 09/15] Add pass-through args for sessions --- pori_python/graphkb/util.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pori_python/graphkb/util.py b/pori_python/graphkb/util.py index ba54a07a..8cfe79d4 100644 --- a/pori_python/graphkb/util.py +++ b/pori_python/graphkb/util.py @@ -99,6 +99,7 @@ def __init__( password: str = '', use_global_cache: bool = True, cache_name: str = '', + **session_kwargs ): """ Docstring for __init__ @@ -114,6 +115,7 @@ def __init__( cache_control=True, allowable_methods=['GET', 'POST'], ignored_parameters=['Authorization'], + **session_kwargs ) else: self.http = CustomSession( @@ -121,9 +123,10 @@ def __init__( cache_control=True, allowable_methods=['GET', 'POST'], ignored_parameters=['Authorization'], + **session_kwargs ) else: - self.http = requests.Session() + self.http = requests.Session(**session_kwargs) retries = Retry( total=100, connect=5, From ae37e3bfee34cc97e9167fe187508d53e5fb07d2 Mon Sep 17 00:00:00 2001 From: Caralyn Reisle Date: Wed, 11 Feb 2026 23:31:29 -0800 Subject: [PATCH 10/15] fix arg in docstring to match arg name --- pori_python/graphkb/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pori_python/graphkb/util.py b/pori_python/graphkb/util.py index 8cfe79d4..60ce6992 100644 --- a/pori_python/graphkb/util.py +++ b/pori_python/graphkb/util.py @@ -106,7 +106,7 @@ def __init__( Args: - use_global_cache: cache requests across all requests to GKB - - cache_db: Path or connection URL to the database 
which stors the requests cache. see https://requests-cache.readthedocs.io/en/v0.6.4/user_guide.html#cache-name + - cache_name: Path or connection URL to the database which stors the requests cache. see https://requests-cache.readthedocs.io/en/v0.6.4/user_guide.html#cache-name """ if use_global_cache: if not cache_name: From 135df0390be00de7cc5ebe634a3350ca180947a1 Mon Sep 17 00:00:00 2001 From: Caralyn Reisle Date: Wed, 11 Feb 2026 23:32:54 -0800 Subject: [PATCH 11/15] format with ruff --- pori_python/graphkb/util.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pori_python/graphkb/util.py b/pori_python/graphkb/util.py index 60ce6992..382dba63 100644 --- a/pori_python/graphkb/util.py +++ b/pori_python/graphkb/util.py @@ -99,7 +99,7 @@ def __init__( password: str = '', use_global_cache: bool = True, cache_name: str = '', - **session_kwargs + **session_kwargs, ): """ Docstring for __init__ @@ -115,7 +115,7 @@ def __init__( cache_control=True, allowable_methods=['GET', 'POST'], ignored_parameters=['Authorization'], - **session_kwargs + **session_kwargs, ) else: self.http = CustomSession( @@ -123,7 +123,7 @@ def __init__( cache_control=True, allowable_methods=['GET', 'POST'], ignored_parameters=['Authorization'], - **session_kwargs + **session_kwargs, ) else: self.http = requests.Session(**session_kwargs) From 02d5034216ba9e1b13913861a07984995494f438 Mon Sep 17 00:00:00 2001 From: Caralyn Reisle Date: Fri, 1 May 2026 15:38:02 -0700 Subject: [PATCH 12/15] Include rate limiting by default --- pori_python/graphkb/util.py | 111 ++++++++++++++++++++++++------------ setup.cfg | 7 +++ 2 files changed, 82 insertions(+), 36 deletions(-) diff --git a/pori_python/graphkb/util.py b/pori_python/graphkb/util.py index 382dba63..f9705af4 100644 --- a/pori_python/graphkb/util.py +++ b/pori_python/graphkb/util.py @@ -8,7 +8,9 @@ import re import time from datetime import datetime +from pyrate_limiter import Duration, Limiter, Rate from requests_cache import 
CacheMixin +from requests_ratelimiter import LimiterMixin from typing import Any, Dict, Iterable, List, Optional, Union, cast from urllib3.util.retry import Retry from urllib.parse import urlsplit @@ -20,7 +22,8 @@ # name the logger after the package to make it simple to disable for packages using this one as a dependency # https://stackoverflow.com/questions/11029717/how-do-i-disable-log-messages-from-the-requests-library -logger = logging.getLogger('graphkb') +logger = logging.getLogger("graphkb") +LIMITER = Limiter(Rate(3, Duration.SECOND)) def convert_to_rid_list(records: Iterable[Record]) -> List[str]: @@ -65,7 +68,7 @@ def convert_aa_3to1(three_letter_notation: str) -> str: last_match_end = match.end() result.append(three_letter_notation[last_match_end:]) - return ''.join(result) + return "".join(result) def join_url(base_url: str, *parts) -> str: @@ -87,18 +90,25 @@ def millis_interval(start: datetime, end: datetime) -> int: return millis -class CustomSession(CacheMixin, requests.Session): +class CachedSession(CacheMixin, requests.Session): + pass + + +class CachedLimiterSession(LimiterMixin, CachedSession): pass class GraphKBConnection: def __init__( self, - url: str = os.environ.get('GRAPHKB_URL'), - username: str = '', - password: str = '', + url: str = os.environ.get("GRAPHKB_URL"), + username: str = "", + password: str = "", use_global_cache: bool = True, - cache_name: str = '', + cache_name: str = "", + only_if_cached: bool = False, + session: Optional[requests.Session] = None, + limiter: Limiter = LIMITER, **session_kwargs, ): """ @@ -107,26 +117,48 @@ def __init__( Args: - use_global_cache: cache requests across all requests to GKB - cache_name: Path or connection URL to the database which stors the requests cache. 
see https://requests-cache.readthedocs.io/en/v0.6.4/user_guide.html#cache-name + - only_if_cached: this will set the cache-control header for all requests to only-if-cached which will raise 504 errors if a request does not exist in the cache already rather than making a new network request """ + session_cls = requests.Session + if limiter and not use_global_cache: + raise NotImplementedError( + f"currently rate limiting by default also implements caching" + ) + if session is not None: + if limiter is not None: + raise NotImplementedError("cannot add limiter to an existing session") + if use_global_cache: + raise NotImplementedError( + "the use_global_cache parameter should not be used with a custom session" + ) + if cache_name: + raise NotImplementedError( + "cache_name should not be used with a custom input session" + ) + if not use_global_cache and cache_name: + raise NotImplementedError( + "cache_name only applies when use_global_cache is True" + ) + if use_global_cache: if not cache_name: - self.http = CustomSession( - backend='memory', - cache_control=True, - allowable_methods=['GET', 'POST'], - ignored_parameters=['Authorization'], - **session_kwargs, - ) + session_kwargs["backend"] = "memory" else: - self.http = CustomSession( - cache_name, - cache_control=True, - allowable_methods=['GET', 'POST'], - ignored_parameters=['Authorization'], - **session_kwargs, - ) + session_kwargs["cache_name"] = cache_name + session_kwargs["allowable_methods"] = ["GET", "POST"] + session_kwargs["ignored_parameters"] = ["Authorization"] + session_kwargs["cache_control"] = True + session_cls = CachedSession + + if "PYTEST_CURRENT_TEST" not in os.environ: + if limiter: + session_kwargs["limiter"] = limiter + session_cls = CachedLimiterSession + + if not session: + self.http = session_cls(**session_kwargs) else: - self.http = requests.Session(**session_kwargs) + self.http = session retries = Retry( total=100, connect=5, @@ -134,15 +166,16 @@ def __init__( backoff_factor=5, 
status_forcelist=[429, 500, 502, 503, 504], ) - self.http.mount('https://', HTTPAdapter(max_retries=retries)) - self.token = '' - self.token_kc = '' + self.only_if_cached = only_if_cached + self.http.mount("https://", HTTPAdapter(max_retries=retries)) + self.token = "" + self.token_kc = "" self.url = url self.username = username self.password = password self.headers = { - 'Accept': 'application/json', - 'Content-Type': 'application/json', + "Accept": "application/json", + "Content-Type": "application/json", } self.request_count = 0 self.first_request: Optional[datetime] = None @@ -165,8 +198,11 @@ def load(self) -> Optional[float]: def request( self, endpoint: str, - method: str = 'GET', + method: str = "GET", headers: Optional[dict[str, str]] = None, + ignore_cache=False, + force_refresh=False, + only_if_cached=False, **kwargs, ) -> Dict: """Request wrapper to handle adding common headers and logging. @@ -178,6 +214,13 @@ def request( Returns: dict: the json response as a python dict """ + if headers is None: + headers = {} + + if ignore_cache or force_refresh: + headers["Cache-Control"] = "no-cache" + elif only_if_cached or self.only_if_cached: + headers["Cache-Control"] = "only-if-cached" url = join_url(self.url, endpoint) self.request_count += 1 connect_timeout = 7 @@ -333,23 +376,19 @@ def query( self, request_body: Dict = {}, paginate: bool = True, - ignore_cache: bool = False, - force_refresh: bool = False, limit: int = DEFAULT_LIMIT, + **kwargs, ) -> List[Record]: """ Query GraphKB """ - headers = {} - if ignore_cache or force_refresh: - headers = {'Cache-Control': 'no-cache'} result: List[Record] = [] while True: content = self.post( - 'query', - data={**request_body, 'limit': limit, 'skip': len(result)}, - headers=headers, + "query", + data={**request_body, "limit": limit, "skip": len(result)}, + **kwargs, ) records = content['result'] result.extend(records) diff --git a/setup.cfg b/setup.cfg index 3f2c365b..c4ec7157 100644 --- a/setup.cfg +++ b/setup.cfg 
@@ -37,6 +37,8 @@ install_requires = tqdm typing_extensions>=3.7.4.2,<5 requests-cache[sqlite] + requests-ratelimiter + pyrate-limiter [options.extras_require] deploy = twine; wheel; m2r @@ -63,3 +65,8 @@ pori_python = py.typed [options.entry_points] console_scripts = ipr = pori_python.ipr.main:command_interface + + +[tool:pytest] +log_cli = true +log_cli_level = INFO From 93c1b481cbd92e23e30fb0e17271ae65937f7be7 Mon Sep 17 00:00:00 2001 From: Caralyn Reisle Date: Fri, 1 May 2026 15:45:04 -0700 Subject: [PATCH 13/15] Format with ruff --- pori_python/graphkb/util.py | 64 ++++++++++++++++++------------------- pori_python/ipr/main.py | 2 +- tests/test_ipr/test_main.py | 1 - 3 files changed, 32 insertions(+), 35 deletions(-) diff --git a/pori_python/graphkb/util.py b/pori_python/graphkb/util.py index f9705af4..cf26e070 100644 --- a/pori_python/graphkb/util.py +++ b/pori_python/graphkb/util.py @@ -22,7 +22,7 @@ # name the logger after the package to make it simple to disable for packages using this one as a dependency # https://stackoverflow.com/questions/11029717/how-do-i-disable-log-messages-from-the-requests-library -logger = logging.getLogger("graphkb") +logger = logging.getLogger('graphkb') LIMITER = Limiter(Rate(3, Duration.SECOND)) @@ -68,7 +68,7 @@ def convert_aa_3to1(three_letter_notation: str) -> str: last_match_end = match.end() result.append(three_letter_notation[last_match_end:]) - return "".join(result) + return ''.join(result) def join_url(base_url: str, *parts) -> str: @@ -101,11 +101,11 @@ class CachedLimiterSession(LimiterMixin, CachedSession): class GraphKBConnection: def __init__( self, - url: str = os.environ.get("GRAPHKB_URL"), - username: str = "", - password: str = "", + url: str = os.environ.get('GRAPHKB_URL'), + username: str = '', + password: str = '', use_global_cache: bool = True, - cache_name: str = "", + cache_name: str = '', only_if_cached: bool = False, session: Optional[requests.Session] = None, limiter: Limiter = LIMITER, @@ -121,38 
+121,34 @@ def __init__( """ session_cls = requests.Session if limiter and not use_global_cache: - raise NotImplementedError( - f"currently rate limiting by default also implements caching" - ) + raise NotImplementedError(f'currently rate limiting by default also implements caching') if session is not None: if limiter is not None: - raise NotImplementedError("cannot add limiter to an existing session") + raise NotImplementedError('cannot add limiter to an existing session') if use_global_cache: raise NotImplementedError( - "the use_global_cache parameter should not be used with a custom session" + 'the use_global_cache parameter should not be used with a custom session' ) if cache_name: raise NotImplementedError( - "cache_name should not be used with a custom input session" + 'cache_name should not be used with a custom input session' ) if not use_global_cache and cache_name: - raise NotImplementedError( - "cache_name only applies when use_global_cache is True" - ) + raise NotImplementedError('cache_name only applies when use_global_cache is True') if use_global_cache: if not cache_name: - session_kwargs["backend"] = "memory" + session_kwargs['backend'] = 'memory' else: - session_kwargs["cache_name"] = cache_name - session_kwargs["allowable_methods"] = ["GET", "POST"] - session_kwargs["ignored_parameters"] = ["Authorization"] - session_kwargs["cache_control"] = True + session_kwargs['cache_name'] = cache_name + session_kwargs['allowable_methods'] = ['GET', 'POST'] + session_kwargs['ignored_parameters'] = ['Authorization'] + session_kwargs['cache_control'] = True session_cls = CachedSession - if "PYTEST_CURRENT_TEST" not in os.environ: + if 'PYTEST_CURRENT_TEST' not in os.environ: if limiter: - session_kwargs["limiter"] = limiter + session_kwargs['limiter'] = limiter session_cls = CachedLimiterSession if not session: @@ -167,15 +163,15 @@ def __init__( status_forcelist=[429, 500, 502, 503, 504], ) self.only_if_cached = only_if_cached - self.http.mount("https://", 
HTTPAdapter(max_retries=retries)) - self.token = "" - self.token_kc = "" + self.http.mount('https://', HTTPAdapter(max_retries=retries)) + self.token = '' + self.token_kc = '' self.url = url self.username = username self.password = password self.headers = { - "Accept": "application/json", - "Content-Type": "application/json", + 'Accept': 'application/json', + 'Content-Type': 'application/json', } self.request_count = 0 self.first_request: Optional[datetime] = None @@ -198,7 +194,7 @@ def load(self) -> Optional[float]: def request( self, endpoint: str, - method: str = "GET", + method: str = 'GET', headers: Optional[dict[str, str]] = None, ignore_cache=False, force_refresh=False, @@ -218,9 +214,9 @@ def request( headers = {} if ignore_cache or force_refresh: - headers["Cache-Control"] = "no-cache" + headers['Cache-Control'] = 'no-cache' elif only_if_cached or self.only_if_cached: - headers["Cache-Control"] = "only-if-cached" + headers['Cache-Control'] = 'only-if-cached' url = join_url(self.url, endpoint) self.request_count += 1 connect_timeout = 7 @@ -386,8 +382,8 @@ def query( result: List[Record] = [] while True: content = self.post( - "query", - data={**request_body, "limit": limit, "skip": len(result)}, + 'query', + data={**request_body, 'limit': limit, 'skip': len(result)}, **kwargs, ) records = content['result'] @@ -501,7 +497,9 @@ def stripDisplayName(displayName: str, withRef: bool = True, withRefSeq: bool = def stringifyVariant( - variant: Union[PositionalVariant, ParsedVariant], withRef: bool = True, withRefSeq: bool = True + variant: Union[PositionalVariant, ParsedVariant], + withRef: bool = True, + withRefSeq: bool = True, ) -> str: """ Convert variant record to a string representation (displayName/hgvs) diff --git a/pori_python/ipr/main.py b/pori_python/ipr/main.py index cbb7c128..20098b7f 100644 --- a/pori_python/ipr/main.py +++ b/pori_python/ipr/main.py @@ -31,8 +31,8 @@ preprocess_cosmic, preprocess_expression_variants, preprocess_hla, - 
preprocess_msi, preprocess_hrd, + preprocess_msi, preprocess_signature_variants, preprocess_small_mutations, preprocess_structural_variants, diff --git a/tests/test_ipr/test_main.py b/tests/test_ipr/test_main.py index 8fe585cd..cd161e18 100644 --- a/tests/test_ipr/test_main.py +++ b/tests/test_ipr/test_main.py @@ -122,7 +122,6 @@ def side_effect_function(*args, **kwargs): return report_content -@pytest.mark.skip(reason='KBDEV-1308; taking too long, getting canceled after reaching max delay') @pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason='excluding long running integration tests') class TestCreateReport: def test_main_sections_present(self, report_upload_content: Dict) -> None: From bc15dfc90651bc5bd9bdef8fa24611a36b25e569 Mon Sep 17 00:00:00 2001 From: Caralyn Reisle Date: Fri, 1 May 2026 15:48:31 -0700 Subject: [PATCH 14/15] fallback import for older versions of pyrate-limiter --- pori_python/graphkb/util.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pori_python/graphkb/util.py b/pori_python/graphkb/util.py index cf26e070..f4132fa9 100644 --- a/pori_python/graphkb/util.py +++ b/pori_python/graphkb/util.py @@ -8,7 +8,13 @@ import re import time from datetime import datetime -from pyrate_limiter import Duration, Limiter, Rate +from pyrate_limiter import Duration, Limiter + +try: + from pyrate_limiter import Rate +except ImportError: + from pyrate_limiter import RequestRate as Rate + from requests_cache import CacheMixin from requests_ratelimiter import LimiterMixin from typing import Any, Dict, Iterable, List, Optional, Union, cast From 03994d9251f0f2cbe5298fb6ba8f13b858071fb2 Mon Sep 17 00:00:00 2001 From: Caralyn Reisle Date: Fri, 1 May 2026 16:04:40 -0700 Subject: [PATCH 15/15] only run integration tests on 3.11 and report coverage for 3.11 --- .github/workflows/pytest.yml | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 
7804a8a0..27a0d28c 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -32,6 +32,18 @@ jobs: run: | pip install ruff ruff format --check pori_python tests + - name: short tests + run: | + pip list + pytest --junitxml=junit/test-results-${{ matrix.python-version }}.xml --cov pori_python --cov-report term --cov-report xml + env: + IPR_USER: ${{ secrets.IPR_TEST_USER }} + IPR_PASS: ${{ secrets.IPR_TEST_PASSWORD }} + GRAPHKB_USER: ${{ secrets.GKB_TEST_USER }} + GRAPHKB_PASS: ${{ secrets.GKB_TEST_PASS }} + GRAPHKB_URL: ${{ secrets.GKB_TEST_URL }} + EXCLUDE_INTEGRATION_TESTS: 1 + if: matrix.python-version != 3.11 - name: Full Tests with pytest run: | pip list @@ -42,16 +54,14 @@ jobs: GRAPHKB_USER: ${{ secrets.GKB_TEST_USER }} GRAPHKB_PASS: ${{ secrets.GKB_TEST_PASS }} GRAPHKB_URL: ${{ secrets.GKB_TEST_URL }} - # SDEV-3381 - Turn off integration tests temporarily, till efficiency is increased - # turn on integration tests for one python version only - EXCLUDE_INTEGRATION_TESTS: ${{ matrix.python-version != '3.11' }} + if: matrix.python-version == 3.11 - name: Upload pytest test results uses: actions/upload-artifact@master with: name: pytest-results-${{ matrix.python-version }} path: junit/test-results-${{ matrix.python-version }}.xml # Use always() to always run this step to publish test results when there are test failures - if: matrix.python-version == 3.9 + if: matrix.python-version == 3.11 - name: Update code coverage report to CodeCov uses: codecov/codecov-action@v3 with: @@ -61,4 +71,4 @@ jobs: env_vars: OS,PYTHON name: codecov-umbrella fail_ci_if_error: true - if: matrix.python-version == 3.9 + if: matrix.python-version == 3.11