Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
ca8d5f0
Update get_cancer_genes() to fit consensus defenition of cancer gene
mathieulemieux May 11, 2026
bca3e1d
linting
mathieulemieux May 11, 2026
d6f7c9a
Add tumourigenesis to CANCER_GENE for backward compatibility
mathieulemieux May 12, 2026
458f88f
Use Union in type hint instead of pipe
mathieulemieux May 12, 2026
9942425
Add get_related_records() and get_related_terms() to GraphKBConnection
mathieulemieux May 13, 2026
f13cf6b
Refactor get_cancer_genes() to use get_related_terms()
mathieulemieux May 13, 2026
f3a7f53
Add get_cancer_gene_flags()
mathieulemieux May 13, 2026
27ec429
Deprecate _get_tumourigenesis_genes_list(), get_oncokb_oncogenes(), g…
mathieulemieux May 13, 2026
22f5b41
Update get_gene_information() to use get_cancer_gene_flags()
mathieulemieux May 13, 2026
2e56ea8
Add equivalent gene name caching to get_gene_information()
mathieulemieux May 13, 2026
0713931
Revert equivalent gene names caching in get_gene_information()
mathieulemieux May 14, 2026
5c3fe7a
Add test to test_cancer_gene_flags()
mathieulemieux May 14, 2026
977c1a8
Fix typo
mathieulemieux May 15, 2026
03ebef1
Merge branch 'develop' into task/KBDEV-1532-cancer-gene-traverse-chil…
mathieulemieux May 15, 2026
482ea30
Fix formatting in get_cancer_gene_flags()
mathieulemieux May 15, 2026
d8a68fa
Refactor unique gene filtering in get_cancer_gene_flags()
mathieulemieux May 15, 2026
49aaeaa
Fix _get_tumourigenesis_genes_list() signature
mathieulemieux May 15, 2026
aa2bb7f
Fix typo
mathieulemieux May 15, 2026
9a5eb03
Fix get_related_records() & get_related_terms() signatures
mathieulemieux May 15, 2026
27b8ed5
Simplify filtering in get_related_terms()
mathieulemieux May 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion pori_python/graphkb/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,10 @@
TSO500_SOURCE_NAME = 'tso500'
ONCOGENE = 'oncogenic'
TUMOUR_SUPPRESSIVE = 'tumour suppressive'
CANCER_GENE = 'cancer gene'
CANCER_GENE = [
'cancer gene',
'tumourigenesis',
] # KBDEV-1532. tumourigenesis for backward compatibility
FUSION_NAMES = ['structural variant', 'fusion']

GSC_PHARMACOGENOMIC_SOURCE_EXCLUDE_LIST = ['cancer genome interpreter', 'civic']
Expand Down
142 changes: 130 additions & 12 deletions pori_python/graphkb/genes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from __future__ import annotations

from typing import Any, Dict, List, Sequence, Set, Tuple, cast
from typing import Any, Dict, List, Sequence, Set, Tuple, cast, Union
from typing_extensions import deprecated

from pori_python.types import IprGene, Ontology, Record, Statement, Variant
Expand All @@ -27,8 +27,117 @@
from .vocab import get_terms_set


def get_cancer_gene_flags(
    conn: GraphKBConnection,
    flags: bool = False,
    ignore_cache: bool = False,
) -> Union[List[Record], Dict[str, List[Record]]]:
    """
    Return all cancer genes, optionally grouped by flag.

    Flag definitions:
        oncogenic: relevance 'oncogenic' from OncoKB
        tumourSuppressive: relevance 'tumour suppressive' from OncoKB
        cancerGene: relevance 'cancer gene' AND child terms ('oncogenic',
            'tumour suppressive', 'other cancer gene'), from OncoKB AND TSO500

    Statements are fetched by relevance only; the subject (gene Feature) and
    source (OncoKB / TSO500) constraints are applied after the query. A
    statement whose subject, source or relevance is absent is skipped rather
    than raising.

    Args:
        conn: the graphkb connection object
        flags: if the results should be grouped by flag
        ignore_cache: if cache should be ignored when querying GraphKB API

    Returns (if flags=False; default): list of gene records, unique by name
        and sorted by displayName
        [ <record>, <record>, ... ]

    Returns (if flags=True): dict of flags as keys, and list of gene records
        as value (duplicates are not removed)
        {
            'oncogenic': [ <record>, <record>, ... ],
            'tumourSuppressive': [ <record>, <record>, ... ],
            'cancerGene': [ <record>, <record>, ... ],
        }
    """
    # Expand the base cancer gene term(s) with their child vocabulary terms
    cancer_gene_terms = conn.get_related_terms(
        terms=CANCER_GENE,
        subgraphType='children',
    )
    statements = cast(
        List[Statement],
        conn.query(
            {
                'target': 'Statement',
                'filters': {
                    'relevance': {'target': 'Vocabulary', 'filters': {'name': cancer_gene_terms}}
                },
                'returnProperties': [
                    'source.name',
                    'relevance.name',
                    *[f'subject.{prop}' for prop in GENE_RETURN_PROPERTIES],
                ],
            },
            ignore_cache=ignore_cache,
        ),
    )

    # Post-query filtering (faster), done in a single pass over the statements.
    cancer_gene_sources = (ONCOKB_SOURCE_NAME, TSO500_SOURCE_NAME)
    cancer_genes: List[Record] = []
    oncogenic_genes: List[Record] = []
    tumour_suppressive_genes: List[Record] = []

    for statement in statements:
        # `or {}` guards against a linked record that is missing or null
        subject = statement.get('subject') or {}
        source_name = (statement.get('source') or {}).get('name')
        if (
            subject.get('@class') != 'Feature'
            or subject.get('biotype') != 'gene'
            or source_name not in cancer_gene_sources
        ):
            continue

        gene = cast(Record, subject)
        cancer_genes.append(gene)

        # The oncogenic / tumour suppressive flags are restricted to OncoKB
        if source_name != ONCOKB_SOURCE_NAME:
            continue
        relevance_name = (statement.get('relevance') or {}).get('name')
        if relevance_name == ONCOGENE:
            oncogenic_genes.append(gene)
        if relevance_name == TUMOUR_SUPPRESSIVE:
            tumour_suppressive_genes.append(gene)

    # Returning a sorted list of unique gene records, based on iProbe requirements
    # Unique by name (first occurrence wins), sorted by displayName
    if not flags:
        unique_by_name: Dict[str, Record] = {}
        for gene in cancer_genes:
            unique_by_name.setdefault(gene['name'], gene)

        return cast(
            List[Record],
            sorted(unique_by_name.values(), key=lambda gene: gene['displayName']),
        )

    # Returning a Dict of flags, with list of associated gene records
    # Duplicates are ok
    return {
        'cancerGene': cancer_genes,
        'oncogenic': oncogenic_genes,
        'tumourSuppressive': tumour_suppressive_genes,
    }


@deprecated('functionality replaced by get_cancer_gene_flags')
def _get_tumourigenesis_genes_list(
conn: GraphKBConnection, relevance: str, sources: List[str], ignore_cache: bool = False
conn: GraphKBConnection,
relevance: Union[str, List[str]],
sources: Union[str, List[str]],
ignore_cache: bool = False,
) -> List[Ontology]:
statements = cast(
List[Statement],
Expand Down Expand Up @@ -57,6 +166,7 @@ def _get_tumourigenesis_genes_list(
return [gene for gene in genes.values()]


@deprecated('functionality replaced by get_cancer_gene_flags')
def get_oncokb_oncogenes(conn: GraphKBConnection) -> List[Ontology]:
"""Get the list of oncogenes stored in GraphKB derived from OncoKB.

Expand All @@ -66,9 +176,10 @@ def get_oncokb_oncogenes(conn: GraphKBConnection) -> List[Ontology]:
Returns:
gene (Feature) records
"""
return _get_tumourigenesis_genes_list(conn, ONCOGENE, [ONCOKB_SOURCE_NAME])
return _get_tumourigenesis_genes_list(conn, ONCOGENE, ONCOKB_SOURCE_NAME)


@deprecated('functionality replaced by get_cancer_gene_flags')
def get_oncokb_tumour_supressors(conn: GraphKBConnection) -> List[Ontology]:
"""Get the list of tumour suppressor genes stored in GraphKB derived from OncoKB.

Expand All @@ -78,20 +189,27 @@ def get_oncokb_tumour_supressors(conn: GraphKBConnection) -> List[Ontology]:
Returns:
gene (Feature) records
"""
return _get_tumourigenesis_genes_list(conn, TUMOUR_SUPPRESSIVE, [ONCOKB_SOURCE_NAME])
return _get_tumourigenesis_genes_list(conn, TUMOUR_SUPPRESSIVE, ONCOKB_SOURCE_NAME)


@deprecated('functionality replaced by get_cancer_gene_flags')
def get_cancer_genes(conn: GraphKBConnection) -> List[Ontology]:
"""Get the list of cancer genes stored in GraphKB derived from OncoKB & TSO500.
"""
Get the list of cancer genes stored in GraphKB derived from OncoKB & TSO500.
Cancer genes include oncogenes, tumour suppressor genes and other cancer genes.

Args:
conn: the graphkb connection object

Returns:
gene (Feature) records
"""
cancer_gene_terms = conn.get_related_terms(
terms=CANCER_GENE,
subgraphType='children',
)
return _get_tumourigenesis_genes_list(
conn, CANCER_GENE, [ONCOKB_SOURCE_NAME, TSO500_SOURCE_NAME]
conn, cancer_gene_terms, [ONCOKB_SOURCE_NAME, TSO500_SOURCE_NAME]
)


Expand Down Expand Up @@ -513,12 +631,12 @@ def get_gene_information(
# PositionalVariant without a reference2 implies a smallMutation type
gene_flags['knownSmallMutation'].add(condition['reference1']) # type: ignore

logger.info('fetching oncogenes list')
gene_flags['oncogene'] = convert_to_rid_set(get_oncokb_oncogenes(graphkb_conn))
logger.info('fetching tumour supressors list')
gene_flags['tumourSuppressor'] = convert_to_rid_set(get_oncokb_tumour_supressors(graphkb_conn))
logger.info('fetching cancerGeneListMatch list')
gene_flags['cancerGeneListMatch'] = convert_to_rid_set(get_cancer_genes(graphkb_conn))
# cancer gene flags
logger.info('fetching cancer genes')
cancer_gene_flags = get_cancer_gene_flags(graphkb_conn, flags=True)
gene_flags['oncogene'] = convert_to_rid_set(cancer_gene_flags['oncogenic'])
gene_flags['tumourSuppressor'] = convert_to_rid_set(cancer_gene_flags['tumourSuppressive'])
gene_flags['cancerGeneListMatch'] = convert_to_rid_set(cancer_gene_flags['cancerGene'])

logger.info('fetching therapeutic associated genes lists')
gene_flags['therapeuticAssociated'] = convert_to_rid_set(
Expand Down
56 changes: 56 additions & 0 deletions pori_python/graphkb/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,62 @@ def version(self) -> Dict[str, str]:
"""
return self.request('version')

def get_related_records(
    self,
    base: Union[str, List[str]],
    ontology: str,
    subgraphType: str,
    returnProperties: Optional[List[str]] = None,
) -> Dict[str, Record]:
    """
    Given some base node RIDs, an ontology class and a subgraph type,
    leverage the subgraphs route to return the related nodes.

    Args:
        base: the base node RID(s) to start the graph traversal from; a single
            RID string, or any iterable of RID strings
        ontology: the ontology class to traverse
        subgraphType: the type of traversal. See options in API specs
        returnProperties: additional record properties to return

    Returns:
        the `result.g.nodes` container of the API response, returned untouched.
        NOTE(review): this appears to be a mapping of related node records
        (get_related_terms() iterates over its `.values()`) - presumably keyed
        by record ID; confirm against the API specs.
    """
    # A bare string is a single RID; anything else is treated as a collection
    # of RIDs (checking for `str` rather than `list` so tuples/sets also work).
    base_rids = [base] if isinstance(base, str) else list(base)
    related = self.post(
        uri=f'/subgraphs/{ontology}',
        data={
            'base': base_rids,
            'subgraphType': subgraphType,
            'returnProperties': returnProperties or [],
        },
    )
    return related['result']['g']['nodes']

def get_related_terms(
    self,
    terms: Union[str, List[str]],
    ontology: str = 'Vocabulary',
    subgraphType: str = 'similar',
) -> List[str]:
    """
    Given some base term name(s), an ontology class and a subgraph type,
    leverage the subgraphs route to return the list of related term name(s)

    Args:
        terms: the base term name(s) to start the graph traversal from
        ontology: the ontology class to traverse
        subgraphType: the type of traversal

    Returns:
        list of related term name(s), one per node returned by the traversal
    """
    # Resolve the term name(s) to record IDs, which the subgraphs route expects
    base_records = self.query({'target': ontology, 'filters': {'name': terms}})
    rids = convert_to_rid_list(base_records)
    nodes = self.get_related_records(
        base=rids,
        ontology=ontology,
        subgraphType=subgraphType,
    )
    # get_related_records() is declared as returning a list, but the nodes
    # container has been consumed here as a mapping; accept either shape.
    records = nodes.values() if isinstance(nodes, dict) else nodes
    return [record['name'] for record in records]


def get_rid(conn: GraphKBConnection, target: str, name: str) -> str:
"""
Expand Down
4 changes: 2 additions & 2 deletions pori_python/graphkb/vocab.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from typing import Callable, Dict, Iterable, List, Set, cast
from typing import Callable, Dict, Iterable, List, Set, cast, Union

from pori_python.types import Ontology

from . import GraphKBConnection
from .util import convert_to_rid_list


def query_by_name(ontology_class: str, base_term_name: str) -> Dict:
def query_by_name(ontology_class: str, base_term_name: Union[str, list[str]]) -> Dict:
return {'target': ontology_class, 'filters': {'name': base_term_name}}


Expand Down
41 changes: 33 additions & 8 deletions tests/test_graphkb/test_genes.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from pori_python.graphkb import GraphKBConnection
from pori_python.graphkb.genes import (
get_cancer_genes,
get_cancer_gene_flags,
get_cancer_predisposition_info,
get_gene_information,
get_gene_linked_cancer_predisposition_info,
Expand All @@ -27,7 +28,7 @@

CANONICAL_ONCOGENES = ['kras', 'nras', 'alk']
CANONICAL_TS = ['cdkn2a', 'tp53']
CANONICAL_CG = ['alb']
CANONICAL_OTHER_CG = ['alb']
CANONICAL_FUSION_GENES = ['alk', 'ewsr1', 'fli1']
CANONICAL_STRUCTURAL_VARIANT_GENES = ['brca1', 'dpyd', 'pten']
CANNONICAL_THERAPY_GENES = ['erbb2', 'brca2', 'egfr']
Expand Down Expand Up @@ -111,6 +112,30 @@ def conn():
return conn


@pytest.mark.skipif(EXCLUDE_ONCOKB_TESTS, reason='excluding tests that depend on oncokb data')
def test_cancer_gene_flags(conn):
    """Check get_cancer_gene_flags() both as a flat list and grouped by flag."""
    all_canonical = [*CANONICAL_OTHER_CG, *CANONICAL_TS, *CANONICAL_ONCOGENES]

    # wo/ flags: flat list, unique and sorted by displayName
    genes = get_cancer_gene_flags(conn)
    display_names = [gene['displayName'] for gene in genes]
    assert display_names == sorted(set(display_names)), 'displayName not unique and sorted'
    names = {gene['name'] for gene in genes}
    for gene in all_canonical:
        assert gene in names, f'Missed cancer gene {gene}'

    # w/ flags: build each name set once instead of once per assertion
    by_flag = get_cancer_gene_flags(conn, flags=True)
    cancer_gene_names = {row['name'] for row in by_flag['cancerGene']}
    oncogenic_names = {row['name'] for row in by_flag['oncogenic']}
    tumour_suppressive_names = {row['name'] for row in by_flag['tumourSuppressive']}

    for gene in all_canonical:
        assert gene in cancer_gene_names, f'Missed cancerGene {gene}'
    for gene in CANONICAL_TS:
        assert gene in tumour_suppressive_names, f'Missed tumourSuppressive {gene}'
        assert gene not in oncogenic_names, f'Unexpected oncogenic {gene}'
    for gene in CANONICAL_ONCOGENES:
        assert gene in oncogenic_names, f'Missed oncogenic {gene}'
        assert gene not in tumour_suppressive_names, f'Unexpected tumourSuppressive {gene}'
    for gene in CANONICAL_OTHER_CG:
        assert gene not in oncogenic_names, f'Unexpected oncogenic {gene}'
        assert gene not in tumour_suppressive_names, f'Unexpected tumourSuppressive {gene}'


@pytest.mark.skipif(EXCLUDE_ONCOKB_TESTS, reason='excluding tests that depend on oncokb data')
def test_oncogene(conn):
result = get_oncokb_oncogenes(conn)
Expand All @@ -119,7 +144,7 @@ def test_oncogene(conn):
assert gene in names
for gene in CANONICAL_TS:
assert gene not in names
for gene in CANONICAL_CG:
for gene in CANONICAL_OTHER_CG:
assert gene not in names


Expand All @@ -131,7 +156,7 @@ def test_tumour_supressors(conn):
assert gene in names
for gene in CANONICAL_ONCOGENES:
assert gene not in names
for gene in CANONICAL_CG:
for gene in CANONICAL_OTHER_CG:
assert gene not in names


Expand All @@ -142,12 +167,12 @@ def test_tumour_supressors(conn):
def test_cancer_genes(conn):
result = get_cancer_genes(conn)
names = {row['name'] for row in result}
for gene in CANONICAL_CG:
for gene in CANONICAL_OTHER_CG:
assert gene in names
for gene in CANONICAL_TS:
assert gene not in names
assert gene in names
for gene in CANONICAL_ONCOGENES:
assert gene not in names
assert gene in names


@pytest.mark.skipif(
Expand Down Expand Up @@ -254,7 +279,7 @@ def test_get_gene_information(conn):
conn,
CANONICAL_ONCOGENES
+ CANONICAL_TS
+ CANONICAL_CG
+ CANONICAL_OTHER_CG
+ CANONICAL_FUSION_GENES
+ CANONICAL_STRUCTURAL_VARIANT_GENES
+ CANNONICAL_THERAPY_GENES
Expand Down Expand Up @@ -300,7 +325,7 @@ def test_get_gene_information(conn):
f'Missed kbStatementRelated {gene}'
)

for gene in CANONICAL_CG:
for gene in CANONICAL_ONCOGENES + CANONICAL_TS + CANONICAL_OTHER_CG:
assert gene in [g['name'] for g in gene_info if g.get('cancerGeneListMatch')], (
f'Missed cancerGeneListMatch {gene}'
)
Loading
Loading