Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions medcat-v2/medcat/components/linking/only_primary_name_linker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
from typing import Iterator, Optional, Union
import logging

from medcat.tokenizing.tokens import MutableDocument, MutableEntity
from medcat.components.linking.context_based_linker import Linker
from medcat.components.linking.vector_context_model import (
PerDocumentTokenCache)
from medcat.utils.defaults import StatusTypes
from medcat.cdb import CDB
from medcat.vocab import Vocab
from medcat.config import Config


# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)


class OnlyPrimaryNamesLinker(Linker):
    """Linker that links entities based on primary-name status alone.

    An entity is linked when it has exactly one usable candidate CUI, or
    when exactly one of its candidates has a status listed in
    ``StatusTypes.PRIMARY_STATUS`` for the detected name. Entities with no
    such candidate, or with several, are left unlinked.

    Training is not supported: both training entry points raise
    ``NoTrainingException``.
    """
    name = 'primary_name_only_linker'

    def __init__(self, cdb: CDB, vocab: Vocab, config: Config) -> None:
        """Initialise the base linker and discard its context model.

        Args:
            cdb: The concept database, passed through to the base class.
            vocab: The vocabulary, passed through to the base class.
            config: The configuration, passed through to the base class.
        """
        super().__init__(cdb, vocab, config)
        # don't need / use the context model
        del self.context_model

    def _process_entity_inference(
            self, doc: MutableDocument,
            entity: MutableEntity,
            per_doc_valid_token_cache: PerDocumentTokenCache
            ) -> Iterator[MutableEntity]:
        """Link ``entity`` to a CUI if the choice is unambiguous.

        On success the entity's ``cui`` is set, its ``context_similarity``
        is set to ``1.0`` and the entity is yielded. In every other case
        nothing is yielded.

        Args:
            doc: The document being processed (not used by this linker).
            entity: The entity to link; its ``link_candidates`` and
                ``detected_name`` are read.
            per_doc_valid_token_cache: Per-document token cache (not used
                by this linker).

        Yields:
            The entity, once, if a single CUI could be chosen for it.
        """
        cuis = entity.link_candidates
        if not cuis:
            return
        # Check does it have a detected name
        name = entity.detected_name
        if name is None:
            logger.info("No name detected for entity %s", entity)
            return
        cnf_l = self.config.components.linking
        if cnf_l.filter_before_disamb:
            cuis = [cui for cui in cuis if cnf_l.filters.check_filters(cui)]
            if not cuis:
                logger.debug("No CUIs that fit filter for %s", entity)
                return
        if len(cuis) == 1:
            # A lone candidate is accepted without looking at its status,
            # as long as it passes the filters.
            if cnf_l.filters.check_filters(cuis[0]):
                logger.info("Choosing only possible CUI %s for %s",
                            cuis[0], entity)
                entity.cui = cuis[0]
                entity.context_similarity = 1.0
                yield entity
            else:
                logger.info(
                    "A single CUI (%s) was mapped to for %s but not in filter",
                    cuis[0], entity)
            return
        # The status mapping depends only on the name, so look it up once
        # instead of once per candidate.
        per_cui_status = self.cdb.name2info[name]['per_cui_status']
        primary_cuis = [
            cui for cui in cuis
            if (per_cui_status[cui] in StatusTypes.PRIMARY_STATUS and
                cnf_l.filters.check_filters(cui))]
        if not primary_cuis:
            logger.info("No primary CUIs for name %s", name)
            return
        if len(primary_cuis) > 1:
            logger.info(
                "Ambiguous primary CUIs for name %s: %s", name, primary_cuis)
            return
        cui = primary_cuis[0]
        entity.cui = cui
        entity.context_similarity = 1.0
        yield entity

    # NOTE(review): the mutable ``names=[]`` default is never read because
    # the method always raises; it is kept so the signature is unchanged.
    def train(self, cui: str,
              entity: MutableEntity,
              doc: MutableDocument,
              negative: bool = False,
              names: Union[list[str], dict] = [],
              per_doc_valid_token_cache: Optional[PerDocumentTokenCache] = None
              ) -> None:
        """Always fail: this linker cannot be trained.

        Raises:
            NoTrainingException: On every call.
        """
        raise NoTrainingException("Training is not supported for this linker")

    def _train_on_doc(self, doc: MutableDocument,
                      ner_ents: list[MutableEntity]
                      ) -> Iterator[MutableEntity]:
        """Always fail: this linker cannot be trained.

        Raises:
            NoTrainingException: On every call.
        """
        raise NoTrainingException("Training is not supported for this linker")


class NoTrainingException(ValueError):
    """Raised when training is requested from a linker that cannot train."""
4 changes: 4 additions & 0 deletions medcat-v2/medcat/components/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,10 @@ def train(self, cui: str,
"medcat2_embedding_linker": (
"medcat.components.linking.embedding_linker",
"Linker.create_new_component"),
# primary name only
"primary_name_only_linker": (
"medcat.components.linking.only_primary_name_linker",
"OnlyPrimaryNamesLinker.create_new_component"),
}


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import os

from medcat.cdb import CDB
from medcat.cat import CAT
from medcat.vocab import Vocab
from medcat.components.linking.only_primary_name_linker import (
OnlyPrimaryNamesLinker)

import unittest

from ... import UNPACKED_EXAMPLE_MODEL_PACK_PATH


# Locations of the "cdb" and "vocab" entries inside the unpacked example
# model pack; both are loaded in the test class set-up.
EXAMPLE_CDB_PATH = os.path.join(UNPACKED_EXAMPLE_MODEL_PACK_PATH, "cdb")
EXAMPLE_VOCAB_PATH = os.path.join(UNPACKED_EXAMPLE_MODEL_PACK_PATH, "vocab")


class PrimaryNamesLinkerTests(unittest.TestCase):
    """Smoke test for a CAT pipeline using the primary-name-only linker."""

    TEXT = (
        "Man was diagnosed with severe kidney failure and acute diabetes "
        "and presented with a light fever")

    @classmethod
    def setUpClass(cls):
        """Build one shared CAT instance configured with the linker."""
        example_vocab = Vocab.load(EXAMPLE_VOCAB_PATH)
        example_cdb = CDB.load(EXAMPLE_CDB_PATH)
        # Select the linker under test by its registered component name.
        linking_config = example_cdb.config.components.linking
        linking_config.comp_name = OnlyPrimaryNamesLinker.name
        cls.cat = CAT(example_cdb, example_vocab)

    def test_gets_entities(self):
        """The example text should produce at least one entity."""
        result = self.cat.get_entities(self.TEXT)
        self.assertTrue(result)
        entity_count = len(result["entities"])
        self.assertTrue(entity_count)
Loading