Skip to content

Commit cabcfee

Browse files
committed
CU-8699049kf: Fix MetaCAT related notebooks
1 parent f0a3874 commit cabcfee

File tree

2 files changed

+10
-18
lines changed

2 files changed

+10
-18
lines changed

medcat/2_train_model/2_supervised_training/meta_annotation_training.ipynb

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -144,13 +144,9 @@
144144
"# NOTE: we need to provide a BaseTokenizer to add the relevant additional data paths\n",
145145
"# to the relevant Entity/Span and Document implementation\n",
146146
"# we'll use the regex tokenizer here for example since it's easier to initialise\n",
147-
"# but you can use a spacy-based one, you just need to also pass:\n",
148-
"# - the model name (e.g 'en_core_web_md')\n",
149-
"# - the names of the disabled components (e.g ['ner', 'parser', 'vectors', 'textcat',\n",
150-
"# 'entity_linker', 'sentencizer', 'entity_ruler', 'merge_noun_chunks', 'merge_entities', 'merge_subtokens'])\n",
151-
"# - whether diacritics should be used\n",
152-
"# - max document length (e.g 1_000_000)\n",
153-
"base_tokenizer = create_tokenizer(\"regex\")\n",
147+
"# but you can use a spacy-based one, you just need to also pass the appropriate config\n",
148+
"from medcat.config import Config\n",
149+
"base_tokenizer = create_tokenizer(\"regex\", Config())\n",
154150
"for meta_model in meta_model_names:\n",
155151
" meta_cat_path = os.path.join(base_dir_meta_models, exp_start + meta_model)\n",
156152
" if model_is_legacy:\n",
@@ -228,7 +224,7 @@
228224
],
229225
"metadata": {
230226
"kernelspec": {
231-
"display_name": "venv_v2",
227+
"display_name": "venv_v2_311",
232228
"language": "python",
233229
"name": "python3"
234230
},
@@ -242,7 +238,7 @@
242238
"name": "python",
243239
"nbconvert_exporter": "python",
244240
"pygments_lexer": "ipython3",
245-
"version": "3.10.13"
241+
"version": "3.11.12"
246242
}
247243
},
248244
"nbformat": 4,

medcat/2_train_model/2_supervised_training/meta_annotation_training_advanced.ipynb

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -117,13 +117,9 @@
117117
"# NOTE: we need to provide a BaseTokenizer to add the relevant additional data paths\n",
118118
"# to the relevant Entity/Span and Document implementation\n",
119119
"# we'll use the regex tokenizer here for example since it's easier to initialise\n",
120-
"# but you can use a spacy-based one, you just need to also pass:\n",
121-
"# - the model name (e.g 'en_core_web_md')\n",
122-
"# - the names of the disabled components (e.g ['ner', 'parser', 'vectors', 'textcat',\n",
123-
"# 'entity_linker', 'sentencizer', 'entity_ruler', 'merge_noun_chunks', 'merge_entities', 'merge_subtokens'])\n",
124-
"# - whether diacritics should be used\n",
125-
"# - max document length (e.g 1_000_000)\n",
126-
"base_tokenizer = create_tokenizer(\"regex\")"
120+
"# but you can use a spacy-based one, you just need to also pass the appropriate config\n",
121+
"from medcat.config import Config\n",
122+
"base_tokenizer = create_tokenizer(\"regex\", Config())"
127123
]
128124
},
129125
{
@@ -339,7 +335,7 @@
339335
],
340336
"metadata": {
341337
"kernelspec": {
342-
"display_name": "Python 3",
338+
"display_name": "venv_v2_311",
343339
"language": "python",
344340
"name": "python3"
345341
},
@@ -353,7 +349,7 @@
353349
"name": "python",
354350
"nbconvert_exporter": "python",
355351
"pygments_lexer": "ipython3",
356-
"version": "3.8.8"
352+
"version": "3.11.12"
357353
}
358354
},
359355
"nbformat": 4,

0 commit comments

Comments
 (0)