From e237a8f18fb254ae08137ad1d067d77ff6080a5a Mon Sep 17 00:00:00 2001 From: Ranjith Rajendran Date: Tue, 14 Apr 2026 13:21:59 -0700 Subject: [PATCH] Add ERReadmissionMIMIC4 task for ER-specific 30-day readmission prediction --- docs/api/tasks.rst | 1 + .../pyhealth.tasks.mimic4_er_readmission.rst | 7 + examples/mimic4_er_readmission_retain.py | 137 +++++++ examples/mimic4_er_readmission_transformer.py | 116 ++++++ pyhealth/tasks/__init__.py | 1 + pyhealth/tasks/mimic4_er_readmission.py | 129 ++++++ tests/test_mimic4_er_readmission.py | 384 ++++++++++++++++++ 7 files changed, 775 insertions(+) create mode 100644 docs/api/tasks/pyhealth.tasks.mimic4_er_readmission.rst create mode 100644 examples/mimic4_er_readmission_retain.py create mode 100644 examples/mimic4_er_readmission_transformer.py create mode 100644 pyhealth/tasks/mimic4_er_readmission.py create mode 100644 tests/test_mimic4_er_readmission.py diff --git a/docs/api/tasks.rst b/docs/api/tasks.rst index 399b8f1aa..7ac7422c3 100644 --- a/docs/api/tasks.rst +++ b/docs/api/tasks.rst @@ -218,6 +218,7 @@ Available Tasks Mortality Prediction (StageNet MIMIC-IV) Patient Linkage (MIMIC-III) Readmission Prediction + ER-Specific Readmission (MIMIC-IV) Sleep Staging Sleep Staging (SleepEDF) Temple University EEG Tasks diff --git a/docs/api/tasks/pyhealth.tasks.mimic4_er_readmission.rst b/docs/api/tasks/pyhealth.tasks.mimic4_er_readmission.rst new file mode 100644 index 000000000..f76b8800c --- /dev/null +++ b/docs/api/tasks/pyhealth.tasks.mimic4_er_readmission.rst @@ -0,0 +1,7 @@ +pyhealth.tasks.mimic4_er_readmission +====================================== + +.. 
autoclass:: pyhealth.tasks.mimic4_er_readmission.ERReadmissionMIMIC4
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/examples/mimic4_er_readmission_retain.py b/examples/mimic4_er_readmission_retain.py
new file mode 100644
index 000000000..a0f477bd3
--- /dev/null
+++ b/examples/mimic4_er_readmission_retain.py
@@ -0,0 +1,137 @@
+"""
+Name: Ranjithkumar Rajendran
+NetID: rr54
+Paper: KEEP (CHIL 2025) — Elhussein et al.
+
+Ablation 1 — Task Comparison.
+Compares standard inpatient readmission
+(ReadmissionPredictionMIMIC4) vs ER-specific
+readmission (ERReadmissionMIMIC4) using RETAIN.
+"""
+from pyhealth.datasets import (
+    MIMIC4EHRDataset,
+    split_by_patient,
+    get_dataloader,
+)
+from pyhealth.tasks import (
+    ERReadmissionMIMIC4,
+    ReadmissionPredictionMIMIC4,
+)
+from pyhealth.models import RETAIN
+from pyhealth.trainer import Trainer
+import math
+
+
+def _fmt(v):
+    """Return *v* with four decimals, or 'n/a' when it is NaN."""
+    return "n/a" if math.isnan(v) else f"{v:.4f}"
+
+
+def _print_metrics(name, m):
+    """Print the 'roc_auc' and 'pr_auc' entries of *m* under *name*."""
+    print(f"{name} ROC-AUC: {_fmt(m['roc_auc'])}")
+    print(f"{name} PR-AUC : {_fmt(m['pr_auc'])}")
+
+
+def main():
+    """Run the Task-Comparison ablation and print both PR-AUCs."""
+    print("Loading Dataset ...")
+    # Point this to your MIMIC-IV root directory.
+    # e.g. "/content/drive/MyDrive/mimic-iv/2.2"
+    dataset = MIMIC4EHRDataset(
+        root="/path/to/mimic-iv-2.2",
+        tables=[
+            "diagnoses_icd",
+            "procedures_icd",
+            "prescriptions",
+        ],
+        dev=True,
+    )
+
+    # --- Task 1: Standard Inpatient Readmission ----
+    print("\n[Ablation] Task 1: Standard Readmission")
+    ds_std = dataset.set_task(
+        ReadmissionPredictionMIMIC4()
+    )
+
+    # --- Task 2: ER-Specific Readmission -----------
+    print("\n[Ablation] Task 2: ER Readmission")
+    ds_er = dataset.set_task(ERReadmissionMIMIC4())
+
+    print(f"\nStandard samples : {len(ds_std)}")
+    print(f"ER-Specific samples: {len(ds_er)}")
+
+    # --- Initialise models -------------------------
+    print("\nInitializing RETAIN on both cohorts ...")
+    model_std = RETAIN(dataset=ds_std)
+    print(" -> Standard task: OK")
+    model_er = RETAIN(dataset=ds_er)
+    print(" -> ER task : OK")
+
+    # --- Split + Dataloaders -----------------------
+    print("\n--- Splitting data ---")
+    tr_s, va_s, te_s = split_by_patient(
+        ds_std, [0.8, 0.1, 0.1]
+    )
+    tr_e, va_e, te_e = split_by_patient(
+        ds_er, [0.8, 0.1, 0.1]
+    )
+
+    if len(va_s) == 0 or len(va_e) == 0:
+        print(
+            "Val set is empty (tiny synthetic data).\n"
+            "Pipeline verified — skipping Trainer."
+        )
+        return
+
+    dl = get_dataloader  # alias for brevity
+    tr_l_s = dl(tr_s, batch_size=64, shuffle=True)
+    va_l_s = dl(va_s, batch_size=64, shuffle=False)
+    te_l_s = dl(te_s, batch_size=64, shuffle=False)
+
+    tr_l_e = dl(tr_e, batch_size=64, shuffle=True)
+    va_l_e = dl(va_e, batch_size=64, shuffle=False)
+    te_l_e = dl(te_e, batch_size=64, shuffle=False)
+
+    # --- Train Standard ----------------------------
+    print("\n--- Training: Standard Readmission ---")
+    t_std = Trainer(model=model_std)
+    t_std.train(
+        train_dataloader=tr_l_s,
+        val_dataloader=va_l_s,
+        epochs=10,
+        monitor="pr_auc",
+    )
+    m_std = t_std.evaluate(te_l_s)
+    _print_metrics("Standard", m_std)
+
+    # --- Train ER ----------------------------------
+    print("\n--- Training: ER Readmission ---")
+    t_er = Trainer(model=model_er)
+    t_er.train(
+        train_dataloader=tr_l_e,
+        val_dataloader=va_l_e,
+        epochs=10,
+        monitor="pr_auc",
+    )
+    m_er = t_er.evaluate(te_l_e)
+    _print_metrics("ER", m_er)
+
+    # --- Compare: magnitude + direction word -------
+    s = m_std["pr_auc"]
+    e = m_er["pr_auc"]
+    if math.isnan(s) or math.isnan(e):
+        print("\nAblation note: PR-AUC undefined "
+              "on this tiny split (expected).")
+    else:
+        d = s - e  # > 0 means the ER cohort scored lower
+        print(
+            f"\nAblation result: ER cohort "
+            f"PR-AUC is {abs(d) * 100:.2f}% "
+            f"{'lower' if d > 0 else 'higher'} "
+            f"than standard."
+        )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/mimic4_er_readmission_transformer.py b/examples/mimic4_er_readmission_transformer.py
new file mode 100644
index 000000000..4593f0f12
--- /dev/null
+++ b/examples/mimic4_er_readmission_transformer.py
@@ -0,0 +1,116 @@
+"""
+Name: Ranjithkumar Rajendran
+NetID: rr54
+Paper: KEEP (CHIL 2025) — Elhussein et al.
+
+Ablation 2 — Architecture Comparison.
+Compares Transformer vs RETAIN on the new
+ERReadmissionMIMIC4 task.
+"""
+from pyhealth.datasets import (
+    MIMIC4EHRDataset,
+    split_by_patient,
+    get_dataloader,
+)
+from pyhealth.tasks import ERReadmissionMIMIC4
+from pyhealth.models import Transformer, RETAIN
+from pyhealth.trainer import Trainer
+import math
+
+
+def _fmt(v):
+    """Return *v* with four decimals, or 'n/a' when it is NaN."""
+    return "n/a" if math.isnan(v) else f"{v:.4f}"
+
+
+def _print_metrics(name, m):
+    """Print the 'roc_auc' and 'pr_auc' entries of *m* under *name*."""
+    print(f"{name} ROC-AUC: {_fmt(m['roc_auc'])}")
+    print(f"{name} PR-AUC : {_fmt(m['pr_auc'])}")
+
+
+def main():
+    """Run the Architecture-Comparison ablation and print both PR-AUCs."""
+    print("Loading Dataset ...")
+    # Point this to your MIMIC-IV root directory.
+    dataset = MIMIC4EHRDataset(
+        root="/path/to/mimic-iv-2.2",
+        tables=["diagnoses_icd"],
+        dev=True,
+    )
+
+    print("\nApplying ER-Specific Readmission Task ...")
+    ds_er = dataset.set_task(ERReadmissionMIMIC4())
+    print(f"ER samples: {len(ds_er)}")
+
+    # --- Initialise both architectures -------------
+    print("\n[Ablation] Architecture 1: RETAIN")
+    model_ret = RETAIN(dataset=ds_er)
+    print(" -> RETAIN OK")
+
+    print("\n[Ablation] Architecture 2: Transformer")
+    model_tfm = Transformer(dataset=ds_er)
+    print(" -> Transformer OK")
+
+    # --- Split + Dataloaders -----------------------
+    print("\n--- Splitting data ---")
+    tr, va, te = split_by_patient(
+        ds_er, [0.8, 0.1, 0.1]
+    )
+
+    if len(va) == 0:
+        print(
+            "Val set is empty (tiny synthetic data).\n"
+            "Pipeline verified — skipping Trainer."
+        )
+        return
+
+    dl = get_dataloader
+    tr_l = dl(tr, batch_size=64, shuffle=True)
+    va_l = dl(va, batch_size=64, shuffle=False)
+    te_l = dl(te, batch_size=64, shuffle=False)
+
+    # --- Train RETAIN ------------------------------
+    print("\n--- Training: RETAIN ---")
+    t_ret = Trainer(model=model_ret)
+    t_ret.train(
+        train_dataloader=tr_l,
+        val_dataloader=va_l,
+        epochs=10,
+        monitor="pr_auc",
+    )
+    m_ret = t_ret.evaluate(te_l)
+    _print_metrics("RETAIN", m_ret)
+
+    # --- Train Transformer -------------------------
+    print("\n--- Training: Transformer ---")
+    t_tfm = Trainer(model=model_tfm)
+    t_tfm.train(
+        train_dataloader=tr_l,
+        val_dataloader=va_l,
+        epochs=10,
+        monitor="pr_auc",
+    )
+    m_tfm = t_tfm.evaluate(te_l)
+    _print_metrics("Transformer", m_tfm)
+
+    # --- Compare: magnitude + direction word -------
+    r = m_ret["pr_auc"]
+    t = m_tfm["pr_auc"]
+    if math.isnan(r) or math.isnan(t):
+        print(
+            "\nAblation note: PR-AUC undefined "
+            "on this tiny split (expected)."
+        )
+    else:
+        d = t - r  # > 0 means Transformer beat RETAIN
+        print(
+            f"\nAblation result: Transformer "
+            f"PR-AUC is {abs(d) * 100:.2f}% "
+            f"{'higher' if d > 0 else 'lower'}"
+            f" than RETAIN."
+        )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pyhealth/tasks/__init__.py b/pyhealth/tasks/__init__.py
index 797988377..04f9e71f4 100644
--- a/pyhealth/tasks/__init__.py
+++ b/pyhealth/tasks/__init__.py
@@ -31,6 +31,7 @@
 from .length_of_stay_stagenet_mimic4 import LengthOfStayStageNetMIMIC4
 from .medical_coding import MIMIC3ICD9Coding
 from .medical_transcriptions_classification import MedicalTranscriptionsClassification
+from .mimic4_er_readmission import ERReadmissionMIMIC4
 from .mortality_prediction import (
     MortalityPredictionEICU,
     MortalityPredictionEICU2,
diff --git a/pyhealth/tasks/mimic4_er_readmission.py b/pyhealth/tasks/mimic4_er_readmission.py
new file mode 100644
index 000000000..fc8a5fcfa
--- /dev/null
+++ b/pyhealth/tasks/mimic4_er_readmission.py
@@ -0,0 +1,129 @@
+"""
+Name: Ranjithkumar Rajendran
+NetID: rr54
+Paper: KEEP (CHIL 2025) — Elhussein et al.
+"""
+from datetime import datetime, timedelta
+from typing import Dict, List
+
+from pyhealth.data import Event, Patient
+from .base_task import BaseTask
+
+
+class ERReadmissionMIMIC4(BaseTask):
+    """ER-Specific Readmission prediction on MIMIC-IV.
+
+    Predicts whether an emergency-room patient will be
+    readmitted within a specified window (default 30 days)
+    based on clinical information from the current ER visit.
+
+    Only visits whose ``admission_location`` is
+    ``'EMERGENCY ROOM'`` are considered. Diagnosis codes
+    are prefixed with their ICD version (``"9_"`` or
+    ``"10_"``) to match the format used by
+    :class:`~pyhealth.tasks.ReadmissionPredictionMIMIC4`.
+
+    Attributes:
+        task_name (str): Name of the task.
+        input_schema (Dict[str, str]): Input schema.
+        output_schema (Dict[str, str]): Output schema.
+
+    Examples:
+        >>> from pyhealth.datasets import MIMIC4EHRDataset
+        >>> from pyhealth.tasks import ERReadmissionMIMIC4
+        >>> dataset = MIMIC4EHRDataset(
+        ...     root="/path/to/mimic-iv/2.2",
+        ...     tables=["diagnoses_icd"],
+        ... )
+        >>> task = ERReadmissionMIMIC4()
+        >>> samples = dataset.set_task(task)
+    """
+
+    task_name: str = "ERReadmissionMIMIC4"
+    input_schema: Dict[str, str] = {
+        "conditions": "sequence",
+    }
+    output_schema: Dict[str, str] = {"readmission": "binary"}
+
+    def __init__(self, window: timedelta = timedelta(days=30)) -> None:
+        """Initialise the task.
+
+        Args:
+            window: If a subsequent admission occurs within
+                this window of an ER discharge, it is
+                labelled as a readmission. Defaults to
+                30 days per KEEP (2025).
+        """
+        # Let BaseTask run its own initialisation first.
+        super().__init__()
+        self.window = window
+
+    def __call__(self, patient: Patient) -> List[Dict]:
+        """Generate binary samples for one patient.
+
+        Visits with no diagnoses are skipped. Only visits
+        where ``admission_location == 'EMERGENCY ROOM'``
+        are processed. The last admission never yields a
+        sample because no later admission can label it.
+
+        Args:
+            patient: A PyHealth patient object.
+
+        Returns:
+            A list of sample dicts, each containing
+            ``visit_id``, ``patient_id``, ``conditions``
+            (list of versioned ICD strings), and
+            ``readmission`` (0 or 1).
+        """
+        admissions: List[Event] = patient.get_events(
+            event_type="admissions"
+        )
+        if len(admissions) < 2:
+            return []
+
+        samples = []
+        # Pair every admission with the one that follows it;
+        # the final admission has no successor to pair with.
+        for adm, nxt in zip(admissions, admissions[1:]):
+            loc = getattr(adm, "admission_location", "")
+            if loc != "EMERGENCY ROOM":
+                continue
+
+            filt = ("hadm_id", "==", adm.hadm_id)
+            diagnoses = [
+                f"{getattr(ev, 'icd_version', '10')}_{ev.icd_code}"
+                for ev in patient.get_events(
+                    event_type="diagnoses_icd",
+                    filters=[filt],
+                )
+            ]
+            if not diagnoses:
+                continue
+
+            # ``dischtime`` is parsed from text; fall back to
+            # a date-only format if the full one does not match.
+            try:
+                disch = datetime.strptime(
+                    adm.dischtime,
+                    "%Y-%m-%d %H:%M:%S",
+                )
+            except ValueError:
+                disch = datetime.strptime(
+                    adm.dischtime, "%Y-%m-%d"
+                )
+
+            # The gap runs from this visit's discharge to the
+            # next admission's timestamp. Strict "<": a gap
+            # exactly equal to the window is labelled 0.
+            readmit = int((nxt.timestamp - disch) < self.window)
+
+            samples.append(
+                {
+                    "visit_id": adm.hadm_id,
+                    "patient_id": patient.patient_id,
+                    "conditions": diagnoses,
+                    "readmission": readmit,
+                }
+            )
+
+        return samples
diff --git
a/tests/test_mimic4_er_readmission.py b/tests/test_mimic4_er_readmission.py
new file mode 100644
index 000000000..cdc620a7c
--- /dev/null
+++ b/tests/test_mimic4_er_readmission.py
@@ -0,0 +1,384 @@
+"""
+Name: Ranjithkumar Rajendran
+NetID: rr54
+Paper: KEEP (CHIL 2025) — Elhussein et al.
+
+Unit tests for ERReadmissionMIMIC4 using synthetic
+Patient objects (3 core + 4 edge-case scenarios).
+"""
+import datetime
+import polars as pl
+from pyhealth.data import Patient
+from pyhealth.tasks.mimic4_er_readmission import (
+    ERReadmissionMIMIC4,
+)
+
+
+def _patient(pid, events):
+    """Build a Patient backed by a polars frame of event dicts."""
+    df = pl.DataFrame(events)
+    return Patient(patient_id=pid, data_source=df)
+
+
+# ----------------------------------------------------------------
+# Core scenarios
+# ----------------------------------------------------------------
+
+def test_er_positive_readmission():
+    """ER admit → readmitted within 10 days → label 1."""
+    task = ERReadmissionMIMIC4(
+        window=datetime.timedelta(days=30)
+    )
+    p = _patient("P1", [
+        {
+            "timestamp": datetime.datetime(2026, 1, 1),
+            "event_type": "admissions",
+            "admissions/hadm_id": "H1",
+            "admissions/admission_location": (
+                "EMERGENCY ROOM"
+            ),
+            "admissions/dischtime": (
+                "2026-01-02 12:00:00"
+            ),
+            "diagnoses_icd/hadm_id": None,
+            "diagnoses_icd/icd_code": None,
+            "diagnoses_icd/icd_version": None,
+        },
+        {
+            "timestamp": datetime.datetime(
+                2026, 1, 1, 12
+            ),
+            "event_type": "diagnoses_icd",
+            "admissions/hadm_id": None,
+            "admissions/admission_location": None,
+            "admissions/dischtime": None,
+            "diagnoses_icd/hadm_id": "H1",
+            "diagnoses_icd/icd_code": "401.9",
+            "diagnoses_icd/icd_version": "9",
+        },
+        {
+            "timestamp": datetime.datetime(2026, 1, 10),
+            "event_type": "admissions",
+            "admissions/hadm_id": "H2",
+            "admissions/admission_location": (
+                "EMERGENCY ROOM"
+            ),
+            "admissions/dischtime": (
+                "2026-01-11 12:00:00"
+            ),
+            "diagnoses_icd/hadm_id": None,
+            "diagnoses_icd/icd_code": None,
+            "diagnoses_icd/icd_version": None,
+        },
+    ])
+    samples = task(p)
+    assert len(samples) == 1
+    assert samples[0]["readmission"] == 1
+    assert isinstance(samples[0]["conditions"], list)
+    assert samples[0]["conditions"] == ["9_401.9"]
+
+
+def test_er_negative_readmission():
+    """ER admit → next admit 57.5 days after discharge → label 0."""
+    task = ERReadmissionMIMIC4(
+        window=datetime.timedelta(days=30)
+    )
+    p = _patient("P2", [
+        {
+            "timestamp": datetime.datetime(2026, 1, 1),
+            "event_type": "admissions",
+            "admissions/hadm_id": "H1",
+            "admissions/admission_location": (
+                "EMERGENCY ROOM"
+            ),
+            "admissions/dischtime": (
+                "2026-01-02 12:00:00"
+            ),
+            "diagnoses_icd/hadm_id": None,
+            "diagnoses_icd/icd_code": None,
+            "diagnoses_icd/icd_version": None,
+        },
+        {
+            "timestamp": datetime.datetime(
+                2026, 1, 1, 12
+            ),
+            "event_type": "diagnoses_icd",
+            "admissions/hadm_id": None,
+            "admissions/admission_location": None,
+            "admissions/dischtime": None,
+            "diagnoses_icd/hadm_id": "H1",
+            "diagnoses_icd/icd_code": "250.00",
+            "diagnoses_icd/icd_version": "9",
+        },
+        {
+            "timestamp": datetime.datetime(2026, 3, 1),
+            "event_type": "admissions",
+            "admissions/hadm_id": "H2",
+            "admissions/admission_location": (
+                "EMERGENCY ROOM"
+            ),
+            "admissions/dischtime": (
+                "2026-03-05 12:00:00"
+            ),
+            "diagnoses_icd/hadm_id": None,
+            "diagnoses_icd/icd_code": None,
+            "diagnoses_icd/icd_version": None,
+        },
+    ])
+    samples = task(p)
+    assert len(samples) == 1
+    assert samples[0]["readmission"] == 0
+    assert samples[0]["conditions"] == ["9_250.00"]
+
+
+def test_non_er_admission_skipped():
+    """Non-ER admit → should produce no samples."""
+    task = ERReadmissionMIMIC4()
+    p = _patient("P3", [
+        {
+            "timestamp": datetime.datetime(2026, 1, 1),
+            "event_type": "admissions",
+            "admissions/hadm_id": "H1",
+            "admissions/admission_location": (
+                "PHYSICIAN REFERRAL"
+            ),
+            "admissions/dischtime": (
+                "2026-01-02 12:00:00"
+            ),
+            "diagnoses_icd/hadm_id": None,
+            "diagnoses_icd/icd_code": None,
+            "diagnoses_icd/icd_version": None,
+        },
+        {
+            "timestamp": datetime.datetime(
+                2026, 1, 1, 12
+            ),
+            "event_type": "diagnoses_icd",
+            "admissions/hadm_id": None,
+            "admissions/admission_location": None,
+            "admissions/dischtime": None,
+            "diagnoses_icd/hadm_id": "H1",
+            "diagnoses_icd/icd_code": "428.0",
+            "diagnoses_icd/icd_version": "9",
+        },
+        {
+            "timestamp": datetime.datetime(2026, 1, 10),
+            "event_type": "admissions",
+            "admissions/hadm_id": "H2",
+            "admissions/admission_location": (
+                "EMERGENCY ROOM"
+            ),
+            "admissions/dischtime": (
+                "2026-01-11 12:00:00"
+            ),
+            "diagnoses_icd/hadm_id": None,
+            "diagnoses_icd/icd_code": None,
+            "diagnoses_icd/icd_version": None,
+        },
+    ])
+    samples = task(p)
+    assert len(samples) == 0
+
+
+# ----------------------------------------------------------------
+# Edge-case scenarios
+# ----------------------------------------------------------------
+
+def test_single_admission_returns_empty():
+    """Only one admission → impossible to determine
+    readmission → return []."""
+    task = ERReadmissionMIMIC4()
+    p = _patient("P4", [
+        {
+            "timestamp": datetime.datetime(2026, 1, 1),
+            "event_type": "admissions",
+            "admissions/hadm_id": "H1",
+            "admissions/admission_location": (
+                "EMERGENCY ROOM"
+            ),
+            "admissions/dischtime": (
+                "2026-01-02 12:00:00"
+            ),
+            "diagnoses_icd/hadm_id": None,
+            "diagnoses_icd/icd_code": None,
+            "diagnoses_icd/icd_version": None,
+        },
+    ])
+    assert task(p) == []
+
+
+def test_er_no_diagnoses_skipped():
+    """ER admission exists but has zero diagnoses →
+    that visit should be skipped."""
+    task = ERReadmissionMIMIC4()
+    p = _patient("P5", [
+        {
+            "timestamp": datetime.datetime(2026, 1, 1),
+            "event_type": "admissions",
+            "admissions/hadm_id": "H1",
+            "admissions/admission_location": (
+                "EMERGENCY ROOM"
+            ),
+            "admissions/dischtime": (
+                "2026-01-02 12:00:00"
+            ),
+            "diagnoses_icd/hadm_id": None,
+            "diagnoses_icd/icd_code": None,
+            "diagnoses_icd/icd_version": None,
+        },
+        {
+            "timestamp": datetime.datetime(2026, 1, 10),
+            "event_type": "admissions",
+            "admissions/hadm_id": "H2",
+            "admissions/admission_location": (
+                "EMERGENCY ROOM"
+            ),
+            "admissions/dischtime": (
+                "2026-01-11 12:00:00"
+            ),
+            "diagnoses_icd/hadm_id": None,
+            "diagnoses_icd/icd_code": None,
+            "diagnoses_icd/icd_version": None,
+        },
+    ])
+    # No diagnoses_icd events → should return []
+    assert task(p) == []
+
+
+def test_custom_window_boundary():
+    """Readmission exactly at window boundary (7 days)
+    with a 7-day window should NOT be labelled 1
+    because the comparison is strict less-than."""
+    task = ERReadmissionMIMIC4(
+        window=datetime.timedelta(days=7)
+    )
+    p = _patient("P6", [
+        {
+            "timestamp": datetime.datetime(2026, 1, 1),
+            "event_type": "admissions",
+            "admissions/hadm_id": "H1",
+            "admissions/admission_location": (
+                "EMERGENCY ROOM"
+            ),
+            "admissions/dischtime": (
+                "2026-01-02 00:00:00"
+            ),
+            "diagnoses_icd/hadm_id": None,
+            "diagnoses_icd/icd_code": None,
+            "diagnoses_icd/icd_version": None,
+        },
+        {
+            "timestamp": datetime.datetime(
+                2026, 1, 1, 6
+            ),
+            "event_type": "diagnoses_icd",
+            "admissions/hadm_id": None,
+            "admissions/admission_location": None,
+            "admissions/dischtime": None,
+            "diagnoses_icd/hadm_id": "H1",
+            "diagnoses_icd/icd_code": "J18.9",
+            "diagnoses_icd/icd_version": "10",
+        },
+        {
+            # Exactly 7 days after discharge
+            "timestamp": datetime.datetime(2026, 1, 9),
+            "event_type": "admissions",
+            "admissions/hadm_id": "H2",
+            "admissions/admission_location": (
+                "EMERGENCY ROOM"
+            ),
+            "admissions/dischtime": (
+                "2026-01-10 12:00:00"
+            ),
+            "diagnoses_icd/hadm_id": None,
+            "diagnoses_icd/icd_code": None,
+            "diagnoses_icd/icd_version": None,
+        },
+    ])
+    samples = task(p)
+    assert len(samples) == 1
+    # 7 days == window → strict "<" → label 0
+    assert samples[0]["readmission"] == 0
+    assert samples[0]["conditions"] == ["10_J18.9"]
+
+
+def test_multiple_er_visits():
+    """Patient with 3 ER admissions → should produce
+    2 samples (one per non-last admission)."""
+    task = ERReadmissionMIMIC4()
+    p = _patient("P7", [
+        {
+            "timestamp": datetime.datetime(2026, 1, 1),
+            "event_type": "admissions",
+            "admissions/hadm_id": "H1",
+            "admissions/admission_location": (
+                "EMERGENCY ROOM"
+            ),
+            "admissions/dischtime": (
+                "2026-01-02 12:00:00"
+            ),
+            "diagnoses_icd/hadm_id": None,
+            "diagnoses_icd/icd_code": None,
+            "diagnoses_icd/icd_version": None,
+        },
+        {
+            "timestamp": datetime.datetime(
+                2026, 1, 1, 6
+            ),
+            "event_type": "diagnoses_icd",
+            "admissions/hadm_id": None,
+            "admissions/admission_location": None,
+            "admissions/dischtime": None,
+            "diagnoses_icd/hadm_id": "H1",
+            "diagnoses_icd/icd_code": "I10",
+            "diagnoses_icd/icd_version": "10",
+        },
+        {
+            "timestamp": datetime.datetime(2026, 1, 10),
+            "event_type": "admissions",
+            "admissions/hadm_id": "H2",
+            "admissions/admission_location": (
+                "EMERGENCY ROOM"
+            ),
+            "admissions/dischtime": (
+                "2026-01-11 12:00:00"
+            ),
+            "diagnoses_icd/hadm_id": None,
+            "diagnoses_icd/icd_code": None,
+            "diagnoses_icd/icd_version": None,
+        },
+        {
+            "timestamp": datetime.datetime(
+                2026, 1, 10, 6
+            ),
+            "event_type": "diagnoses_icd",
+            "admissions/hadm_id": None,
+            "admissions/admission_location": None,
+            "admissions/dischtime": None,
+            "diagnoses_icd/hadm_id": "H2",
+            "diagnoses_icd/icd_code": "E11.9",
+            "diagnoses_icd/icd_version": "10",
+        },
+        {
+            "timestamp": datetime.datetime(2026, 2, 1),
+            "event_type": "admissions",
+            "admissions/hadm_id": "H3",
+            "admissions/admission_location": (
+                "EMERGENCY ROOM"
+            ),
+            "admissions/dischtime": (
+                "2026-02-02 12:00:00"
+            ),
+            "diagnoses_icd/hadm_id": None,
+            "diagnoses_icd/icd_code": None,
+            "diagnoses_icd/icd_version": None,
+        },
+    ])
+    samples = task(p)
+    assert len(samples) == 2
+    assert samples[0]["conditions"] == ["10_I10"]
+    assert samples[1]["conditions"] == ["10_E11.9"]
+    # H1 discharge → H2 admit is 7.5 days → readmit
+    assert samples[0]["readmission"] == 1
+    # H2 discharge → H3 admit is 20.5 days → readmit
+    assert samples[1]["readmission"] == 1