From 071022dbdd40bc3315f2109d61a263da2b08ffee Mon Sep 17 00:00:00 2001
From: philipp-gecko <259382694+philipp-gecko@users.noreply.github.com>
Date: Wed, 25 Feb 2026 14:57:51 +0100
Subject: [PATCH] Add SI (structured register content) detail page scraping

Extends the CLI to fetch the XJustiz XML document from the Handelsregister
portal, parsing company details including address, legal form, share capital,
managing directors, prokura holders, business purpose, and register entries.

New --detail / -det flag fetches SI data for the first search result.
Both text and JSON (-j) output modes include the enriched data.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 handelsregister.py      | 383 +++++++++++++++++++++++++++++++++++++++-
 test_handelsregister.py |  83 ++++++++-
 2 files changed, 455 insertions(+), 11 deletions(-)

diff --git a/handelsregister.py b/handelsregister.py
index 03ccc1a..137ce9b 100755
--- a/handelsregister.py
+++ b/handelsregister.py
@@ -12,6 +12,7 @@
 import sys
 from bs4 import BeautifulSoup
 import urllib.parse
+import xml.etree.ElementTree as ET
 
 # Dictionaries to map arguments to values
 schlagwortOptionen = {
@@ -20,6 +21,24 @@
     "exact": 3
 }
 
+# Prefix map for the XJustiz XML namespace; passed to ElementTree find()/findall()
+NS = {'tns': 'http://www.xjustiz.de'}
+
+# Role code -> German label (from xjustiz codeliste:gds.rollenbezeichnung)
+ROLE_CODES = {
+    '086': 'Geschäftsführer(in)',
+    '087': 'Vorstand',
+    '285': 'Prokurist(in)',
+    '287': 'Rechtsträger(in)',
+    '288': 'Registergericht',
+    '215': 'Einreicher(in)',
+    '061': 'Gesellschafter(in)',
+    '062': 'Kommanditist(in)',
+    '063': 'Persönlich haftende(r) Gesellschafter(in)',
+    '089': 'Liquidator(in)',
+    '297': 'Inhaber(in)',
+}
+
 class HandelsRegister:
     def __init__(self, args):
         self.args = args
@@ -49,7 +68,7 @@ def __init__(self, args):
             ),
             (   "Connection", "keep-alive"    ),
         ]
-        
+
         self.cachedir = pathlib.Path(tempfile.gettempdir()) / "handelsregister_cache"
         self.cachedir.mkdir(parents=True, exist_ok=True)
 
@@ -96,8 +115,294 @@ def search_company(self):
             # TODO catch the situation if there's more than one company?
             # TODO get all documents attached to the exact company
             # TODO parse useful information out of the PDFs
+        self._last_search_html = html
         return get_companies_in_searchresults(html)
 
+    def fetch_company_detail(self, result_index=0):
+        """Fetch and parse the SI (Strukturierter Registerinhalt) XML for one search hit.
+
+        Must be called after search_company(): reads the stored result HTML, finds
+        the SI link of row `result_index`, replays its PrimeFaces submit params on
+        'ergebnissForm', caches the returned XML and parses it with parse_si_detail().
+
+        Returns the parsed detail dict, or None if no usable SI link is found.
+        """
+        html = self._last_search_html
+        soup = BeautifulSoup(html, 'html.parser')
+
+        # Find the SI link for the given result index
+        si_link = None
+        for a in soup.find_all('a'):
+            link_id = a.get('id', '')
+            # Anchor on the table id: a bare ':{index}:' also matches the per-row link counter
+            if 'selectedSuchErgebnisFormTable:{}:'.format(result_index) in link_id:
+                span = a.find('span')
+                if span and span.text.strip() == 'SI':
+                    si_link = a
+                    break
+
+        if not si_link:
+            return None
+
+        onclick = si_link.get('onclick', '')
+        if not onclick:
+            return None
+
+        # Parse PrimeFaces.addSubmitParam params from onclick
+        pairs = re.findall(r"'([^']+)':'([^']*)'", onclick)
+        if not pairs:
+            return None
+
+        # Select the ergebnissForm and inject hidden params
+        self.browser.select_form(name='ergebnissForm')
+        for key, value in pairs:
+            self.browser.form.new_control('hidden', key, {'value': value})
+        self.browser.form.fixup()
+
+        response_si = self.browser.submit()
+        si_xml = response_si.read().decode('utf-8')
+
+        # Cache the SI XML; write UTF-8 explicitly to match the decode above
+        cachename = self.companyname2cachename(self.args.schlagwoerter + '_SI')
+        with open(cachename, 'w', encoding='utf-8') as f:
+            f.write(si_xml)
+
+        return parse_si_detail(si_xml)
+
+
+def _build_comment_map(xml_str):
+    """Map each <code> value to the text of the XML comment directly before it.
+
+    XJustiz labels codes like <!--HRB--><code>HRB</code>; ElementTree drops comments,
+    so the raw string is scanned. The comment must directly precede <code>. Keys are
+    bare codes, so equal codes from different code lists collide (last one wins).
+    """
+    comment_map = {}
+    for match in re.finditer(r'<!--((?:(?!-->).)+)-->\s*<code>([^<]+)</code>', xml_str):
+        comment_map[match.group(2)] = match.group(1)
+    return comment_map
+
+
+def parse_si_detail(xml_str):
+    """Parse the SI (Strukturierter Registerinhalt) XJustiz XML into a dict.
+
+    Returns a dict with optional keys such as name, legal_form, seat, address,
+    directors, prokura, capital, register_entries; raises ET.ParseError on bad XML.
+    """
+    root = ET.fromstring(xml_str)
+    comment_map = _build_comment_map(xml_str)
+    detail = {}
+
+    # Index every role by its rollennummer so later sections can resolve references
+    roles = {}  # rollennummer -> {role_code, role_label, **fields from _parse_beteiligter}
+    for beteiligung in root.findall('.//tns:beteiligung', NS):
+        rolle_elems = beteiligung.findall('tns:rolle', NS)
+        beteiligter = beteiligung.find('tns:beteiligter', NS)
+        if beteiligter is None:
+            continue
+
+        for rolle in rolle_elems:
+            rollennummer = _text(rolle, 'tns:rollennummer')
+            role_code_elem = rolle.find('tns:rollenbezeichnung', NS)
+            role_code = _text(role_code_elem, 'code') if role_code_elem is not None else None
+            role_label = ROLE_CODES.get(role_code, comment_map.get(role_code, role_code))
+
+            person_data = _parse_beteiligter(beteiligter, comment_map)
+            if rollennummer:
+                roles[rollennummer] = {
+                    'role_code': role_code,
+                    'role_label': role_label,
+                    **person_data
+                }
+
+    # Company info comes from the first role with code 287 (Rechtsträger); values may be None
+    for rnum, rdata in roles.items():
+        if rdata.get('role_code') == '287':
+            detail['name'] = rdata.get('name')
+            detail['legal_form'] = rdata.get('legal_form')
+            detail['legal_form_code'] = rdata.get('legal_form_code')
+            detail['seat'] = rdata.get('seat')
+            if rdata.get('address'):
+                detail['address'] = rdata['address']
+            break
+
+    # basisdatenRegister: articles date, business purpose and representation rules
+    basisdaten = root.find('.//tns:basisdatenRegister', NS)
+    if basisdaten is not None:
+        # Satzungsdatum
+        satzung = basisdaten.find('.//tns:aktuellesSatzungsdatum', NS)
+        if satzung is not None and satzung.text:
+            detail['articles_of_association_date'] = satzung.text
+
+        # Gegenstand (business purpose)
+        gegenstand = basisdaten.find('.//tns:gegenstand', NS)
+        if gegenstand is not None and gegenstand.text:
+            detail['business_purpose'] = gegenstand.text.strip()
+
+        # Vertretungsregelung (general representation rule, label taken from the XML comment)
+        allg_vertretung = basisdaten.find('.//tns:allgemeineVertretungsregelung', NS)
+        if allg_vertretung is not None:
+            vb = allg_vertretung.find('.//tns:vertretungsbefugnis', NS)
+            if vb is not None:
+                vb_code = _text(vb, 'code')
+                if vb_code and vb_code in comment_map:
+                    detail['representation_rules'] = comment_map[vb_code]
+
+        # Per-person rules: the referenced roles[...] entry is mutated in place
+        for vb_elem in basisdaten.findall('.//tns:vertretungsberechtigte', NS):
+            ref = _text(vb_elem, 'tns:ref.rollennummer')
+            if ref and ref in roles:
+                rep = roles[ref]
+                besondere = vb_elem.find('tns:besondereVertretungsregelung', NS)
+                if besondere is not None:
+                    freitext = besondere.find('.//tns:vertretungsbefugnisFreitext', NS)
+                    if freitext is not None and freitext.text:
+                        rep['representation'] = freitext.text.strip().rstrip(';')
+                    else:
+                        vb_code_elem = besondere.find('.//tns:vertretungsbefugnis', NS)
+                        if vb_code_elem is not None:
+                            code_val = _text(vb_code_elem, 'code')
+                            if code_val and code_val in comment_map:
+                                rep['representation'] = comment_map[code_val]
+
+                    befreiung = besondere.find('.//tns:befreiungVon181BGB', NS)
+                    if befreiung is not None:
+                        rep['exempt_181_bgb'] = True
+
+    # Collect directors and prokura holders (after the in-place rule updates above)
+    directors = []
+    prokura = []
+    for rnum, rdata in roles.items():
+        if rdata.get('role_code') == '086':  # Geschäftsführer
+            directors.append(rdata)
+        elif rdata.get('role_code') == '087':  # Vorstand
+            directors.append(rdata)
+        elif rdata.get('role_code') == '285':  # Prokurist
+            prokura.append(rdata)
+
+    if directors:
+        detail['directors'] = directors
+    if prokura:
+        detail['prokura'] = prokura
+
+    # Capital: stammkapital, falling back to grundkapital; currency defaults to 'EUR'
+    stammkapital = root.find('.//tns:stammkapital', NS)
+    if stammkapital is None:
+        stammkapital = root.find('.//tns:grundkapital', NS)
+    if stammkapital is not None:
+        zahl = _text(stammkapital, 'tns:zahl')
+        waehrung_elem = stammkapital.find('.//tns:waehrung', NS)
+        waehrung_code = _text(waehrung_elem, 'code') if waehrung_elem is not None else None
+        if zahl:
+            detail['capital'] = {
+                'amount': zahl,
+                'currency': waehrung_code or 'EUR'
+            }
+
+    # Register type/number: set only when the register element and the number both exist
+    aktenzeichen = root.find('.//tns:aktenzeichen.strukturiert', NS)
+    if aktenzeichen is not None:
+        register = aktenzeichen.find('tns:register', NS)
+        nummer = _text(aktenzeichen, 'tns:laufendeNummer')
+        if register is not None and nummer:
+            reg_code = _text(register, 'code')
+            detail['register_type'] = reg_code
+            detail['register_number'] = nummer
+
+    # Auszug metadata (a value is None when its child element is missing or empty)
+    auszug = root.find('.//tns:auszug', NS)
+    if auszug is not None:
+        detail['retrieval_date'] = _text(auszug, 'tns:abrufdatum')
+        detail['last_entry_date'] = _text(auszug, 'tns:letzteEintragung')
+        detail['num_entries'] = _text(auszug, 'tns:anzahlEintragungen')
+
+    # Eintragungstexte (register entry texts); 'type' is set only if a comment label exists
+    entries = []
+    for et_elem in root.findall('.//tns:eintragungstext', NS):
+        entry = {}
+        entry['column'] = _text(et_elem, 'tns:spalte')
+        entry['position'] = _text(et_elem, 'tns:position')
+        entry['number'] = _text(et_elem, 'tns:laufendeNummer')
+        entry['text'] = _text(et_elem, 'tns:text')
+        art_elem = et_elem.find('tns:eintragungsart', NS)
+        if art_elem is not None:
+            art_code = _text(art_elem, 'code')
+            if art_code and art_code in comment_map:
+                entry['type'] = comment_map[art_code]
+        entries.append(entry)
+    if entries:
+        detail['register_entries'] = entries
+
+    return detail
+
+
+def _text(parent, path):
+    """Return the stripped text of the first `path` match under `parent`, or None."""
+    if parent is None:
+        return None
+    node = parent.find(path, NS)
+    if node is None or not node.text:
+        return None
+    return node.text.strip()
+
+
+def _parse_beteiligter(beteiligter, comment_map):
+    """Parse a beteiligter element into a dict; natuerlichePerson wins over organisation."""
+    data = {}
+
+    # Natural person: name, date_of_birth and city; returns early, organisation is skipped
+    person = beteiligter.find('.//tns:natuerlichePerson', NS)
+    if person is not None:
+        vorname = _text(person, './/tns:vorname')
+        nachname = _text(person, './/tns:nachname')
+        if vorname and nachname:
+            data['name'] = '%s %s' % (vorname, nachname)
+        elif nachname:
+            data['name'] = nachname
+
+        geburtsdatum = _text(person, './/tns:geburtsdatum')
+        if geburtsdatum:
+            data['date_of_birth'] = geburtsdatum
+
+        anschrift = person.find('tns:anschrift', NS)
+        if anschrift is not None:
+            data['city'] = _text(anschrift, 'tns:ort')
+
+        return data
+
+    # Organisation: name (may be None), legal form label + code, seat and address
+    org = beteiligter.find('.//tns:organisation', NS)
+    if org is not None:
+        data['name'] = _text(org, './/tns:bezeichnung.aktuell')
+
+        rechtsform = org.find('.//tns:rechtsform', NS)
+        if rechtsform is not None:
+            rf_code = _text(rechtsform, 'code')
+            if rf_code:
+                data['legal_form'] = comment_map.get(rf_code, rf_code)  # label, else raw code
+                data['legal_form_code'] = rf_code
+
+        sitz = org.find('tns:sitz', NS)
+        if sitz is not None:
+            data['seat'] = _text(sitz, 'tns:ort')
+
+        anschrift = org.find('tns:anschrift', NS)
+        if anschrift is not None:
+            addr = {}
+            street = _text(anschrift, 'tns:strasse')
+            hausnummer = _text(anschrift, 'tns:hausnummer')
+            if street:
+                addr['street'] = street + (' ' + hausnummer if hausnummer else '')
+            plz = _text(anschrift, 'tns:postleitzahl')
+            if plz:
+                addr['postal_code'] = plz
+            ort = _text(anschrift, 'tns:ort')
+            if ort:
+                addr['city'] = ort
+            if addr:
+                data['address'] = addr
+
+    return data
 
 
 def parse_result(result):
@@ -106,7 +411,7 @@ def parse_result(result):
         cells.append(cell.text.strip())
     d = {}
     d['court'] = cells[1]
-    
+
     # Extract register number: HRB, HRA, VR, GnR followed by numbers (e.g. HRB 12345, VR 6789)
     # Also capture suffix letter if present (e.g. HRB 12345 B), but avoid matching start of words (e.g. " Formerly")
     reg_match = re.search(r'(HRA|HRB|GnR|VR|PR)\s*\d+(\s+[A-Z])?(?!\w)', d['court'])
@@ -147,10 +452,65 @@ def pr_company_info(c):
     for name, loc in c.get('history'):
         print(name, loc)
 
+def pr_company_detail(detail):
+    """Print the SI detail dict in human-readable form; missing or None values are skipped."""
+    if not detail:
+        print('  (no detail data available)')
+        return
+
+    print()
+    print('--- Detail (SI) ---')
+    for key in ('name', 'legal_form', 'seat', 'business_purpose',
+                'articles_of_association_date', 'representation_rules'):
+        if detail.get(key):  # the parser may store a key with a None value
+            label = key.replace('_', ' ').title()
+            print('%s: %s' % (label, detail[key]))
+
+    if 'address' in detail:
+        addr = detail['address']
+        parts = []
+        if 'street' in addr:
+            parts.append(addr['street'])
+        if 'postal_code' in addr and 'city' in addr:
+            parts.append('%s %s' % (addr['postal_code'], addr['city']))
+        elif 'city' in addr:
+            parts.append(addr['city'])
+        print('Address: %s' % ', '.join(parts))
+
+    if 'capital' in detail:
+        cap = detail['capital']
+        print('Capital: %s %s' % (cap['amount'], cap['currency']))
+
+    if 'directors' in detail:
+        print('Directors:')
+        for d in detail['directors']:
+            extra = ''
+            if d.get('representation'):
+                extra = ' (%s)' % d['representation']
+            print('  %s%s' % (d.get('name') or '?', extra))
+
+    if 'prokura' in detail:
+        print('Prokura:')
+        for p in detail['prokura']:
+            extra = ''
+            if p.get('representation'):
+                extra = ' (%s)' % p['representation']
+            print('  %s%s' % (p.get('name') or '?', extra))
+
+    if 'register_entries' in detail:
+        print('Register Entries:')
+        for entry in detail['register_entries']:
+            etype = entry.get('type', '')
+            text = entry.get('text') or ''
+            print('  [%s] %s' % (etype, text))
+
+    print('Last Entry: %s' % (detail.get('last_entry_date') or '-'))
+    print('Retrieval Date: %s' % (detail.get('retrieval_date') or '-'))
+
 def get_companies_in_searchresults(html):
     soup = BeautifulSoup(html, 'html.parser')
     grid = soup.find('table', role='grid')
-  
+
     results = []
     for result in grid.find_all('tr'):
         a = result.get('data-ri')
@@ -195,6 +555,12 @@ def parse_args():
                           help="Return response as JSON",
                           action="store_true"
                         )
+    parser.add_argument(
+                          "-det",
+                          "--detail",
+                          help="Fetch SI (structured register content) detail for the first search result",
+                          action="store_true"
+                        )
     args = parser.parse_args()
 
 
@@ -214,8 +580,17 @@ def parse_args():
     h.open_startpage()
     companies = h.search_company()
     if companies is not None:
+        detail = None
+        if args.detail and len(companies) > 0:
+            detail = h.fetch_company_detail(result_index=0)
+
         if args.json:
-            print(json.dumps(companies))
+            output = companies
+            if detail:
+                output[0]['detail'] = detail
+            print(json.dumps(output))
         else:
             for c in companies:
                 pr_company_info(c)
+            if detail:
+                pr_company_detail(detail)
diff --git a/test_handelsregister.py b/test_handelsregister.py
index fa1951a..9b6a717 100644
--- a/test_handelsregister.py
+++ b/test_handelsregister.py
@@ -1,12 +1,12 @@
 import pytest
-from handelsregister import get_companies_in_searchresults,HandelsRegister
+from handelsregister import get_companies_in_searchresults,HandelsRegister,parse_si_detail
 import argparse
 
 def test_parse_search_result():
     html = '<html><body>%s</body></html>' % """<table role="grid"><thead></thead><tbody id="ergebnissForm:selectedSuchErgebnisFormTable_data" class="ui-datatable-data ui-widget-content"><tr data-ri="0" class="ui-widget-content ui-datatable-even" role="row"><td role="gridcell" colspan="9" class="borderBottom3"><table id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt147" class="ui-panelgrid ui-widget" role="grid"><tbody><tr class="ui-widget-content ui-panelgrid-even borderBottom1" role="row"><td role="gridcell" class="ui-panelgrid-cell fontTableNameSize" colspan="5">Berlin  <span class="fontWeightBold"> District court Berlin (Charlottenburg) HRB 44343  </span></td></tr><tr class="ui-widget-content ui-panelgrid-odd" role="row"><td role="gridcell" class="ui-panelgrid-cell paddingBottom20Px" colspan="5"><span class="marginLeft20">GASAG AG</span></td><td role="gridcell" class="ui-panelgrid-cell sitzSuchErgebnisse"><span class="verticalText ">Berlin</span></td><td role="gridcell" class="ui-panelgrid-cell" style="text-align: center;padding-bottom: 20px;"><span class="verticalText">currently registered</span></td><td role="gridcell" class="ui-panelgrid-cell textAlignLeft paddingBottom20Px" colspan="2"><div id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt160" class="ui-outputpanel ui-widget linksPanel"><script type="text/javascript" src="/rp_web/javax.faces.resource/jsf.js.xhtml?ln=javax.faces"></script><a id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:0:fade" href="#" class="dokumentList" aria-describedby="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:0:toolTipFade"><span id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:0:popupLink" class="underlinedText">AD</span></a><a id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:1:fade" href="#" class="dokumentList" aria-describedby="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:1:toolTipFade"><span id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:1:popupLink" 
class="underlinedText">CD</span></a><a id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:2:fade" href="#" class="dokumentList" aria-describedby="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:2:toolTipFade"><span id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:2:popupLink" class="underlinedText">HD</span></a><a id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:3:fade" href="#" class="dokumentList" aria-describedby="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:3:toolTipFade"><span id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:3:popupLink" class="underlinedText">DK</span></a><a id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:4:fade" href="#" class="dokumentList" aria-describedby="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:4:toolTipFade"><span id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:4:popupLink" class="underlinedText">UT</span></a><a id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:5:fade" href="#" class="dokumentList" aria-describedby="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:5:toolTipFade"><span id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:5:popupLink" class="underlinedText">VÖ</span></a><a id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:6:fade" href="#" class="dokumentList" aria-describedby="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:6:toolTipFade"><span id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt161:6:popupLink" class="underlinedText">SI</span></a></div></td></tr><tr class="ui-widget-content ui-panelgrid-even" role="row"><td role="gridcell" class="ui-panelgrid-cell" colspan="7"><table id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt172" class="ui-panelgrid ui-widget marginLeft20" role="grid"><tbody><tr class="ui-widget-content ui-panelgrid-even borderBottom1 RegPortErg_Klein" role="row"><td role="gridcell" class="ui-panelgrid-cell padding0Px">History</td></tr></tbody></table><table 
id="ergebnissForm:selectedSuchErgebnisFormTable:0:j_idt176" class="ui-panelgrid ui-widget" role="grid"><tbody><tr class="ui-widget-content" role="row"><td role="gridcell" class="ui-panelgrid-cell RegPortErg_HistorieZn marginLeft20 padding0Px" colspan="5"><span class="marginLeft20 fontSize85">1.) Gasag Berliner Gaswerke Aktiengesellschaft</span></td><td role="gridcell" class="ui-panelgrid-cell RegPortErg_SitzStatus "><span class="fontSize85">1.) Berlin</span></td><td role="gridcell" class="ui-panelgrid-cell textAlignCenter"></td></tr></tbody></table></td></tr></tbody></table></td></tr></tbody></table>"""
     res = get_companies_in_searchresults(html)
     assert res == [{
-            'court':'Berlin   District court Berlin (Charlottenburg) HRB 44343', 
+            'court':'Berlin   District court Berlin (Charlottenburg) HRB 44343',
             'register_num': 'HRB 44343 B',
             'name':'GASAG AG',
             'state':'Berlin',
@@ -17,6 +17,75 @@ def test_parse_search_result():
             },]
 
 
+def test_parse_si_detail():
+    """Parse an inline XJustiz SI (Strukturierter Registerinhalt) fixture and check the extracted fields."""
+    si_xml = """<?xml version="1.0"?><tns:nachricht.reg.0400003 xmlns:tns="http://www.xjustiz.de" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><tns:nachrichtenkopf xjustizVersion="3.5.1"><tns:aktenzeichen.absender/><tns:aktenzeichen.empfaenger>unbekannt</tns:aktenzeichen.empfaenger><tns:erstellungszeitpunkt>2026-02-25T11:21:36Z</tns:erstellungszeitpunkt><tns:auswahl_absender><tns:absender.gericht listVersionID="3.5" listURI="urn:xoev-de:xjustiz:codeliste:gds.gerichte"><!--Amtsgericht Leipzig--><code>U1308</code></tns:absender.gericht></tns:auswahl_absender><tns:auswahl_empfaenger><tns:empfaenger.sonstige/></tns:auswahl_empfaenger><tns:eigeneNachrichtenID>test-id</tns:eigeneNachrichtenID><tns:ereignis listVersionID="1.5" listURI="urn:xoev-de:xjustiz:codeliste:gds.ereignis"><!--HR-Auszug--><code>002</code></tns:ereignis><tns:herstellerinformation><tns:nameDesProdukts>RegisSTAR</tns:nameDesProdukts><tns:herstellerDesProdukts>Test</tns:herstellerDesProdukts><tns:version>1.0</tns:version></tns:herstellerinformation></tns:nachrichtenkopf><tns:grunddaten><tns:verfahrensdaten><tns:instanzdaten><tns:instanznummer>0</tns:instanznummer><tns:auswahl_instanzbehoerde><tns:gericht listVersionID="3.5" listURI="urn:xoev-de:xjustiz:codeliste:gds.gerichte"><!--Amtsgericht Leipzig--><code>U1308</code></tns:gericht></tns:auswahl_instanzbehoerde><tns:aktenzeichen><tns:auswahl_aktenzeichen><tns:aktenzeichen.strukturiert><tns:register listVersionID="3.4" listURI="urn:xoev-de:xjustiz:codeliste:gds.registerzeichen"><!--HRB--><code>HRB</code></tns:register><tns:laufendeNummer>32007</tns:laufendeNummer><tns:jahr/></tns:aktenzeichen.strukturiert></tns:auswahl_aktenzeichen></tns:aktenzeichen><tns:verfahrensgegenstand><tns:gegenstand>Strukturierter 
Registerinhalt</tns:gegenstand></tns:verfahrensgegenstand></tns:instanzdaten><tns:beteiligung><tns:rolle><tns:rollennummer>1</tns:rollennummer><tns:rollenID><tns:id>145365283</tns:id><tns:ref.instanznummer>0</tns:ref.instanznummer></tns:rollenID><tns:rollenbezeichnung listVersionID="3.3" listURI="urn:xoev-de:xjustiz:codeliste:gds.rollenbezeichnung"><!--Rechtsträger(in)--><code>287</code></tns:rollenbezeichnung></tns:rolle><tns:beteiligter><tns:beteiligtennummer>1</tns:beteiligtennummer><tns:auswahl_beteiligter><tns:organisation><tns:bezeichnung><tns:bezeichnung.aktuell>Test Company GmbH</tns:bezeichnung.aktuell></tns:bezeichnung><tns:angabenZurRechtsform><tns:rechtsform listVersionID="2.2" listURI="urn:xoev-de:xunternehmen:codeliste:rechtsformen"><!--Gesellschaft mit beschränkter Haftung (GmbH)--><code>221110</code></tns:rechtsform></tns:angabenZurRechtsform><tns:sitz><tns:ort>Leipzig</tns:ort></tns:sitz><tns:anschrift><tns:anschriftstyp listVersionID="3.0" listURI="urn:xoev-de:xjustiz:codeliste:gds.anschriftstyp"><!--Dienst-/Geschäftsanschrift--><code>003</code></tns:anschriftstyp><tns:strasse>Teststraße</tns:strasse><tns:hausnummer>42</tns:hausnummer><tns:postleitzahl>04109</tns:postleitzahl><tns:ort>Leipzig</tns:ort><tns:staat listVersionID="6.1" listURI="urn:xoev-de:bund:bfj:codeliste:bfj.staat"><!--Deutschland--><code>000</code></tns:staat></tns:anschrift></tns:organisation></tns:auswahl_beteiligter></tns:beteiligter></tns:beteiligung><tns:beteiligung><tns:rolle><tns:rollennummer>4</tns:rollennummer><tns:rollenID><tns:id>145332515</tns:id><tns:ref.instanznummer>0</tns:ref.instanznummer></tns:rollenID><tns:rollenbezeichnung listVersionID="3.3" 
listURI="urn:xoev-de:xjustiz:codeliste:gds.rollenbezeichnung"><!--Geschäftsführer(in)--><code>086</code></tns:rollenbezeichnung></tns:rolle><tns:beteiligter><tns:beteiligtennummer>4</tns:beteiligtennummer><tns:auswahl_beteiligter><tns:natuerlichePerson><tns:vollerName><tns:vorname>Max</tns:vorname><tns:nachname>Mustermann</tns:nachname></tns:vollerName><tns:geburt><tns:geburtsdatum>1984-06-25</tns:geburtsdatum></tns:geburt><tns:anschrift><tns:anschriftstyp listVersionID="3.0" listURI="urn:xoev-de:xjustiz:codeliste:gds.anschriftstyp"><!--Privatanschrift--><code>017</code></tns:anschriftstyp><tns:ort>Leipzig</tns:ort></tns:anschrift></tns:natuerlichePerson></tns:auswahl_beteiligter></tns:beteiligter></tns:beteiligung><tns:beteiligung><tns:rolle><tns:rollennummer>5</tns:rollennummer><tns:rollenID><tns:id>145435568</tns:id><tns:ref.instanznummer>0</tns:ref.instanznummer></tns:rollenID><tns:rollenbezeichnung listVersionID="3.3" listURI="urn:xoev-de:xjustiz:codeliste:gds.rollenbezeichnung"><!--Prokurist(in)--><code>285</code></tns:rollenbezeichnung></tns:rolle><tns:beteiligter><tns:beteiligtennummer>5</tns:beteiligtennummer><tns:auswahl_beteiligter><tns:natuerlichePerson><tns:vollerName><tns:vorname>Erika</tns:vorname><tns:nachname>Musterfrau</tns:nachname></tns:vollerName><tns:geburt><tns:geburtsdatum>1990-01-15</tns:geburtsdatum></tns:geburt><tns:anschrift><tns:anschriftstyp listVersionID="3.0" listURI="urn:xoev-de:xjustiz:codeliste:gds.anschriftstyp"><!--Privatanschrift--><code>017</code></tns:anschriftstyp><tns:ort>Berlin</tns:ort></tns:anschrift></tns:natuerlichePerson></tns:auswahl_beteiligter></tns:beteiligter></tns:beteiligung></tns:verfahrensdaten></tns:grunddaten><tns:schriftgutobjekte/><tns:fachdatenRegister fachdatenRegisterVersion="3.4"><tns:mitteilungsart listVersionID="1.5" 
listURI="urn:xoev-de:xjustiz:codeliste:gds.ereignis"><!--HR-Auszug--><code>002</code></tns:mitteilungsart><tns:betroffenerRechtstraeger><tns:ref.rollennummer>1</tns:ref.rollennummer></tns:betroffenerRechtstraeger><tns:auszug><tns:eintragungstext><tns:spalte>6</tns:spalte><tns:position>1</tns:position><tns:laufendeNummer>2</tns:laufendeNummer><tns:eintragungsart listVersionID="1.1" listURI="urn:xoev-de:xjustiz:codeliste:reg.eintragungstyp"><!--Satzung--><code>009</code></tns:eintragungsart><tns:text>Gesellschaftsvertrag vom 18.06.2015.</tns:text></tns:eintragungstext><tns:abrufuhrzeit>11:21:36</tns:abrufuhrzeit><tns:abrufdatum>2026-02-25</tns:abrufdatum><tns:letzteEintragung>2025-06-02</tns:letzteEintragung><tns:anzahlEintragungen>7</tns:anzahlEintragungen><tns:letzteAenderung><tns:aenderungsdatum>2025-05-22</tns:aenderungsdatum></tns:letzteAenderung></tns:auszug><tns:basisdatenRegister><tns:satzungsdatum><tns:aktuellesSatzungsdatum>2015-06-18</tns:aktuellesSatzungsdatum></tns:satzungsdatum><tns:rechtstraeger><tns:bezeichnung><tns:bezeichnung.aktuell>Test Company GmbH</tns:bezeichnung.aktuell></tns:bezeichnung><tns:angabenZurRechtsform><tns:rechtsform listVersionID="2.2" listURI="urn:xoev-de:xunternehmen:codeliste:rechtsformen"><!--Gesellschaft mit beschränkter Haftung (GmbH)--><code>221110</code></tns:rechtsform></tns:angabenZurRechtsform><tns:sitz><tns:ort>Leipzig</tns:ort></tns:sitz><tns:anschrift><tns:anschriftstyp listVersionID="3.0" listURI="urn:xoev-de:xjustiz:codeliste:gds.anschriftstyp"><!--Dienst-/Geschäftsanschrift--><code>003</code></tns:anschriftstyp><tns:strasse>Teststraße</tns:strasse><tns:hausnummer>42</tns:hausnummer><tns:postleitzahl>04109</tns:postleitzahl><tns:ort>Leipzig</tns:ort><tns:staat listVersionID="6.1" 
listURI="urn:xoev-de:bund:bfj:codeliste:bfj.staat"><!--Deutschland--><code>000</code></tns:staat></tns:anschrift></tns:rechtstraeger><tns:vertretung><tns:allgemeineVertretungsregelung><tns:auswahl_vertretungsbefugnis><tns:vertretungsbefugnis listVersionID="2.3" listURI="urn:xoev-de:xjustiz:codeliste:reg.allgemeine-vertretungsregelung"><!--Ist nur ein Geschäftsführer bestellt, so vertritt er die Gesellschaft allein.--><code>066</code></tns:vertretungsbefugnis></tns:auswahl_vertretungsbefugnis></tns:allgemeineVertretungsregelung><tns:vertretungsberechtigte><tns:ref.rollennummer>4</tns:ref.rollennummer><tns:besondereVertretungsregelung><tns:auswahl_vertretungsbefugnis><tns:vertretungsbefugnisFreitext>einzelvertretungsberechtigt;</tns:vertretungsbefugnisFreitext></tns:auswahl_vertretungsbefugnis><tns:auswahl_befreiungVon181BGB><tns:befreiungVon181BGB listVersionID="2.0" listURI="urn:xoev-de:xjustiz:codeliste:reg.besondere-befreiung"><!--mit der Befugnis Rechtsgeschäfte abzuschließen--><code>011</code></tns:befreiungVon181BGB></tns:auswahl_befreiungVon181BGB></tns:besondereVertretungsregelung></tns:vertretungsberechtigte><tns:vertretungsberechtigte><tns:ref.rollennummer>5</tns:ref.rollennummer><tns:besondereVertretungsregelung><tns:auswahl_vertretungsbefugnis><tns:vertretungsbefugnis listVersionID="2.3" listURI="urn:xoev-de:xjustiz:codeliste:reg.besondere-vertretungsregelung"><!--Einzelprokura--><code>002</code></tns:vertretungsbefugnis></tns:auswahl_vertretungsbefugnis></tns:besondereVertretungsregelung></tns:vertretungsberechtigte></tns:vertretung><tns:gegenstand>Softwareentwicklung und IT-Beratung.</tns:gegenstand><tns:geschaeftszweck/></tns:basisdatenRegister><tns:auswahl_zusatzangaben><tns:kapitalgesellschaft><tns:zusatzGmbH><tns:stammkapital><tns:zahl>25000.00</tns:zahl><tns:auswahl_waehrung><tns:waehrung listVersionID="1.0" 
listURI="urn:xoev-de:bund:kba:codeliste:waehrung"><!--Euro--><code>EUR</code></tns:waehrung></tns:auswahl_waehrung></tns:stammkapital></tns:zusatzGmbH></tns:kapitalgesellschaft></tns:auswahl_zusatzangaben></tns:fachdatenRegister></tns:nachricht.reg.0400003>"""
+
+    detail = parse_si_detail(si_xml)
+
+    # Company info: exact name and seat; the legal form is only checked loosely (must contain 'GmbH')
+    assert detail['name'] == 'Test Company GmbH'
+    assert 'GmbH' in detail['legal_form']
+    assert detail['seat'] == 'Leipzig'
+
+    # Address: 'street' is expected to hold street name and house number joined by a space
+    assert detail['address']['street'] == 'Teststraße 42'
+    assert detail['address']['postal_code'] == '04109'
+    assert detail['address']['city'] == 'Leipzig'
+
+    # Business purpose: the <tns:gegenstand> text under basisdatenRegister, not the verfahrensgegenstand one
+    assert 'Softwareentwicklung' in detail['business_purpose']
+
+    # Share capital (stammkapital): the amount is compared as the string from the XML, the currency as its code
+    assert detail['capital']['amount'] == '25000.00'
+    assert detail['capital']['currency'] == 'EUR'
+
+    # Directors: the fixture has exactly one Geschäftsführer(in) (role code 086)
+    assert len(detail['directors']) == 1
+    assert detail['directors'][0]['name'] == 'Max Mustermann'
+    assert detail['directors'][0]['date_of_birth'] == '1984-06-25'
+
+    # Prokura: the fixture has exactly one Prokurist(in) (role code 285)
+    assert len(detail['prokura']) == 1
+    assert detail['prokura'][0]['name'] == 'Erika Musterfrau'
+
+    # Register info: values match <tns:aktenzeichen.strukturiert> and <tns:auszug> in the fixture
+    assert detail['register_type'] == 'HRB'
+    assert detail['register_number'] == '32007'
+    assert detail['retrieval_date'] == '2026-02-25'
+    assert detail['last_entry_date'] == '2025-06-02'
+
+    # Articles of association date (<tns:aktuellesSatzungsdatum> in the fixture)
+    assert detail['articles_of_association_date'] == '2015-06-18'
+
+    # Register entries: the fixture contains a single <tns:eintragungstext>
+    assert len(detail['register_entries']) >= 1
+    assert 'Gesellschaftsvertrag' in detail['register_entries'][0]['text']
+
+
+def test_fetch_detail():
+    """Integration test: exact-match search for 'Gecko Two GmbH', then fetch SI data for the first result."""
+    args = argparse.Namespace(debug=False, force=True, schlagwoerter='Gecko Two GmbH',
+                              schlagwortOptionen='exact', json=False, detail=True)
+    h = HandelsRegister(args)
+    h.open_startpage()
+    companies = h.search_company()
+    assert companies is not None
+    assert len(companies) > 0
+
+    detail = h.fetch_company_detail(result_index=0)  # index 0 = first search result
+    assert detail is not None
+
+    # Basic fields that should always be present; only presence (and 'Gecko' in the name) is checked, not exact values
+    assert detail.get('name') is not None
+    assert 'Gecko' in detail['name']
+    assert detail.get('seat') is not None
+    assert detail.get('address') is not None
+    assert detail.get('capital') is not None
+    assert detail.get('directors') is not None
+    assert len(detail['directors']) > 0
+
+
 @pytest.mark.parametrize("company, state_id", [
     ("Hafen Hamburg", "Hamburg"),
     ("Bayerische Motoren Werke", "Bayern"),
@@ -37,7 +106,7 @@ def test_parse_search_result():
 ])
 def test_search_by_state_company(company, state_id):
 
-    args = argparse.Namespace(debug=False, force=True, schlagwoerter=company, schlagwortOptionen='all', json=False)
+    args = argparse.Namespace(debug=False, force=True, schlagwoerter=company, schlagwortOptionen='all', json=False, detail=False)
     h = HandelsRegister(args)
     h.open_startpage()
     companies = h.search_company()
@@ -45,13 +114,13 @@ def test_search_by_state_company(company, state_id):
     assert len(companies) > 0
 
 def test_haus_anker_b_suffix():
-    args = argparse.Namespace(debug=False, force=True, schlagwoerter='Haus-Anker Verwaltungs GmbH', schlagwortOptionen='exact', json=False)
+    args = argparse.Namespace(debug=False, force=True, schlagwoerter='Haus-Anker Verwaltungs GmbH', schlagwortOptionen='exact', json=False, detail=False)
     h = HandelsRegister(args)
     h.open_startpage()
     companies = h.search_company()
     assert companies is not None
-     
+
     target_company = next((c for c in companies if '138434' in c['register_num']), None)
-    
+
     assert target_company is not None, "Haus-Anker Verwaltungs GmbH with expected number not found"
-    assert target_company['register_num'] == 'HRB 138434 B'
\ No newline at end of file
+    assert target_company['register_num'] == 'HRB 138434 B'  # expects the full number, including the ' B' suffix