Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
29 changes: 29 additions & 0 deletions multiversx_logs_counter/counter_archive_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from datetime import datetime, timedelta

from multiversx_logs_parser_tools.archive_handler import ArchiveHandler

from .counter_checker import CounterChecker, CounterParser


class CounterArchiveHandler(ArchiveHandler):
Copy link

Copilot AI Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing docstring for the CounterArchiveHandler class. Public classes should have docstrings explaining their purpose and usage in the archive processing workflow.

Suggested change
class CounterArchiveHandler(ArchiveHandler):
class CounterArchiveHandler(ArchiveHandler):
"""Archive handler that runs a CounterChecker over log archives.
This class coordinates archive traversal (inherited from
:class:`ArchiveHandler`) with a :class:`CounterChecker` instance,
which parses and validates log files using the provided
:class:`CounterParser`. It is typically instantiated with a
configured ``CounterChecker`` and the root ``logs_path`` of the
archive to be processed, and then used via ``handle_logs()`` to
perform the full archive processing workflow.
"""

Copilot uses AI. Check for mistakes.
def __init__(self, checker: CounterChecker, logs_path: str):
    """Initialize the handler with a counter checker and the logs root path.

    Args:
        checker: The CounterChecker used to parse and count log entries.
        logs_path: Root path of the log archive to process.
    """
    self.checker = checker
    super().__init__(checker, logs_path)

def process_node_data(self):
    """Process the parsed data for a single node.

    Intentionally a no-op for the counter workflow: per-node aggregation
    is handled by the checker, so there is nothing to do here.
    TODO: implement shard-level aggregation here if/when it is needed
    (previously sketched with a HeaderData/ShardData pair).
    """


if __name__ == "__main__":
    # Entry point: parse CLI arguments, run the counter over the archive,
    # and report elapsed wall-clock time.
    time_started = datetime.now()
    print('Starting log entry analysis...')
    args = CounterArchiveHandler.get_path()
    counter_checker = CounterChecker(CounterParser, args)
    handler = CounterArchiveHandler(counter_checker, args.path)
    handler.handle_logs()
    # Subtracting two datetimes already yields a timedelta; no need to
    # round-trip the value through total_seconds().
    print(f'Archive checked successfully: {datetime.now() - time_started}s')
38 changes: 38 additions & 0 deletions multiversx_logs_counter/counter_checker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from argparse import Namespace
from typing import Any

from multiversx_logs_parser_tools.node_logs_checker import NodeLogsChecker

from .counter_parser import CounterData, CounterParser


class CounterChecker(NodeLogsChecker):

Comment on lines +9 to +10
Copy link

Copilot AI Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing docstring for the CounterChecker class. Public classes should have docstrings explaining their purpose and their role in the log counting workflow.

Suggested change
class CounterChecker(NodeLogsChecker):
class CounterChecker(NodeLogsChecker):
"""
CounterChecker coordinates log parsing and counting for a single node.
It uses a CounterParser to extract and count log messages, aggregates the
results into CounterData, and produces per-node JSON reports as part of
the overall log counting workflow.
"""

Copilot uses AI. Check for mistakes.
def __init__(self, parser_cls: type[CounterParser], args: Namespace):
    """Create a checker that counts log messages using the given parser class."""
    super().__init__(parser_cls, args)

def initialize_checker(self, args):
    """Start a fresh per-run counter aggregate, then run base initialization.

    NOTE(review): self.parsed is assigned *before* super().initialize_checker()
    runs — presumably so the base class can rely on it being present; confirm
    the base implementation neither overwrites it nor expects the reverse order.
    """
    self.parsed = CounterData()
    return super().initialize_checker(args)
Comment on lines +15 to +16
Copy link

Copilot AI Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The initialization order may be problematic. The initialize_checker method creates a CounterData instance for self.parsed, then calls super().initialize_checker(args), which may overwrite or interfere with the initialization. Consider calling super() first, then initializing self.parsed.

Suggested change
self.parsed = CounterData()
return super().initialize_checker(args)
result = super().initialize_checker(args)
self.parsed = CounterData()
return result

Copilot uses AI. Check for mistakes.

def process_parsed_result(self):
    """Fold the parser's current counters into the aggregate, then reset the parser."""
    self.parsed.add_counted_messages(self.parser.counters)
    self.parser.initialize_checker()

def post_process_node_logs(self):
    """Write this node's counter report as JSON under the run's report folder."""
    self.write_node_json(path=f'./Reports/{self.run_name}/LogEntryCounter')

def create_json_for_node(self) -> dict[str, Any]:
    """Build the JSON-serializable report for the current node.

    Messages within each log level are ordered by descending occurrence
    count; the sorted ordering is also written back to ``self.parsed`` so
    later consumers observe the same ordering.

    Returns:
        A dict with the node name, run name, and per-level message counters.
    """
    # Rebuild the whole mapping instead of reassigning values while iterating
    # the same dict (clearer, and avoids mutate-during-iteration pitfalls).
    self.parsed.counter_dictionary = {
        log_level: dict(sorted(messages.items(), key=lambda item: item[1], reverse=True))
        for log_level, messages in self.parsed.counter_dictionary.items()
    }
    return {
        "node_name": self.node_name,
        "run_name": self.run_name,
        "counter": self.parsed.counter_dictionary,
    }

def reset_node(self, args: Namespace):
    """Reset base-class per-node state and start a fresh counter aggregate."""
    super().reset_node(args)
    self.parsed = CounterData()
39 changes: 39 additions & 0 deletions multiversx_logs_counter/counter_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from re import Pattern
from typing import Any

from multiversx_logs_parser_tools.aho_corasik_parser import AhoCorasickParser

from .counter_structures import CounterData


class CounterParser(AhoCorasickParser):
Copy link

Copilot AI Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing docstring for the CounterParser class. Public classes should have docstrings explaining their purpose and how they extend or differ from the base AhoCorasickParser class.

Suggested change
class CounterParser(AhoCorasickParser):
class CounterParser(AhoCorasickParser):
"""
Parser specialized for counting log messages by severity level.
This class configures the underlying :class:`AhoCorasickParser` with a
fixed set of log-level patterns (INFO, TRACE, DEBUG, WARN, ERROR) and
accumulates the parsed messages in a :class:`CounterData` instance.
It overrides the base parser's hook methods to:
* declare the patterns of interest via :meth:`get_patterns`,
* reset the internal counters in :meth:`initialize_checker`,
* delegate low-level matching to the base implementation in
:meth:`process_match`, and
* update the message counters in :meth:`process_parsed_entry`.
"""

Copilot uses AI. Check for mistakes.
def __init__(self):
    """Initialize the base parser and an empty per-level counter aggregate."""
    super().__init__()
    self.counters = CounterData()

def get_patterns(self) -> list[tuple[str, int]]:
    """Return the (pattern, index) pairs this parser matches.

    The patterns are plain log-level substrings, not compiled regexes —
    hence the ``tuple[str, int]`` element type (the previous annotation
    incorrectly claimed ``Pattern[str]``).
    """
    return [
        ('INFO', 0),
        ('TRACE', 1),
        ('DEBUG', 2),
        ('WARN', 3),
        ('ERROR', 4),
    ]
Comment on lines +14 to +22
Copy link

Copilot AI Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Type annotation mismatch in return type. The method returns a list of tuples containing strings and integers, but the type hint indicates Pattern objects. The return type should be list[tuple[str, int]] instead of list[tuple[Pattern[str], int]].

Copilot uses AI. Check for mistakes.

def initialize_checker(self) -> None:
    """Discard accumulated counts and start a fresh CounterData aggregate."""
    self.counters = CounterData()

def process_match(self, line: str, end_index: int, pattern_idx: int, args: dict[str, str]) -> dict[str, Any]:
    """Delegate matching to the base parser unchanged.

    Kept as an explicit override to mark the extension point, even though
    it currently adds no behavior of its own.
    """
    return super().process_match(line, end_index, pattern_idx, args)

Comment on lines +27 to +30
Copy link

Copilot AI Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The process_match method calls super().process_match() but doesn't utilize the returned value meaningfully - it just returns it directly without any additional processing. Consider whether this override is necessary or if the implementation adds value.

Suggested change
def process_match(self, line: str, end_index: int, pattern_idx: int, args: dict[str, str]) -> dict[str, Any]:
parsed = super().process_match(line, end_index, pattern_idx, args)
return parsed

Copilot uses AI. Check for mistakes.
def process_parsed_entry(self, parsed_entry: dict[str, Any], args: dict[str, str]) -> None:
    """Count the parsed entry's message under its log level.

    Entries with an empty (or missing) message are ignored; an absent
    logger level is bucketed as 'UNKNOWN'.
    """
    if message := parsed_entry.get('message', ''):
        self.counters.add_message(parsed_entry.get('logger_level', 'UNKNOWN'), message)

def should_parse_line(self, pattern: Pattern[str]) -> bool:
    """Always parse: every configured log-level pattern is of interest.

    The ``pattern`` argument is currently unused; the base class supplies
    it for overrides that filter per pattern.
    """
    return True
Copy link

Copilot AI Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The should_parse_line method always returns True, making the pattern parameter unused. Consider removing the parameter if it's not needed, or implement conditional logic if different patterns should be handled differently.

Suggested change
return True
"""
Decide whether a line associated with the given pattern should be parsed.
This implementation returns True for the log level patterns that this parser
is configured to handle. For the patterns currently returned by get_patterns(),
this method behaves the same as the previous unconditional True.
"""
allowed_levels = {"INFO", "TRACE", "DEBUG", "WARN", "ERROR"}
# Pattern may be a compiled regex or another string-like object.
pattern_str = getattr(pattern, "pattern", str(pattern))
return any(level in pattern_str for level in allowed_levels)

Copilot uses AI. Check for mistakes.
32 changes: 32 additions & 0 deletions multiversx_logs_counter/counter_structures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from enum import Enum

class LogLevel(Enum):
    """Recognized log severity levels.

    Values are explicit 1-based integers, matching what the Enum functional
    API would have assigned to the same member ordering.
    """
    INFO = 1
    TRACE = 2
    DEBUG = 3
    WARN = 4
    ERROR = 5


class CounterData:

Comment on lines +12 to +13
Copy link

Copilot AI Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing docstring for the CounterData class. Public classes should have docstrings explaining their purpose, usage, and key attributes.

Suggested change
class CounterData:
class CounterData:
"""
Container for counting log messages per log level.
This class maintains a nested dictionary structure where the first key is
the log level name (as defined in ``LogLevel``) and the second key is the
log message string. The corresponding value is the number of times that
message has been observed at that log level.
Attributes
----------
counter_dictionary : dict[str, dict[str, int]]
Maps a log level name to a dictionary of message strings and their
associated occurrence counts.
Usage
-----
- Use :meth:`add_message` to increment the count for a single message.
- Use :meth:`add_counted_messages` to merge counts from another
:class:`CounterData` instance.
- Use :meth:`reset` to clear all stored counts.
"""

Copilot uses AI. Check for mistakes.
def __init__(self):
    """Start with an empty message-count bucket for every known log level."""
    buckets = {}
    for level in LogLevel:
        buckets[level.name] = {}
    self.counter_dictionary = buckets

def reset(self):
    """Drop all recorded counts, restoring the initial empty-per-level state."""
    empty_buckets = {}
    for level in LogLevel:
        empty_buckets[level.name] = {}
    self.counter_dictionary = empty_buckets

def add_message(self, log_level: str, message: str, count=1) -> None:
    """Increase the count for ``message`` under ``log_level`` by ``count``.

    Raises:
        KeyError: if ``log_level`` is not one of the known level buckets.
    """
    bucket = self.counter_dictionary[log_level]
    bucket[message] = bucket.get(message, 0) + count

def add_counted_messages(self, messages: 'CounterData') -> None:
    """Merge every (level, message, count) from another CounterData into this one."""
    for log_level, per_message_counts in messages.counter_dictionary.items():
        for message, occurrences in per_message_counts.items():
            self.add_message(log_level, message, occurrences)
71 changes: 60 additions & 11 deletions multiversx_logs_parser_tools/entry_parser.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

Copy link

Copilot AI Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unnecessary blank line at the start of the file. This adds no value and should be removed for consistency.

Suggested change

Copilot uses AI. Check for mistakes.
import json
import re
from typing import Any
Expand Down Expand Up @@ -44,20 +45,49 @@ def parse_context(self, context: str) -> dict[str, Any]:
return {'shard': '', 'epoch': 0, 'round': 0, 'subround': ''}

def parse_message(self, message: str):

# header hash entry
m = re.search(r'header\s+hash:\s*([0-9a-fA-F]+)', message)
if m:
return "HEADER TABLE", f"hash={m.group(1)}"

# Epoch begins
m = re.search(r'EPOCH\s+(\d+)\s+BEGINS\s+IN\s+ROUND\s+\((\d+)\)', message, re.IGNORECASE)
if m:
epoch, rnd = m.group(1), m.group(2)
return "BEGIN EPOCH", f"epoch={epoch}; round={rnd}"

# ROUND begins
m = re.search(r'ROUND\s+(-?\d+)\s+BEGINS', message, re.IGNORECASE)
if m:
return "BEGIN ROUND", f"round={m.group(1)}"

# SUBROUND begins
m = re.search(r'SUBROUND\s+\(([^)]+)\)\s+BEGINS', message, re.IGNORECASE)
if m:
return "BEGIN SUBROUND", f"subround={m.group(1)}"

# Proposed block added
m = re.search(r'Added proposed block with nonce\s+(\d+)', message, re.IGNORECASE)
if m:
return "PROPOSED BLOCK", f"nonce={m.group(1)}"

# Assembled block added
m = re.search(r'Added\s+assembled\s+block\s+with\s+nonce\s+(\d+)', message, re.IGNORECASE)
if m:
return "ASSEMBLED BLOCK", f"nonce={m.group(1)}"

# --- common logic ---
Copy link

Copilot AI Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment says "common logic" but this could be more descriptive. Consider using a comment that better describes what this fallback logic does, such as "Parse using default separator-based or equals-sign-based logic".

Suggested change
# --- common logic ---
# Fallback: parse using default separator-based or equals-sign-based logic

Copilot uses AI. Check for mistakes.
if separator in message:
# if the separator is present, split the content between message and parameters using the separator
message, parameters = message.split(separator, 1)
return message.strip(), parameters.strip()
msg, params = message.split(separator, 1)
return msg.strip(), params.strip()

elif ' = ' in message:
# if no separator, but the content includes '=', assume first parameter is the word before the '=' and split before that word
message_parts = message.split(' = ', 1)
message, first_parameter_label = message_parts[0].rsplit(' ', 1)
return message.strip(), first_parameter_label.strip() + ' = ' + message_parts[1].strip()
if ' = ' in message:
parts = message.split(' = ', 1)
msg, label = parts[0].rsplit(' ', 1)
return msg.strip(), label.strip() + ' = ' + parts[1].strip()

else:
# no parameters in the entry or cannot determine if there are parameters present
return message.strip(), ''
return message.strip(), ''

def parse_log_entry(self, log_content: str) -> dict[str, str]:
data = {}
Expand All @@ -81,3 +111,22 @@ def parse_log_entry(self, log_content: str) -> dict[str, str]:
header = json.loads(parameter)
print(json.dumps(result, indent=4))
print(json.dumps(header, indent=4))

content_list = ['DEBUG[2025-11-12 08:57:53.007] [..ensus/chronology] [0/0/1/(END_ROUND)] 2025-11-12 08:57:53.000932854 ################################### ROUND 2 BEGINS (1762937873000) ################################### ',
'DEBUG[2025-11-12 08:56:47.006] [..ensus/chronology] [0/0/-10/] 2025-11-12 08:56:47.000575216 ################################## ROUND -9 BEGINS (1762937807000) ##################################',
'DEBUG[2025-11-12 13:36:32.680][..nsus/spos/bls/v2][0/13/23926/(END_ROUND)] 2025-11-12 13:36:32.680514567 + ++++++++++++++++++++++ Added proposed block with nonce 23922 in blockchain + ++++++++++++++++++++++ ',
'DEBUG[2025-11-12 13:36:33.200][..ensus/chronology][0/13/23927/(END_ROUND)] 2025-11-12 13:36:33.200192207 ................................... SUBROUND(START_ROUND) BEGINS ................................... ',
'DEBUG[2025-11-12 13:36:33.202][..ensus/chronology][0/13/23927/(START_ROUND)] 2025-11-12 13:36:33.202271507 ...................................... SUBROUND(BLOCK) BEGINS ...................................... ',
'DEBUG[2025-11-12 13:36:33.228][..ensus/chronology][0/13/23927/(BLOCK)] 2025-11-12 13:36:33.227644328 .................................... SUBROUND(SIGNATURE) BEGINS .................................... ',
'DEBUG[2025-11-12 13:36:33.234][..ensus/chronology][0/13/23927/(SIGNATURE)] 2025-11-12 13:36:33.234186524 .................................... SUBROUND(END_ROUND) BEGINS .................................... ',
'DEBUG[2025-11-12 12:01:22.747][..Start/shardchain][0/8/14409/(END_ROUND)] # EPOCH 9 BEGINS IN ROUND (14409) ##################################'
Copy link

Copilot AI Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing newline between the two test log entries. Line 122 and 123 should be separated with a comma and newline for better readability and to match the pattern of the list.

Suggested change
'DEBUG[2025-11-12 12:01:22.747][..Start/shardchain][0/8/14409/(END_ROUND)] # EPOCH 9 BEGINS IN ROUND (14409) ##################################'
'DEBUG[2025-11-12 12:01:22.747][..Start/shardchain][0/8/14409/(END_ROUND)] # EPOCH 9 BEGINS IN ROUND (14409) ##################################',

Copilot uses AI. Check for mistakes.
'DEBUG[2025-11-12 09:27:35.419] [process/block] [0/1/299/(END_ROUND)] header hash: 209622a0c04a556829507a7a2176aaee2a58a524766b805dc9a423e2fe023072',
'DEBUG[2025-11-12 09:01:35.041] [..nsus/spos/bls/v1] [0/0/39/(END_ROUND)] 2025-11-12 09:01:35.035239399 ------------------------ Added assembled block with nonce 39 in blockchain ------------------------',
]
for content in content_list:
result = EntryParser('').parse_log_entry(content)
print(json.dumps(result, indent=4))

content = 'DEBUG[2025-11-12 08:57:47.151] [process/sync] [0/0/1/(END_ROUND)] forkDetector.appendHeaderInfo round = 1 nonce = 1 hash = 310001ecb0e9f441b916adf87b71cc959745f48ba07b78c9df5741b2b35bbf8d state = 0 probable highest nonce = 1 last checkpoint nonce = 1 final checkpoint nonce = 0 has proof = true '
result = EntryParser('').parse_log_entry(content)
print(json.dumps(result, indent=4))
Comment on lines +115 to +132
Copy link

Copilot AI Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Test or debug code should not be committed to the main codebase. This appears to be test code in the main block that should either be moved to proper unit tests or removed.

Copilot uses AI. Check for mistakes.
4 changes: 2 additions & 2 deletions multiversx_logs_parser_tools/node_logs_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,10 @@ def write_node_json(self, path=''):
if not path:
node_reports_path = f'./Reports/{self.run_name}/Nodes'
output_file = Path(f'{node_reports_path}/{self.node_name}_report.json')
directory = os.path.dirname(output_file)
Path(directory).mkdir(parents=True, exist_ok=True)
else:
output_file = Path(path + f'/{self.node_name}_report.json')
directory = os.path.dirname(output_file)
Path(directory).mkdir(parents=True, exist_ok=True)
with open(output_file, "w") as json_file:
json.dump(self.create_json_for_node(), json_file, indent=4)

Expand Down