Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
29 changes: 29 additions & 0 deletions multiversx_logs_counter/counter_archive_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from datetime import datetime, timedelta

from multiversx_logs_parser_tools.archive_handler import ArchiveHandler

from .counter_checker import CounterChecker, CounterParser


class CounterArchiveHandler(ArchiveHandler):
Copy link

Copilot AI Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing docstring for the CounterArchiveHandler class. Public classes should have docstrings explaining their purpose and usage in the archive processing workflow.

Suggested change
class CounterArchiveHandler(ArchiveHandler):
class CounterArchiveHandler(ArchiveHandler):
"""Archive handler that runs a CounterChecker over log archives.
This class coordinates archive traversal (inherited from
:class:`ArchiveHandler`) with a :class:`CounterChecker` instance,
which parses and validates log files using the provided
:class:`CounterParser`. It is typically instantiated with a
configured ``CounterChecker`` and the root ``logs_path`` of the
archive to be processed, and then used via ``handle_logs()`` to
perform the full archive processing workflow.
"""

Copilot uses AI. Check for mistakes.
def __init__(self, checker: CounterChecker, logs_path: str):
    """Initialize the handler with a counter checker and the logs root path.

    Args:
        checker: The CounterChecker used to parse and count log entries.
        logs_path: Root path of the log archive to process.
    """
    self.checker = checker
    super().__init__(checker, logs_path)

def process_node_data(self):
    """Process the parsed data for a single node.

    Intentionally a no-op for the counter workflow: per-node aggregation
    is handled by the checker, so there is nothing to do here.
    TODO: implement shard-level aggregation here if/when it is needed
    (previously sketched with a HeaderData/ShardData pair).
    """


if __name__ == "__main__":
    # Entry point: parse CLI arguments, run the counter over the archive,
    # and report elapsed wall-clock time.
    time_started = datetime.now()
    print('Starting log entry analysis...')
    args = CounterArchiveHandler.get_path()
    counter_checker = CounterChecker(CounterParser, args)
    handler = CounterArchiveHandler(counter_checker, args.path)
    handler.handle_logs()
    # Subtracting two datetimes already yields a timedelta; no need to
    # round-trip the value through total_seconds().
    print(f'Archive checked successfully: {datetime.now() - time_started}s')
38 changes: 38 additions & 0 deletions multiversx_logs_counter/counter_checker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from argparse import Namespace
from typing import Any

from multiversx_logs_parser_tools.node_logs_checker import NodeLogsChecker

from .counter_parser import CounterData, CounterParser


class CounterChecker(NodeLogsChecker):

Comment on lines +9 to +10
Copy link

Copilot AI Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing docstring for the CounterChecker class. Public classes should have docstrings explaining their purpose and their role in the log counting workflow.

Suggested change
class CounterChecker(NodeLogsChecker):
class CounterChecker(NodeLogsChecker):
"""
CounterChecker coordinates log parsing and counting for a single node.
It uses a CounterParser to extract and count log messages, aggregates the
results into CounterData, and produces per-node JSON reports as part of
the overall log counting workflow.
"""

Copilot uses AI. Check for mistakes.
def __init__(self, parser_cls: type[CounterParser], args: Namespace):
    """Create a checker that counts log messages using the given parser class."""
    super().__init__(parser_cls, args)

def initialize_checker(self, args):
    """Start a fresh per-run counter aggregate, then run base initialization.

    NOTE(review): self.parsed is assigned *before* super().initialize_checker()
    runs — presumably so the base class can rely on it being present; confirm
    the base implementation neither overwrites it nor expects the reverse order.
    """
    self.parsed = CounterData()
    return super().initialize_checker(args)
Comment on lines +15 to +16
Copy link

Copilot AI Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The initialization order may be problematic. The initialize_checker method creates a CounterData instance for self.parsed, then calls super().initialize_checker(args), which may overwrite or interfere with the initialization. Consider calling super() first, then initializing self.parsed.

Suggested change
self.parsed = CounterData()
return super().initialize_checker(args)
result = super().initialize_checker(args)
self.parsed = CounterData()
return result

Copilot uses AI. Check for mistakes.

def process_parsed_result(self):
    """Fold the parser's current counters into the aggregate, then reset the parser."""
    self.parsed.add_counted_messages(self.parser.counters)
    self.parser.initialize_checker()

def post_process_node_logs(self):
    """Write this node's counter report as JSON under the run's report folder."""
    self.write_node_json(path=f'./Reports/{self.run_name}/LogEntryCounter')

def create_json_for_node(self) -> dict[str, Any]:
    """Build the JSON-serializable report for the current node.

    Messages within each log level are ordered by descending occurrence
    count; the sorted ordering is also written back to ``self.parsed`` so
    later consumers observe the same ordering.

    Returns:
        A dict with the node name, run name, and per-level message counters.
    """
    # Rebuild the whole mapping instead of reassigning values while iterating
    # the same dict (clearer, and avoids mutate-during-iteration pitfalls).
    self.parsed.counter_dictionary = {
        log_level: dict(sorted(messages.items(), key=lambda item: item[1], reverse=True))
        for log_level, messages in self.parsed.counter_dictionary.items()
    }
    return {
        "node_name": self.node_name,
        "run_name": self.run_name,
        "counter": self.parsed.counter_dictionary,
    }

def reset_node(self, args: Namespace):
    """Reset base-class per-node state and start a fresh counter aggregate."""
    super().reset_node(args)
    self.parsed = CounterData()
39 changes: 39 additions & 0 deletions multiversx_logs_counter/counter_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from re import Pattern
from typing import Any

from multiversx_logs_parser_tools.aho_corasik_parser import AhoCorasickParser

from .counter_structures import CounterData


class CounterParser(AhoCorasickParser):
Copy link

Copilot AI Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing docstring for the CounterParser class. Public classes should have docstrings explaining their purpose and how they extend or differ from the base AhoCorasickParser class.

Suggested change
class CounterParser(AhoCorasickParser):
class CounterParser(AhoCorasickParser):
"""
Parser specialized for counting log messages by severity level.
This class configures the underlying :class:`AhoCorasickParser` with a
fixed set of log-level patterns (INFO, TRACE, DEBUG, WARN, ERROR) and
accumulates the parsed messages in a :class:`CounterData` instance.
It overrides the base parser's hook methods to:
* declare the patterns of interest via :meth:`get_patterns`,
* reset the internal counters in :meth:`initialize_checker`,
* delegate low-level matching to the base implementation in
:meth:`process_match`, and
* update the message counters in :meth:`process_parsed_entry`.
"""

Copilot uses AI. Check for mistakes.
def __init__(self):
    """Initialize the base parser and an empty per-level counter aggregate."""
    super().__init__()
    self.counters = CounterData()

def get_patterns(self) -> list[tuple[str, int]]:
    """Return the (pattern, index) pairs this parser matches.

    The patterns are plain log-level substrings, not compiled regexes —
    hence the ``tuple[str, int]`` element type (the previous annotation
    incorrectly claimed ``Pattern[str]``).
    """
    return [
        ('INFO', 0),
        ('TRACE', 1),
        ('DEBUG', 2),
        ('WARN', 3),
        ('ERROR', 4),
    ]
Comment on lines +14 to +22
Copy link

Copilot AI Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Type annotation mismatch in return type. The method returns a list of tuples containing strings and integers, but the type hint indicates Pattern objects. The return type should be list[tuple[str, int]] instead of list[tuple[Pattern[str], int]].

Copilot uses AI. Check for mistakes.

def initialize_checker(self) -> None:
    """Discard accumulated counts and start a fresh CounterData aggregate."""
    self.counters = CounterData()

def process_match(self, line: str, end_index: int, pattern_idx: int, args: dict[str, str]) -> dict[str, Any]:
    """Delegate matching to the base parser unchanged.

    Kept as an explicit override to mark the extension point, even though
    it currently adds no behavior of its own.
    """
    return super().process_match(line, end_index, pattern_idx, args)

Comment on lines +27 to +30
Copy link

Copilot AI Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The process_match method calls super().process_match() but doesn't utilize the returned value meaningfully - it just returns it directly without any additional processing. Consider whether this override is necessary or if the implementation adds value.

Suggested change
def process_match(self, line: str, end_index: int, pattern_idx: int, args: dict[str, str]) -> dict[str, Any]:
parsed = super().process_match(line, end_index, pattern_idx, args)
return parsed

Copilot uses AI. Check for mistakes.
def process_parsed_entry(self, parsed_entry: dict[str, Any], args: dict[str, str]) -> None:
    """Count the parsed entry's message under its log level.

    Entries with an empty (or missing) message are ignored; an absent
    logger level is bucketed as 'UNKNOWN'.
    """
    if message := parsed_entry.get('message', ''):
        self.counters.add_message(parsed_entry.get('logger_level', 'UNKNOWN'), message)

def should_parse_line(self, pattern: Pattern[str]) -> bool:
    """Always parse: every configured log-level pattern is of interest.

    The ``pattern`` argument is currently unused; the base class supplies
    it for overrides that filter per pattern.
    """
    return True
Copy link

Copilot AI Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The should_parse_line method always returns True, making the pattern parameter unused. Consider removing the parameter if it's not needed, or implement conditional logic if different patterns should be handled differently.

Suggested change
return True
"""
Decide whether a line associated with the given pattern should be parsed.
This implementation returns True for the log level patterns that this parser
is configured to handle. For the patterns currently returned by get_patterns(),
this method behaves the same as the previous unconditional True.
"""
allowed_levels = {"INFO", "TRACE", "DEBUG", "WARN", "ERROR"}
# Pattern may be a compiled regex or another string-like object.
pattern_str = getattr(pattern, "pattern", str(pattern))
return any(level in pattern_str for level in allowed_levels)

Copilot uses AI. Check for mistakes.
32 changes: 32 additions & 0 deletions multiversx_logs_counter/counter_structures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from enum import Enum

class LogLevel(Enum):
    """Recognized log severity levels.

    Values are explicit 1-based integers, matching what the Enum functional
    API would have assigned to the same member ordering.
    """
    INFO = 1
    TRACE = 2
    DEBUG = 3
    WARN = 4
    ERROR = 5


class CounterData:

Comment on lines +12 to +13
Copy link

Copilot AI Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing docstring for the CounterData class. Public classes should have docstrings explaining their purpose, usage, and key attributes.

Suggested change
class CounterData:
class CounterData:
"""
Container for counting log messages per log level.
This class maintains a nested dictionary structure where the first key is
the log level name (as defined in ``LogLevel``) and the second key is the
log message string. The corresponding value is the number of times that
message has been observed at that log level.
Attributes
----------
counter_dictionary : dict[str, dict[str, int]]
Maps a log level name to a dictionary of message strings and their
associated occurrence counts.
Usage
-----
- Use :meth:`add_message` to increment the count for a single message.
- Use :meth:`add_counted_messages` to merge counts from another
:class:`CounterData` instance.
- Use :meth:`reset` to clear all stored counts.
"""

Copilot uses AI. Check for mistakes.
def __init__(self):
    """Start with an empty message-count bucket for every known log level."""
    buckets = {}
    for level in LogLevel:
        buckets[level.name] = {}
    self.counter_dictionary = buckets

def reset(self):
    """Drop all recorded counts, restoring the initial empty-per-level state."""
    empty_buckets = {}
    for level in LogLevel:
        empty_buckets[level.name] = {}
    self.counter_dictionary = empty_buckets

def add_message(self, log_level: str, message: str, count=1) -> None:
    """Increase the count for ``message`` under ``log_level`` by ``count``.

    Raises:
        KeyError: if ``log_level`` is not one of the known level buckets.
    """
    bucket = self.counter_dictionary[log_level]
    bucket[message] = bucket.get(message, 0) + count

def add_counted_messages(self, messages: 'CounterData') -> None:
    """Merge every (level, message, count) from another CounterData into this one."""
    for log_level, per_message_counts in messages.counter_dictionary.items():
        for message, occurrences in per_message_counts.items():
            self.add_message(log_level, message, occurrences)
71 changes: 60 additions & 11 deletions multiversx_logs_parser_tools/entry_parser.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

Copy link

Copilot AI Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unnecessary blank line at the start of the file. This adds no value and should be removed for consistency.

Suggested change

Copilot uses AI. Check for mistakes.
import json
import re
from typing import Any
Expand Down Expand Up @@ -44,20 +45,49 @@ def parse_context(self, context: str) -> dict[str, Any]:
return {'shard': '', 'epoch': 0, 'round': 0, 'subround': ''}

def parse_message(self, message: str):

# header hash entry
m = re.search(r'header\s+hash:\s*([0-9a-fA-F]+)', message)
if m:
return "HEADER TABLE", f"hash={m.group(1)}"

# Epoch begins
m = re.search(r'EPOCH\s+(\d+)\s+BEGINS\s+IN\s+ROUND\s+\((\d+)\)', message, re.IGNORECASE)
if m:
epoch, rnd = m.group(1), m.group(2)
return "BEGIN EPOCH", f"epoch={epoch}; round={rnd}"

# ROUND begins
m = re.search(r'ROUND\s+(-?\d+)\s+BEGINS', message, re.IGNORECASE)
if m:
return "BEGIN ROUND", f"round={m.group(1)}"

# SUBROUND begins
m = re.search(r'SUBROUND\s+\(([^)]+)\)\s+BEGINS', message, re.IGNORECASE)
if m:
return "BEGIN SUBROUND", f"subround={m.group(1)}"

# Proposed block added
m = re.search(r'Added proposed block with nonce\s+(\d+)', message, re.IGNORECASE)
if m:
return "PROPOSED BLOCK", f"nonce={m.group(1)}"

# Assembled block added
m = re.search(r'Added\s+assembled\s+block\s+with\s+nonce\s+(\d+)', message, re.IGNORECASE)
if m:
return "ASSEMBLED BLOCK", f"nonce={m.group(1)}"

# --- common logic ---
Copy link

Copilot AI Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment says "common logic" but this could be more descriptive. Consider using a comment that better describes what this fallback logic does, such as "Parse using default separator-based or equals-sign-based logic".

Suggested change
# --- common logic ---
# Fallback: parse using default separator-based or equals-sign-based logic

Copilot uses AI. Check for mistakes.
if separator in message:
# if the separator is present, split the content between message and parameters using the separator
message, parameters = message.split(separator, 1)
return message.strip(), parameters.strip()
msg, params = message.split(separator, 1)
return msg.strip(), params.strip()

elif ' = ' in message:
# if no separator, but the content includes '=', assume first parameter is the word before the '=' and split before that word
message_parts = message.split(' = ', 1)
message, first_parameter_label = message_parts[0].rsplit(' ', 1)
return message.strip(), first_parameter_label.strip() + ' = ' + message_parts[1].strip()
if ' = ' in message:
parts = message.split(' = ', 1)
msg, label = parts[0].rsplit(' ', 1)
return msg.strip(), label.strip() + ' = ' + parts[1].strip()

else:
# no parameters in the entry or cannot determine if there are parameters present
return message.strip(), ''
return message.strip(), ''

def parse_log_entry(self, log_content: str) -> dict[str, str]:
data = {}
Expand All @@ -81,3 +111,22 @@ def parse_log_entry(self, log_content: str) -> dict[str, str]:
header = json.loads(parameter)
print(json.dumps(result, indent=4))
print(json.dumps(header, indent=4))

content_list = ['DEBUG[2025-11-12 08:57:53.007] [..ensus/chronology] [0/0/1/(END_ROUND)] 2025-11-12 08:57:53.000932854 ################################### ROUND 2 BEGINS (1762937873000) ################################### ',
'DEBUG[2025-11-12 08:56:47.006] [..ensus/chronology] [0/0/-10/] 2025-11-12 08:56:47.000575216 ################################## ROUND -9 BEGINS (1762937807000) ##################################',
'DEBUG[2025-11-12 13:36:32.680][..nsus/spos/bls/v2][0/13/23926/(END_ROUND)] 2025-11-12 13:36:32.680514567 + ++++++++++++++++++++++ Added proposed block with nonce 23922 in blockchain + ++++++++++++++++++++++ ',
'DEBUG[2025-11-12 13:36:33.200][..ensus/chronology][0/13/23927/(END_ROUND)] 2025-11-12 13:36:33.200192207 ................................... SUBROUND(START_ROUND) BEGINS ................................... ',
'DEBUG[2025-11-12 13:36:33.202][..ensus/chronology][0/13/23927/(START_ROUND)] 2025-11-12 13:36:33.202271507 ...................................... SUBROUND(BLOCK) BEGINS ...................................... ',
'DEBUG[2025-11-12 13:36:33.228][..ensus/chronology][0/13/23927/(BLOCK)] 2025-11-12 13:36:33.227644328 .................................... SUBROUND(SIGNATURE) BEGINS .................................... ',
'DEBUG[2025-11-12 13:36:33.234][..ensus/chronology][0/13/23927/(SIGNATURE)] 2025-11-12 13:36:33.234186524 .................................... SUBROUND(END_ROUND) BEGINS .................................... ',
'DEBUG[2025-11-12 12:01:22.747][..Start/shardchain][0/8/14409/(END_ROUND)] # EPOCH 9 BEGINS IN ROUND (14409) ##################################'
Copy link

Copilot AI Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing newline between the two test log entries. Line 122 and 123 should be separated with a comma and newline for better readability and to match the pattern of the list.

Suggested change
'DEBUG[2025-11-12 12:01:22.747][..Start/shardchain][0/8/14409/(END_ROUND)] # EPOCH 9 BEGINS IN ROUND (14409) ##################################'
'DEBUG[2025-11-12 12:01:22.747][..Start/shardchain][0/8/14409/(END_ROUND)] # EPOCH 9 BEGINS IN ROUND (14409) ##################################',

Copilot uses AI. Check for mistakes.
'DEBUG[2025-11-12 09:27:35.419] [process/block] [0/1/299/(END_ROUND)] header hash: 209622a0c04a556829507a7a2176aaee2a58a524766b805dc9a423e2fe023072',
'DEBUG[2025-11-12 09:01:35.041] [..nsus/spos/bls/v1] [0/0/39/(END_ROUND)] 2025-11-12 09:01:35.035239399 ------------------------ Added assembled block with nonce 39 in blockchain ------------------------',
]
for content in content_list:
result = EntryParser('').parse_log_entry(content)
print(json.dumps(result, indent=4))

content = 'DEBUG[2025-11-12 08:57:47.151] [process/sync] [0/0/1/(END_ROUND)] forkDetector.appendHeaderInfo round = 1 nonce = 1 hash = 310001ecb0e9f441b916adf87b71cc959745f48ba07b78c9df5741b2b35bbf8d state = 0 probable highest nonce = 1 last checkpoint nonce = 1 final checkpoint nonce = 0 has proof = true '
result = EntryParser('').parse_log_entry(content)
print(json.dumps(result, indent=4))
Comment on lines +115 to +132
Copy link

Copilot AI Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Test or debug code should not be committed to the main codebase. This appears to be test code in the main block that should either be moved to proper unit tests or removed.

Copilot uses AI. Check for mistakes.
4 changes: 2 additions & 2 deletions multiversx_logs_parser_tools/node_logs_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,10 @@ def write_node_json(self, path=''):
if not path:
node_reports_path = f'./Reports/{self.run_name}/Nodes'
output_file = Path(f'{node_reports_path}/{self.node_name}_report.json')
directory = os.path.dirname(output_file)
Path(directory).mkdir(parents=True, exist_ok=True)
else:
output_file = Path(path + f'/{self.node_name}_report.json')
directory = os.path.dirname(output_file)
Path(directory).mkdir(parents=True, exist_ok=True)
with open(output_file, "w") as json_file:
json.dump(self.create_json_for_node(), json_file, indent=4)

Expand Down