Skip to content

Commit 3130f55

Browse files
committed
Refactor MI parsing to decrease the number of regex matches to perform
1 parent 72897f8 commit 3130f55

File tree

2 files changed

+151
-124
lines changed

2 files changed

+151
-124
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
Internal changes
99

1010
- Update and freeze dependencies for documentation generation
11+
- Refactored the code to parse MI records to decrease the number of regex matches to perform
1112

1213
## 0.10.0.2
1314

pygdbmi/gdbmiparser.py

Lines changed: 150 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,11 @@
77
See more at https://sourceware.org/gdb/onlinedocs/gdb/GDB_002fMI.html#GDB_002fMI
88
"""
99

10+
import functools
1011
import logging
1112
import re
1213
from pprint import pprint
13-
from typing import Dict, Union
14+
from typing import Callable, Dict, List, Match, Optional, Pattern, Tuple, Union
1415

1516
from pygdbmi.printcolor import fmt_green
1617
from pygdbmi.StringStream import StringStream
@@ -53,59 +54,18 @@ def parse_response(gdb_mi_text: str) -> Dict:
5354
"""
5455
stream = StringStream(gdb_mi_text, debug=_DEBUG)
5556

56-
if _GDB_MI_NOTIFY_RE.match(gdb_mi_text):
57-
token, message, payload = _get_notify_msg_and_payload(gdb_mi_text, stream)
58-
return {
59-
"type": "notify",
60-
"message": message,
61-
"payload": payload,
62-
"token": token,
63-
}
64-
65-
elif _GDB_MI_RESULT_RE.match(gdb_mi_text):
66-
token, message, payload = _get_result_msg_and_payload(gdb_mi_text, stream)
67-
return {
68-
"type": "result",
69-
"message": message,
70-
"payload": payload,
71-
"token": token,
72-
}
73-
74-
elif _GDB_MI_CONSOLE_RE.match(gdb_mi_text):
75-
match = _GDB_MI_CONSOLE_RE.match(gdb_mi_text)
76-
if match:
77-
payload = unescape(match.groups()[0])
78-
else:
79-
payload = None
80-
return {
81-
"type": "console",
82-
"message": None,
83-
"payload": payload,
84-
}
85-
86-
elif _GDB_MI_LOG_RE.match(gdb_mi_text):
87-
match = _GDB_MI_LOG_RE.match(gdb_mi_text)
88-
if match:
89-
payload = unescape(match.groups()[0])
90-
else:
91-
payload = None
92-
return {"type": "log", "message": None, "payload": payload}
93-
94-
elif _GDB_MI_TARGET_OUTPUT_RE.match(gdb_mi_text):
95-
match = _GDB_MI_TARGET_OUTPUT_RE.match(gdb_mi_text)
96-
if match:
97-
payload = unescape(match.groups()[0])
98-
else:
99-
payload = None
100-
return {"type": "target", "message": None, "payload": payload}
101-
102-
elif response_is_finished(gdb_mi_text):
103-
return {"type": "done", "message": None, "payload": None}
57+
for pattern, parser in _GDB_MI_PATTERNS_AND_PARSERS:
58+
match = pattern.match(gdb_mi_text)
59+
if match is not None:
60+
return parser(match, stream)
10461

105-
else:
106-
# This was not gdb mi output, so it must have just been printed by
107-
# the inferior program that's being debugged
108-
return {"type": "output", "message": None, "payload": gdb_mi_text}
62+
# This was not gdb mi output, so it must have just been printed by
63+
# the inferior program that's being debugged
64+
return {
65+
"type": "output",
66+
"message": None,
67+
"payload": gdb_mi_text,
68+
}
10969

11070

11171
def response_is_finished(gdb_mi_text: str) -> bool:
@@ -117,11 +77,7 @@ def response_is_finished(gdb_mi_text: str) -> bool:
11777
Returns:
11878
True if gdb response is finished
11979
"""
120-
if _GDB_MI_RESPONSE_FINISHED_RE.match(gdb_mi_text):
121-
return True
122-
123-
else:
124-
return False
80+
return _GDB_MI_RESPONSE_FINISHED_RE.match(gdb_mi_text) is not None
12581

12682

12783
def assert_match(actual_char_or_str, expected_char_or_str):
@@ -142,43 +98,150 @@ def assert_match(actual_char_or_str, expected_char_or_str):
14298
# ========================================================================
14399

144100

145-
# GDB machine interface output patterns to match
146-
# https://sourceware.org/gdb/onlinedocs/gdb/GDB_002fMI-Stream-Records.html#GDB_002fMI-Stream-Records
101+
def _parse_mi_notify(match: Match, stream: StringStream):
102+
"""Parser function for matches against a notify record.
103+
104+
See _GDB_MI_PATTERNS_AND_PARSERS for details."""
105+
message = match["message"]
106+
logger.debug("parsed message")
107+
logger.debug("%s", fmt_green(message))
147108

148-
# https://sourceware.org/gdb/onlinedocs/gdb/GDB_002fMI-Result-Records.html#GDB_002fMI-Result-Records
149-
# In addition to a number of out-of-band notifications,
150-
# the response to a gdb/mi command includes one of the following result indications:
151-
# done, running, connected, error, exit
152-
_GDB_MI_RESULT_RE = re.compile(r"^(\d*)\^(\S+?)(,.*)?$")
109+
return {
110+
"type": "notify",
111+
"message": message.strip(),
112+
"payload": _extract_payload(match, stream),
113+
"token": _extract_token(match),
114+
}
153115

154-
# https://sourceware.org/gdb/onlinedocs/gdb/GDB_002fMI-Async-Records.html#GDB_002fMI-Async-Records
155-
# Async records are used to notify the gdb/mi client of additional
156-
# changes that have occurred. Those changes can either be a consequence
157-
# of gdb/mi commands (e.g., a breakpoint modified) or a result of target activity
158-
# (e.g., target stopped).
159-
_GDB_MI_NOTIFY_RE = re.compile(r"^(\d*)[*=](\S+?)(,.*)*$")
160116

161-
# https://sourceware.org/gdb/onlinedocs/gdb/GDB_002fMI-Stream-Records.html#GDB_002fMI-Stream-Records
162-
# "~" string-output
163-
# The console output stream contains text that should be displayed
164-
# in the CLI console window. It contains the textual responses to CLI commands.
165-
_GDB_MI_CONSOLE_RE = re.compile(r'~"(.*)"', re.DOTALL)
117+
def _parse_mi_result(match: Match, stream: StringStream):
118+
"""Parser function for matches against a result record.
166119
167-
# https://sourceware.org/gdb/onlinedocs/gdb/GDB_002fMI-Stream-Records.html#GDB_002fMI-Stream-Records
168-
# "&" string-output
169-
# The log stream contains debugging messages being produced by gdb's internals.
170-
_GDB_MI_LOG_RE = re.compile(r'&"(.*)"', re.DOTALL)
120+
See _GDB_MI_PATTERNS_AND_PARSERS for details."""
121+
return {
122+
"type": "result",
123+
"message": match["message"],
124+
"payload": _extract_payload(match, stream),
125+
"token": _extract_token(match),
126+
}
127+
128+
129+
def _parse_mi_output(match: Match, stream: StringStream, output_type: str):
130+
"""Parser function for matches against a console, log or target record.
131+
132+
The record type must be specified in output_type.
133+
134+
See _GDB_MI_PATTERNS_AND_PARSERS for details."""
135+
return {
136+
"type": output_type,
137+
"message": None,
138+
"payload": unescape(match["payload"]),
139+
}
140+
141+
142+
def _parse_mi_finished(match: Match, stream: StringStream):
143+
"""Parser function for matches against a finished record.
144+
145+
See _GDB_MI_PATTERNS_AND_PARSERS for details."""
146+
return {
147+
"type": "done",
148+
"message": None,
149+
"payload": None,
150+
}
171151

172-
# https://sourceware.org/gdb/onlinedocs/gdb/GDB_002fMI-Stream-Records.html#GDB_002fMI-Stream-Records
173-
# "@" string-output
174-
# The target output stream contains any textual output from the
175-
# running target. This is only present when GDB's event loop is truly asynchronous,
176-
# which is currently only the case for remote targets.
177-
_GDB_MI_TARGET_OUTPUT_RE = re.compile(r'@"(.*)"', re.DOTALL)
178152

179-
# Response finished
153+
def _extract_token(match: Match) -> Optional[int]:
154+
"""Extract a token from a match against a regular expression which included
155+
_GDB_MI_COMPONENT_TOKEN."""
156+
token = match["token"]
157+
return int(token) if token is not None else None
158+
159+
160+
def _extract_payload(match: Match, stream: StringStream) -> Optional[Dict]:
161+
"""Extract the payload from a match against a regular expression which included
162+
_GDB_MI_COMPONENT_PAYLOAD."""
163+
if match["payload"] is None:
164+
return None
165+
166+
stream.advance_past_chars([","])
167+
return _parse_dict(stream)
168+
169+
170+
# A regular expression matching a response finished record.
180171
_GDB_MI_RESPONSE_FINISHED_RE = re.compile(r"^\(gdb\)\s*$")
181172

173+
# Regular expression identifying a token in a MI record.
174+
_GDB_MI_COMPONENT_TOKEN = r"(?P<token>\d+)?"
175+
# Regular expression identifying a payload in a MI record.
176+
_GDB_MI_COMPONENT_PAYLOAD = r"(?P<payload>,.*)?"
177+
178+
# The type of the functions which parse MI records as used by
179+
# _GDB_MI_PATTERNS_AND_PARSERS.
180+
_PARSER_FUNCTION = Callable[[Match, StringStream], Dict]
181+
182+
# A list where each item is a tuple of:
183+
# - A compiled regular expression matching a MI record.
184+
# - A function which is called if the regex matched with the match and a StringStream.
185+
# It must return a dictionary with details on the MI record.
186+
#
187+
# For more details on MI records, see
188+
# https://sourceware.org/gdb/onlinedocs/gdb/GDB_002fMI-Stream-Records.html#GDB_002fMI-Stream-Records
189+
#
190+
# The order matters as items are iterated in order and iteration stops once a match is
191+
# found.
192+
_GDB_MI_PATTERNS_AND_PARSERS: List[Tuple[Pattern, _PARSER_FUNCTION]] = [
193+
# https://sourceware.org/gdb/onlinedocs/gdb/GDB_002fMI-Result-Records.html#GDB_002fMI-Result-Records
194+
# In addition to a number of out-of-band notifications,
195+
# the response to a gdb/mi command includes one of the following result indications:
196+
# done, running, connected, error, exit
197+
(
198+
re.compile(
199+
rf"^{_GDB_MI_COMPONENT_TOKEN}\^(?P<message>\S+?){_GDB_MI_COMPONENT_PAYLOAD}$"
200+
),
201+
_parse_mi_result,
202+
),
203+
# https://sourceware.org/gdb/onlinedocs/gdb/GDB_002fMI-Async-Records.html#GDB_002fMI-Async-Records
204+
# Async records are used to notify the gdb/mi client of additional
205+
# changes that have occurred. Those changes can either be a consequence
206+
# of gdb/mi commands (e.g., a breakpoint modified) or a result of target activity
207+
# (e.g., target stopped).
208+
(
209+
re.compile(
210+
rf"^{_GDB_MI_COMPONENT_TOKEN}[*=](?P<message>\S+?){_GDB_MI_COMPONENT_PAYLOAD}$"
211+
),
212+
_parse_mi_notify,
213+
),
214+
# https://sourceware.org/gdb/onlinedocs/gdb/GDB_002fMI-Stream-Records.html#GDB_002fMI-Stream-Records
215+
# "~" string-output
216+
# The console output stream contains text that should be displayed
217+
# in the CLI console window. It contains the textual responses to CLI commands.
218+
(
219+
re.compile(r'~"(?P<payload>.*)"', re.DOTALL),
220+
functools.partial(_parse_mi_output, output_type="console"),
221+
),
222+
# https://sourceware.org/gdb/onlinedocs/gdb/GDB_002fMI-Stream-Records.html#GDB_002fMI-Stream-Records
223+
# "&" string-output
224+
# The log stream contains debugging messages being produced by gdb's internals.
225+
(
226+
re.compile(r'&"(?P<payload>.*)"', re.DOTALL),
227+
functools.partial(_parse_mi_output, output_type="log"),
228+
),
229+
# https://sourceware.org/gdb/onlinedocs/gdb/GDB_002fMI-Stream-Records.html#GDB_002fMI-Stream-Records
230+
# "@" string-output
231+
# The target output stream contains any textual output from the
232+
# running target. This is only present when GDB's event loop is truly asynchronous,
233+
# which is currently only the case for remote targets.
234+
(
235+
re.compile(r'@"(?P<payload>.*)"', re.DOTALL),
236+
functools.partial(_parse_mi_output, output_type="target"),
237+
),
238+
(
239+
_GDB_MI_RESPONSE_FINISHED_RE,
240+
_parse_mi_finished,
241+
),
242+
]
243+
244+
182245
_WHITESPACE = [" ", "\t", "\r", "\n"]
183246

184247
_GDB_MI_CHAR_DICT_START = "{"
@@ -191,43 +254,6 @@ def assert_match(actual_char_or_str, expected_char_or_str):
191254
]
192255

193256

194-
def _get_notify_msg_and_payload(result, stream: StringStream):
195-
"""Get notify message and payload dict"""
196-
match = _GDB_MI_NOTIFY_RE.match(result)
197-
assert match is not None
198-
groups = match.groups()
199-
token = int(groups[0]) if groups[0] != "" else None
200-
message = groups[1]
201-
202-
logger.debug("parsed message")
203-
logger.debug("%s", fmt_green(message))
204-
205-
if groups[2] is None:
206-
payload = None
207-
else:
208-
stream.advance_past_chars([","])
209-
payload = _parse_dict(stream)
210-
211-
return token, message.strip(), payload
212-
213-
214-
def _get_result_msg_and_payload(result, stream: StringStream):
215-
"""Get result message and payload dict"""
216-
match = _GDB_MI_RESULT_RE.match(result)
217-
assert match is not None
218-
groups = match.groups()
219-
token = int(groups[0]) if groups[0] != "" else None
220-
message = groups[1]
221-
222-
if groups[2] is None:
223-
payload = None
224-
else:
225-
stream.advance_past_chars([","])
226-
payload = _parse_dict(stream)
227-
228-
return token, message, payload
229-
230-
231257
def _parse_dict(stream: StringStream):
232258
"""Parse dictionary, with optional starting character '{'
233259
return (tuple):

0 commit comments

Comments
 (0)