Skip to content

Commit 89695bf

Browse files
committed
fixup! don't be too smart. bye array
1 parent 2e58716 commit 89695bf

1 file changed

Lines changed: 115 additions & 94 deletions

File tree

Lib/profiling/sampling/gecko_collector.py

Lines changed: 115 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -65,19 +65,24 @@
6565

6666
# In-memory buffer before spilling to disk
6767
DEFAULT_SPILL_BUFFER_BYTES = 128 * 1024
68+
_JSON_SEPARATORS = (",", ":")
69+
_JSON_ENCODER = json.JSONEncoder(
70+
separators=_JSON_SEPARATORS, allow_nan=False
71+
)
6872

6973

7074
class SpillColumn:
71-
_encoder = json.JSONEncoder(separators=(",", ":"))
72-
7375
def __init__(self, directory, basename, *,
74-
buffer_bytes=DEFAULT_SPILL_BUFFER_BYTES):
76+
buffer_bytes=None):
7577
self.path = os.path.join(directory, basename)
7678
self.buffer = bytearray()
77-
self._buffer_bytes = buffer_bytes
79+
self._buffer_bytes = (
80+
DEFAULT_SPILL_BUFFER_BYTES if buffer_bytes is None
81+
else buffer_bytes
82+
)
7883

7984
def append(self, value):
80-
self.buffer += (self._encoder.encode(value) + "\n").encode()
85+
self.buffer += (_JSON_ENCODER.encode(value) + "\n").encode("utf-8")
8186
if len(self.buffer) >= self._buffer_bytes:
8287
self.flush()
8388

@@ -146,7 +151,6 @@ def __init__(self, sample_interval_usec, *, skip_idle=False, opcodes=False):
146151
# Per-thread data structures
147152
self.threads = {} # tid -> thread data
148153
self.spill_dir = None
149-
self.thread_spills = {}
150154
self.exported = False
151155

152156
# Global tables
@@ -333,7 +337,7 @@ def collect(self, stack_frames, timestamps_us=None):
333337
stack_index = self._process_stack(thread_data, frames)
334338

335339
# Add samples with timestamps
336-
thread_spill = self.thread_spills[tid]
340+
thread_spill = thread_data["_spill"]
337341
for t in times:
338342
thread_spill.append_sample(stack_index, t)
339343

@@ -365,8 +369,6 @@ def _create_thread(self, tid, is_main_thread):
365369
if self.spill_dir is None:
366370
self.spill_dir = tempfile.TemporaryDirectory()
367371

368-
self.thread_spills[tid] = GeckoThreadSpill(self.spill_dir.name, tid)
369-
370372
thread = {
371373
"name": f"Thread-{tid}",
372374
"isMainThread": is_main_thread,
@@ -434,6 +436,7 @@ def _create_thread(self, tid, is_main_thread):
434436
"_frameCache": {},
435437
"_funcCache": {},
436438
"_resourceCache": {},
439+
"_spill": GeckoThreadSpill(self.spill_dir.name, tid),
437440
}
438441

439442
return thread
@@ -461,13 +464,16 @@ def _add_marker(self, tid, name, start_time, end_time, category):
461464
duration = end_time - start_time
462465

463466
name_idx = self._intern_string(name)
464-
self.thread_spills[tid].append_marker(name_idx, start_time, end_time, 1, category, {
465-
"type": name.replace(" ", ""),
466-
"duration": duration,
467-
"tid": tid
468-
})
467+
self.threads[tid]["_spill"].append_marker(
468+
name_idx, start_time, end_time, 1, category, {
469+
"type": name.replace(" ", ""),
470+
"duration": duration,
471+
"tid": tid,
472+
}
473+
)
469474

470-
def _add_opcode_interval_marker(self, tid, opcode, lineno, col_offset, funcname, start_time, end_time):
475+
def _add_opcode_interval_marker(self, tid, opcode, lineno, col_offset,
476+
funcname, start_time, end_time):
471477
"""Add an interval marker for opcode execution span."""
472478
if tid not in self.threads or opcode is None:
473479
return
@@ -478,17 +484,19 @@ def _add_opcode_interval_marker(self, tid, opcode, lineno, col_offset, funcname,
478484

479485
name_idx = self._intern_string(formatted_opname)
480486

481-
self.thread_spills[tid].append_marker(name_idx, start_time, end_time, 1, CATEGORY_OPCODES, {
482-
"type": "Opcode",
483-
"opcode": opcode,
484-
"opname": formatted_opname,
485-
"base_opname": opcode_info["base_opname"],
486-
"is_specialized": opcode_info["is_specialized"],
487-
"line": lineno,
488-
"column": col_offset if col_offset >= 0 else None,
489-
"function": funcname,
490-
"duration": end_time - start_time,
491-
})
487+
self.threads[tid]["_spill"].append_marker(
488+
name_idx, start_time, end_time, 1, CATEGORY_OPCODES, {
489+
"type": "Opcode",
490+
"opcode": opcode,
491+
"opname": formatted_opname,
492+
"base_opname": opcode_info["base_opname"],
493+
"is_specialized": opcode_info["is_specialized"],
494+
"line": lineno,
495+
"column": col_offset if col_offset >= 0 else None,
496+
"function": funcname,
497+
"duration": end_time - start_time,
498+
}
499+
)
492500

493501
def _process_stack(self, thread_data, frames):
494502
"""Process a stack and return the stack index."""
@@ -742,8 +750,7 @@ def spin():
742750
os.unlink(temp_path)
743751
except FileNotFoundError:
744752
pass
745-
if self.spill_dir is not None:
746-
self.spill_dir.cleanup()
753+
self._cleanup_spills()
747754

748755
print(f"Gecko profile written to {filename}")
749756
print(
@@ -784,8 +791,7 @@ def _build_profile(self):
784791
return json.loads(file.getvalue())
785792
finally:
786793
self.exported = True
787-
if self.spill_dir is not None:
788-
self.spill_dir.cleanup()
794+
self._cleanup_spills()
789795

790796
def _profile_head(self):
791797
return {
@@ -833,34 +839,38 @@ def _prepare_for_serialization(self):
833839
if self.exported:
834840
raise RuntimeError("GeckoCollector has already been exported")
835841
self._finalize_markers()
836-
for spill in self.thread_spills.values():
837-
spill.prepare_read()
838842
for thread_data in self.threads.values():
843+
thread_data["_spill"].prepare_read()
839844
thread_data["stackTable"]["length"] = len(thread_data["stackTable"]["frame"])
840845
thread_data["frameTable"]["length"] = len(thread_data["frameTable"]["func"])
841846
thread_data["funcTable"]["length"] = len(thread_data["funcTable"]["name"])
842847
thread_data["resourceTable"]["length"] = len(thread_data["resourceTable"]["name"])
843848

849+
def _cleanup_spills(self):
850+
if self.spill_dir is not None:
851+
self.spill_dir.cleanup()
852+
self.spill_dir = None
853+
844854
def _stream_profile(self, file):
845-
head = json.dumps(
846-
self._profile_head(), separators=(",", ":"), allow_nan=False
847-
)[1:-1]
848-
tail = json.dumps(
849-
self._profile_tail(), separators=(",", ":"), allow_nan=False
850-
)[1:-1]
851855
file.write("{")
852-
file.write(head)
853-
file.write(',"threads":[')
856+
first = True
857+
for key, value in self._profile_head().items():
858+
first = _write_json_member(file, key, value, first)
859+
860+
first = _write_member_name(file, "threads", first)
861+
file.write("[")
854862
for index, (tid, thread_data) in enumerate(self.threads.items()):
855863
if index:
856864
file.write(",")
857865
self._stream_thread(file, tid, thread_data)
858-
file.write("],")
859-
file.write(tail)
866+
file.write("]")
867+
868+
for key, value in self._profile_tail().items():
869+
first = _write_json_member(file, key, value, first)
860870
file.write("}")
861871

862872
def _stream_thread(self, file, tid, thread_data):
863-
spill = self.thread_spills[tid]
873+
spill = thread_data["_spill"]
864874
metadata = {
865875
"name": thread_data["name"],
866876
"isMainThread": thread_data["isMainThread"],
@@ -875,8 +885,11 @@ def _stream_thread(self, file, tid, thread_data):
875885
"processName": thread_data["processName"],
876886
}
877887
file.write("{")
878-
file.write(json.dumps(metadata, separators=(",", ":"), allow_nan=False)[1:-1])
879-
file.write(',"samples":')
888+
first = True
889+
for key, value in metadata.items():
890+
first = _write_json_member(file, key, value, first)
891+
892+
first = _write_member_name(file, "samples", first)
880893
self._stream_samples(file, spill)
881894
for key in (
882895
"stackTable",
@@ -885,66 +898,74 @@ def _stream_thread(self, file, tid, thread_data):
885898
"resourceTable",
886899
"nativeSymbols",
887900
):
888-
file.write(',"')
889-
file.write(key)
890-
file.write('":')
891-
file.write(json.dumps(
892-
thread_data[key], separators=(",", ":"), allow_nan=False
893-
))
894-
file.write(',"markers":')
901+
first = _write_json_member(file, key, thread_data[key], first)
902+
first = _write_member_name(file, "markers", first)
895903
self._stream_markers(file, spill)
896904
file.write("}")
897905

898906
def _stream_samples(self, file, spill):
899-
file.write('{"stack":')
900-
_stream_array(
901-
file, spill.samples_stack.iter_tokens(), spill.sample_count
902-
)
903-
file.write(',"time":')
904-
_stream_array(
905-
file, spill.samples_time.iter_tokens(), spill.sample_count
906-
)
907-
file.write(',"eventDelay":')
908-
_stream_array(
907+
_stream_column_table(
909908
file,
910-
("null" for _ in range(spill.sample_count)),
909+
(
910+
("stack", spill.samples_stack.iter_tokens()),
911+
("time", spill.samples_time.iter_tokens()),
912+
("eventDelay", ("null" for _ in range(spill.sample_count))),
913+
),
911914
spill.sample_count,
915+
(
916+
("weight", None),
917+
("weightType", "samples"),
918+
("length", spill.sample_count),
919+
),
912920
)
913-
file.write(',"weight":null,"weightType":"samples","length":')
914-
file.write(repr(spill.sample_count))
915-
file.write("}")
916921

917922
def _stream_markers(self, file, spill):
918-
file.write('{"data":')
919-
_stream_array(
920-
file, spill.markers_data.iter_tokens(), spill.marker_count
921-
)
922-
file.write(',"name":')
923-
_stream_array(
924-
file, spill.markers_name.iter_tokens(), spill.marker_count
925-
)
926-
file.write(',"startTime":')
927-
_stream_array(
928-
file, spill.markers_start_time.iter_tokens(), spill.marker_count
929-
)
930-
file.write(',"endTime":')
931-
_stream_array(
932-
file, spill.markers_end_time.iter_tokens(), spill.marker_count
933-
)
934-
file.write(',"phase":')
935-
_stream_array(
936-
file, spill.markers_phase.iter_tokens(), spill.marker_count
937-
)
938-
file.write(',"category":')
939-
_stream_array(
940-
file, spill.markers_category.iter_tokens(), spill.marker_count
923+
_stream_column_table(
924+
file,
925+
(
926+
("data", spill.markers_data.iter_tokens()),
927+
("name", spill.markers_name.iter_tokens()),
928+
("startTime", spill.markers_start_time.iter_tokens()),
929+
("endTime", spill.markers_end_time.iter_tokens()),
930+
("phase", spill.markers_phase.iter_tokens()),
931+
("category", spill.markers_category.iter_tokens()),
932+
),
933+
spill.marker_count,
934+
(("length", spill.marker_count),),
941935
)
942-
file.write(',"length":')
943-
file.write(repr(spill.marker_count))
944-
file.write("}")
945936

946937

947-
def _stream_array(file, token_iter, expected_count):
938+
def _write_json(file, value):
939+
for chunk in _JSON_ENCODER.iterencode(value):
940+
file.write(chunk)
941+
942+
943+
def _write_member_name(file, name, first):
944+
if not first:
945+
file.write(",")
946+
_write_json(file, name)
947+
file.write(":")
948+
return False
949+
950+
951+
def _write_json_member(file, name, value, first):
952+
first = _write_member_name(file, name, first)
953+
_write_json(file, value)
954+
return first
955+
956+
957+
def _stream_column_table(file, columns, expected_count, trailing_members=()):
958+
file.write("{")
959+
first = True
960+
for name, token_iter in columns:
961+
first = _write_member_name(file, name, first)
962+
_stream_array(file, token_iter, expected_count, name)
963+
for name, value in trailing_members:
964+
first = _write_json_member(file, name, value, first)
965+
file.write("}")
966+
967+
968+
def _stream_array(file, token_iter, expected_count, label="array"):
948969
file.write("[")
949970
count = 0
950971
for token in token_iter:
@@ -954,6 +975,6 @@ def _stream_array(file, token_iter, expected_count):
954975
count += 1
955976
if count != expected_count:
956977
raise RuntimeError(
957-
f"streamed {count} array items, expected {expected_count}"
978+
f"streamed {count} {label} items, expected {expected_count}"
958979
)
959980
file.write("]")

0 commit comments

Comments
 (0)