|
from inspect import iscoroutinefunction
from inspect import isgeneratorfunction
import sys
import threading
from types import CodeType
from types import FrameType
from types import FunctionType
from types import TracebackType
|
18 | 19 |
|
19 | 20 | T = t.TypeVar("T") |
20 | 21 |
|
| 22 | +# ============================================================================ |
| 23 | +# Lazy wrapping machinery: allows deferring expensive bytecode instrumentation |
| 24 | +# until the first time a function is actually called. |
| 25 | +# ============================================================================ |
| 26 | + |
# Maps a trampoline code object to the metadata needed to build the real,
# instrumented code for the function that trampoline was installed on.
_lazy_registry: t.Dict[CodeType, "LazyMeta"] = {}


class LazyMeta:
    """Metadata for a lazily-wrapped function.

    Attributes:
        func: the function object whose ``__code__`` gets swapped on first call.
        builder: callable invoked as ``builder(func)``; returns the code object
            to install on ``func``.
        lock: guards the one-time build-and-swap so that concurrent first calls
            build the instrumented code only once.
        initialized: ``True`` once the built code has been installed on ``func``.
    """

    __slots__ = ("func", "builder", "lock", "initialized")

    def __init__(self, func: FunctionType, builder: t.Callable[[FunctionType], CodeType]) -> None:
        self.func = func  # the original function object
        self.builder = builder  # callable: builder(func) -> CodeType
        # Every declared slot must be assigned: an unassigned slot raises
        # AttributeError on first read.
        self.lock = threading.Lock()
        self.initialized = False


def __lazy_trampoline_entry(*args: t.Any, **kwargs: t.Any) -> t.Any:
    """
    Called from the trampoline code on invocation of a lazily-wrapped function.

    Uses the calling frame's code object to find the right LazyMeta, builds
    the heavy bytecode exactly once, swaps it onto the function, and then
    forwards the call to the (now instrumented) function.

    Raises:
        KeyError: if the calling frame's code object is not a registered
            trampoline.
        Exception: whatever the builder raises; in that case the function is
            left uninitialized and the next call retries the build.
    """
    # Get the caller's frame (the trampoline frame)
    code = sys._getframe(1).f_code
    meta = _lazy_registry[code]

    # Cheap unlocked check first; re-check under the lock so that only one
    # thread performs the build and the swap.
    if not meta.initialized:
        with meta.lock:
            if not meta.initialized:
                # Swap the code on the SAME function object frameworks already hold
                meta.func.__code__ = meta.builder(meta.func)
                meta.initialized = True

    # The registry entry is intentionally kept: a trampoline frame that was
    # already running when the swap happened (e.g. on another thread) still
    # has to resolve its LazyMeta here, and would otherwise hit a KeyError.

    # Now call the function again; this time it runs the heavy code
    return meta.func(*args, **kwargs)
| 63 | + |
| 64 | + |
def _make_trampoline_code(template_code: CodeType) -> CodeType:
    """
    Build a tiny code object that:
      - takes *args, **kwargs
      - calls __lazy_trampoline_entry(*args, **kwargs)
      - returns its value

    The name, filename and first line number are copied from
    ``template_code``.

    Note: This only works for functions with 0 freevars.

    Args:
        template_code: code object of the function the trampoline stands in for.

    Returns:
        A new, unique trampoline code object.
    """
    bc = Bytecode()
    bc.name = template_code.co_name
    bc.filename = template_code.co_filename
    bc.first_lineno = template_code.co_firstlineno

    # Function signature: accepts *args, **kwargs
    bc.argcount = 0
    bc.posonlyargcount = 0
    bc.kwonlyargcount = 0
    # OPTIMIZED | NEWLOCALS are the baseline flags of any compiled function
    # body (fast locals, fresh locals per call); the trampoline uses LOAD_FAST.
    bc.flags = (
        bytecode.CompilerFlags.OPTIMIZED
        | bytecode.CompilerFlags.NEWLOCALS
        | bytecode.CompilerFlags.VARARGS
        | bytecode.CompilerFlags.VARKEYWORDS
    )

    # Declare locals for *args, **kwargs
    bc.argnames = ["args", "kwargs"]

    # No freevars or cellvars
    bc.freevars = []
    bc.cellvars = []

    # Code objects hash and compare by value, and that comparison does not take
    # the filename into account. Without a per-trampoline constant, trampolines
    # generated for two same-named functions starting on the same line would be
    # equal and would collide when used as dictionary keys. Loading and
    # discarding a fresh object makes every trampoline distinct.
    bc.extend(
        [
            bytecode.Instr("LOAD_CONST", object()),
            bytecode.Instr("POP_TOP"),
        ]
    )

    # The trampoline is installed on arbitrary functions and therefore runs
    # with *their* globals, where a LOAD_GLOBAL of the entry point's name would
    # not resolve. Embed the function object itself as a constant instead.
    load_entry = bytecode.Instr("LOAD_CONST", __lazy_trampoline_entry)

    if sys.version_info >= (3, 13):
        # Python 3.13+: the callable sits below the NULL on the stack
        bc.extend(
            [
                load_entry,
                bytecode.Instr("PUSH_NULL"),
                bytecode.Instr("LOAD_FAST", "args"),
                bytecode.Instr("BUILD_MAP", 0),
                bytecode.Instr("LOAD_FAST", "kwargs"),
                bytecode.Instr("DICT_MERGE", 1),
                bytecode.Instr("CALL_FUNCTION_EX", 1),
                bytecode.Instr("RETURN_VALUE"),
            ]
        )
    elif sys.version_info >= (3, 11):
        # Python 3.11-3.12: the NULL is pushed before the callable
        bc.extend(
            [
                bytecode.Instr("PUSH_NULL"),
                load_entry,
                bytecode.Instr("LOAD_FAST", "args"),
                bytecode.Instr("LOAD_FAST", "kwargs"),
                bytecode.Instr("CALL_FUNCTION_EX", 1),
                bytecode.Instr("RETURN_VALUE"),
            ]
        )
    else:
        # Python 3.10 and earlier
        bc.extend(
            [
                load_entry,
                bytecode.Instr("LOAD_FAST", "args"),
                bytecode.Instr("LOAD_FAST", "kwargs"),
                bytecode.Instr("CALL_FUNCTION_EX", 1),
                bytecode.Instr("RETURN_VALUE"),
            ]
        )

    return bc.to_code()
| 131 | + |
| 132 | + |
21 | 133 | # This module implements utilities for wrapping a function with a context |
22 | 134 | # manager. The rough idea is to re-write the function's bytecode to look like |
23 | 135 | # this: |
@@ -399,6 +511,10 @@ def extract(cls, f: FunctionType) -> "WrappingContext": |
399 | 511 | def wrap(self) -> None: |
400 | 512 | t.cast(_UniversalWrappingContext, _UniversalWrappingContext.wrapped(self.__wrapped__)).register(self) |
401 | 513 |
|
| 514 | + def wrap_lazy(self) -> None: |
| 515 | + """Install lazy wrapping that defers bytecode instrumentation until first call.""" |
| 516 | + t.cast(_UniversalWrappingContext, _UniversalWrappingContext.wrapped_lazy(self.__wrapped__)).register(self) |
| 517 | + |
402 | 518 | def unwrap(self) -> None: |
403 | 519 | f = self.__wrapped__ |
404 | 520 |
|
@@ -431,6 +547,16 @@ def register(self, context: WrappingContext) -> None: |
431 | 547 | self._contexts.append(context) |
432 | 548 | self._contexts.sort(key=lambda c: c.__priority__) |
433 | 549 |
|
| 550 | + @classmethod |
| 551 | + def wrapped_lazy(cls, f: FunctionType) -> "_UniversalWrappingContext": |
| 552 | + """Create a universal wrapping context with lazy bytecode instrumentation.""" |
| 553 | + if cls.is_wrapped(f): |
| 554 | + context = cls.extract(f) |
| 555 | + else: |
| 556 | + context = cls(f) |
| 557 | + context.wrap_lazy() |
| 558 | + return context |
| 559 | + |
434 | 560 | def unregister(self, context: WrappingContext) -> None: |
435 | 561 | try: |
436 | 562 | self._contexts.remove(context) |
@@ -497,86 +623,123 @@ def extract(cls, f: FunctionType) -> "_UniversalWrappingContext": |
497 | 623 | raise ValueError("Function is not wrapped") |
498 | 624 | return t.cast(_UniversalWrappingContext, t.cast(ContextWrappedFunction, f).__dd_context_wrapped__) |
499 | 625 |
|
500 | | - if sys.version_info >= (3, 11): |
501 | | - |
502 | | - def wrap(self) -> None: |
503 | | - f = self.__wrapped__ |
    def _build_wrapped_code(self, f: FunctionType) -> CodeType:
        """
        Build the heavy instrumented bytecode for a function.
        This is extracted from wrap() to enable lazy wrapping.

        The code currently installed on ``f`` (``f.__code__``) is used as the
        starting point, so callers must make sure it is the code they want
        instrumented at the time of the call. The rewrite:

        - prefixes every return with the ``CONTEXT_RETURN`` /
          ``CONTEXT_RETURN_CONST`` template bound to ``self.__return__``;
        - inserts the ``CONTEXT_HEAD`` template, bound to ``self.__enter__``,
          right after ``RESUME`` (or at the very start if there is none);
        - covers the instructions that are not already inside a try block with
          try regions that jump to a common handler;
        - appends that handler, the ``CONTEXT_FOOT`` template bound to
          ``self._exit``.

        As a side effect, ``f`` is linked to ``f.__code__`` through
        ``link_function_to_code``.

        :param f: the function to build the instrumented code for.
        :return: the instrumented code object; it is NOT installed on ``f``.
        """
        bc = Bytecode.from_code(f.__code__)

        # Prefix every return
        i = 0
        while i < len(bc):
            instr = bc[i]
            try:
                if instr.name == "RETURN_VALUE":
                    return_code = CONTEXT_RETURN.bind({"context_return": self.__return__}, lineno=instr.lineno)
                elif sys.version_info >= (3, 12) and instr.name == "RETURN_CONST":  # Python 3.12+
                    return_code = CONTEXT_RETURN_CONST.bind(
                        {"context_return": self.__return__, "value": instr.arg}, lineno=instr.lineno
                    )
                else:
                    return_code = []

                # Insert before the return and step over what was inserted so
                # that the same return is not processed again.
                bc[i:i] = return_code
                i += len(return_code)
            except AttributeError:
                # Not an instruction
                pass
            i += 1

        # Search for the RESUME instruction
        # (enumerate starts at 1 so that ``i`` ends up being the position
        # right after RESUME)
        for i, instr in enumerate(bc, 1):
            try:
                if instr.name == "RESUME":
                    break
            except AttributeError:
                # Not an instruction
                pass
        else:
            # No RESUME found: insert the head at the very beginning
            i = 0

        bc[i:i] = CONTEXT_HEAD.bind({"context_enter": self.__enter__}, lineno=f.__code__.co_firstlineno)

        # Wrap every line outside a try block
        except_label = bytecode.Label()
        first_try_begin = last_try_begin = bytecode.TryBegin(except_label, push_lasti=True)

        i = 0
        while i < len(bc):
            instr = bc[i]
            if isinstance(instr, bytecode.TryBegin) and last_try_begin is not None:
                # An existing try block starts here: close our open region
                # right before it.
                bc.insert(i, bytecode.TryEnd(last_try_begin))
                last_try_begin = None
                i += 1
            elif isinstance(instr, bytecode.TryEnd):
                # An existing try block ends here: if an instruction follows
                # before the next TryBegin, open a new region of ours right
                # after this TryEnd.
                j = i + 1
                while j < len(bc) and not isinstance(bc[j], bytecode.TryBegin):
                    if isinstance(bc[j], bytecode.Instr):
                        last_try_begin = bytecode.TryBegin(except_label, push_lasti=True)
                        bc.insert(i + 1, last_try_begin)
                        break
                    j += 1
                i += 1
            i += 1

        # The first region of ours starts at the very beginning of the code
        bc.insert(0, first_try_begin)

        # NOTE(review): last_try_begin can be None here if the loop above
        # closed a region without opening a new one -- confirm this cannot
        # happen in practice.
        bc.append(bytecode.TryEnd(last_try_begin))
        bc.append(except_label)
        bc.extend(CONTEXT_FOOT.bind({"context_exit": self._exit}))

        # Link the function to its original code object so that we can retrieve
        # it later if required.
        link_function_to_code(f.__code__, f)

        return bc.to_code()
567 | 700 |
|
568 | | - bc.append(bytecode.TryEnd(last_try_begin)) |
569 | | - bc.append(except_label) |
570 | | - bc.extend(CONTEXT_FOOT.bind({"context_exit": self._exit})) |
| 701 | + def wrap_lazy(self) -> None: |
| 702 | + """ |
| 703 | + Install lazy wrapping: replace function's __code__ with a trampoline |
| 704 | + that defers the expensive bytecode instrumentation until first call. |
| 705 | + """ |
| 706 | + f = self.__wrapped__ |
| 707 | + |
| 708 | + if self.is_wrapped(f): |
| 709 | + raise ValueError("Function already wrapped") |
| 710 | + |
| 711 | + # Lazy wrapping doesn't work with closures (functions with freevars) |
| 712 | + # because we can't replace __code__ with different freevar counts. |
| 713 | + # Fall back to eager wrapping in this case. |
| 714 | + if len(f.__code__.co_freevars) > 0: |
| 715 | + return self.wrap() |
| 716 | + |
| 717 | + # Mark the function as wrapped immediately (before the trampoline) |
| 718 | + t.cast(ContextWrappedFunction, f).__dd_context_wrapped__ = self |
| 719 | + |
| 720 | + # Create a trampoline code object with matching freevars |
| 721 | + tramp_code = _make_trampoline_code(f.__code__) |
| 722 | + |
| 723 | + # Register the builder that will be called on first invocation |
| 724 | + _lazy_registry[tramp_code] = LazyMeta(f, self._build_wrapped_code) |
| 725 | + |
| 726 | + # Swap in the trampoline (cheap operation) |
| 727 | + f.__code__ = tramp_code |
| 728 | + |
| 729 | + if sys.version_info >= (3, 11): |
| 730 | + |
| 731 | + def wrap(self) -> None: |
| 732 | + """Eagerly wrap the function with full bytecode instrumentation.""" |
| 733 | + f = self.__wrapped__ |
| 734 | + |
| 735 | + if self.is_wrapped(f): |
| 736 | + raise ValueError("Function already wrapped") |
571 | 737 |
|
572 | 738 | # Mark the function as wrapped by a wrapping context |
573 | 739 | t.cast(ContextWrappedFunction, f).__dd_context_wrapped__ = self |
574 | 740 |
|
575 | | - # Replace the function code with the wrapped code. We also link |
576 | | - # the function to its original code object so that we can retrieve |
577 | | - # it later if required. |
578 | | - link_function_to_code(f.__code__, f) |
579 | | - f.__code__ = bc.to_code() |
| 741 | + # Build and install the heavy wrapped code immediately |
| 742 | + f.__code__ = self._build_wrapped_code(f) |
580 | 743 |
|
581 | 744 | def unwrap(self) -> None: |
582 | 745 | f = self.__wrapped__ |
|
0 commit comments