Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions docs/source/overview/core_concepts.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,28 @@ Use :func:`nsight.analyze.kernel` to annotate a benchmark function. Nsight Pytho

benchmark(configs=[(1024,), (2048,)])

Config values are mapped to function parameters in declaration order. The following
parameter kinds are supported:

- **Regular parameters** (``POSITIONAL_OR_KEYWORD``): fully supported.
- **Keyword-only parameters** (after ``*`` or ``*args``): fully supported — config values
  are automatically passed as keyword arguments.
- **Parameters with default values**: configs may omit trailing parameters that have
  defaults. The defaults are filled in automatically.
- ``*args`` and ``**kwargs``: tolerated in the signature but ignored — they will always
  be empty during profiling and will not appear in the results.

.. code-block:: python

    @nsight.analyze.kernel
    def benchmark(x, y, *, mode="fast"):
        ...

    # mode has a default, so it can be omitted from configs.
    # These two calls are equivalent:
    benchmark(configs=[(1024, 2048)])
    benchmark(configs=[(1024, 2048, "fast")])

**3. Plot Decorator**
Add :func:`nsight.analyze.plot` to automatically generate plots from your profiling runs.

Expand Down
1 change: 1 addition & 0 deletions docs/source/release_notes/known_issues.rst
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,4 @@ Known Issues
- Kernels launched from a subprocess which is created within the annotated region will not be profiled.
- For the ``nsight.analyze.kernel``'s ``replay_mode="range"`` option, only a subset of CUDA APIs are supported within the annotated range. If an unsupported API call is detected, an error will be reported. For details on supported APIs, refer to the `NVIDIA Nsight Compute Profiling Guide <https://docs.nvidia.com/nsight-compute/ProfilingGuide/index.html#supported-apis>`_. In such cases, you can either switch to ``replay_mode="kernel"`` or modify the code to exclude the unsupported API from the annotated range.
- Nested annotations (using ``nsight.annotate`` within another ``nsight.annotate`` context) are not supported. nsight-python errors out when nested annotations are used.
- ``*args`` and ``**kwargs`` in decorated function signatures are tolerated but ignored — they will always be empty and will not appear in the profiling output.
126 changes: 112 additions & 14 deletions nsight/collection/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,87 @@
from nsight import annotation, exceptions, thermovision, transformation, utils


def _get_regular_params(
    sig: inspect.Signature,
) -> list[inspect.Parameter]:
    """Collect the non-variadic parameters of ``sig`` in declaration order.

    ``*args`` (VAR_POSITIONAL) and ``**kwargs`` (VAR_KEYWORD) entries are
    dropped; parameters of every other kind are kept.
    """
    variadic_kinds = (
        inspect.Parameter.VAR_POSITIONAL,
        inspect.Parameter.VAR_KEYWORD,
    )
    regular: list[inspect.Parameter] = []
    for param in sig.parameters.values():
        if param.kind in variadic_kinds:
            continue
        regular.append(param)
    return regular


def _count_params(
    sig: inspect.Signature,
) -> tuple[int, int]:
    """Tally the regular (non-variadic) parameters of a signature.

    Returns:
        ``(required_count, total_count)``: the number of regular parameters
        that have no default value, followed by the number of regular
        parameters overall (with or without a default).
    """
    regular = _get_regular_params(sig)
    without_default = [
        p for p in regular if p.default is inspect.Parameter.empty
    ]
    return len(without_default), len(regular)


def _pad_config_with_defaults(
    sig: inspect.Signature, config: Sequence[Any]
) -> tuple[Any, ...]:
    """Pad a config tuple with default values for any missing trailing parameters.

    Regular (non-variadic) parameters beyond ``len(config)`` are taken in
    declaration order and their default values are appended to the config.
    A config that already covers every regular parameter is returned
    unchanged (as a tuple).

    Args:
        sig: The function's inspect.Signature.
        config: The (possibly short) config tuple.

    Returns:
        A full-length config tuple with defaults filled in.

    Raises:
        exceptions.ProfilerException: If a parameter not covered by ``config``
            has no default value, e.g. a required keyword-only parameter
            declared after a defaulted one (``def f(a, b=1, *, c)`` with a
            two-value config).
    """
    uncovered = _get_regular_params(sig)[len(config):]

    # Without this check a missing parameter that has no default would leak
    # the ``inspect.Parameter.empty`` sentinel into the config as if it were
    # a real value.
    missing_required = [
        p.name for p in uncovered if p.default is inspect.Parameter.empty
    ]
    if missing_required:
        raise exceptions.ProfilerException(
            "Config is missing values for parameters without defaults: "
            + ", ".join(missing_required)
        )

    return (*config, *(p.default for p in uncovered))


def _bind_config_to_signature(
    sig: inspect.Signature, config: Sequence[Any]
) -> tuple[list[Any], dict[str, Any]]:
    """Split a config tuple into positional args and keyword-only kwargs.

    Maps config values to function parameters in declaration order, separating
    them into positional arguments (for every non-keyword-only regular
    parameter) and keyword arguments (for KEYWORD_ONLY params). VAR_POSITIONAL
    and VAR_KEYWORD parameters are skipped. Parameters that are not covered by
    a short config are left out of both results so the function can fill in
    its own defaults. Config values beyond the last regular parameter are
    ignored.

    Args:
        sig: The function's inspect.Signature.
        config: The config tuple to bind.

    Returns:
        A (positional_args, keyword_args) tuple ready for func(*pos, **kw).
    """
    regular_params = [
        param
        for param in sig.parameters.values()
        if param.kind
        not in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
    ]

    positional: list[Any] = []
    keyword: dict[str, Any] = {}
    # zip() stops at the shorter input, so a config that omits trailing
    # parameters simply leaves them unbound instead of raising StopIteration.
    for param, val in zip(regular_params, config):
        if param.kind == inspect.Parameter.KEYWORD_ONLY:
            keyword[param.name] = val
        else:
            positional.append(val)
    return positional, keyword


def _sanitize_configs(
func: Callable[..., Any],
*args: Any,
Expand Down Expand Up @@ -60,6 +141,10 @@ def _sanitize_configs(
- For functions with no parameters, an empty config [()] is created automatically.
- The function combines `args` and `kwargs` into a single list if `args` are provided.
- The function assumes that `kwargs` keys are in the expected order when combining.
- Config values are mapped to parameters in declaration order. Both
regular and keyword-only parameters are supported. Parameters with
defaults may be omitted from configs. ``*args`` and ``**kwargs``
are tolerated but ignored.
"""
if len(args) > 0:
# We do not expect any configs in this case
Expand All @@ -76,8 +161,8 @@ def _sanitize_configs(
if decorator_configs is None:
# Check if function takes no arguments
sig = inspect.signature(func)
expected_arg_count = len(sig.parameters)
if expected_arg_count == 0:
required_count, total_count = _count_params(sig)
if required_count == 0:
# For functions with no arguments, create a single empty config
# This allows calling the function without requiring explicit configs
configs = [()]
Expand Down Expand Up @@ -105,8 +190,8 @@ def _sanitize_configs(

# If function takes exactly one argument, allow scalar configs
sig = inspect.signature(func)
expected_arg_count = len(sig.parameters)
if expected_arg_count == 1:
required_count, total_count = _count_params(sig)
if total_count == 1:
normalized_configs: list[Sequence[Any]] = []
for config in configs:
if utils.is_scalar(config):
Expand All @@ -122,11 +207,21 @@ def _sanitize_configs(
)
first_config_arg_count = config_lengths[0]

# Validate that the number of args matches the number of function parameters
if first_config_arg_count != expected_arg_count:
raise exceptions.ProfilerException(
f"Configs have {first_config_arg_count} arguments, but function expects {expected_arg_count}"
)
# Validate that the number of args is between required and total parameters
if not (required_count <= first_config_arg_count <= total_count):
if required_count == total_count:
raise exceptions.ProfilerException(
f"Configs have {first_config_arg_count} arguments, but function expects {total_count}"
)
else:
raise exceptions.ProfilerException(
f"Configs have {first_config_arg_count} arguments, but function expects "
f"between {required_count} and {total_count}"
)

# Pad configs with default values for any missing trailing parameters
if first_config_arg_count < total_count:
configs = [_pad_config_with_defaults(sig, config) for config in configs]

return configs # type: ignore[return-value]

Expand Down Expand Up @@ -176,15 +271,16 @@ def run_profile_session(
config_lengths: list[int] = list()

for c in configs:
expected_arg_count = len(inspect.signature(func).parameters)
sig = inspect.signature(func)
required_count, total_count = _count_params(sig)

# Handle scalar values
if expected_arg_count == 1:
if total_count == 1:
if utils.is_scalar(c):
c = (c,)

# Check if func supports the input configs
if expected_arg_count != len(c):
if not (required_count <= len(c) <= total_count):
raise exceptions.ProfilerException(
f"Function '{func.__name__}' does not support the input configuration"
)
Expand All @@ -211,8 +307,10 @@ def run_profile_session(
# Clear active annotations before each run
annotation.clear_active_annotations()

# Run the function with the config
result = func(*c) # type: ignore[func-returns-value]
# Run the function with the config, splitting into positional
# and keyword-only args based on the function signature
pos_args, kw_args = _bind_config_to_signature(sig, c)
result = func(*pos_args, **kw_args) # type: ignore[func-returns-value]
if result is not None:
show_return_type_warning = True

Expand Down
15 changes: 10 additions & 5 deletions nsight/extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,11 @@ def extract_df_from_report(

sig = inspect.signature(func)

# Create a new array for each argument in the signature
arg_arrays: dict[str, list[Any]] = {name: [] for name in sig.parameters.keys()}
# Create a new array for each regular argument in the signature (exclude *args/**kwargs)
arg_arrays: dict[str, list[Any]] = {
name: [] for name, p in sig.parameters.items()
if p.kind not in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
}

# Extract all profiling data
if output_progress:
Expand Down Expand Up @@ -255,9 +258,11 @@ def extract_df_from_report(
all_metrics.append(tuple(metrics))
hostnames.append(socket.gethostname())
# Add a field for every config argument
bound_args = sig.bind(*conf)
for name, val in bound_args.arguments.items():
arg_arrays[name].append(val)
config_iter = iter(conf)
for name, param in sig.parameters.items():
if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD):
continue
arg_arrays[name].append(next(config_iter))

# Create the DataFrame with the initial columns
df_data = {
Expand Down
13 changes: 10 additions & 3 deletions nsight/transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,11 @@ def aggregate_data(
if output_progress:
print("[NSIGHT-PYTHON] Processing profiled data")

# Get the number of arguments in the signature of func
num_args = len(inspect.signature(func).parameters)
# Get the number of arguments in the signature of func (exclude *args/**kwargs)
num_args = sum(
1 for p in inspect.signature(func).parameters.values()
if p.kind not in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
)

# Get the last N fields of the dataframe where N is the number of arguments
# Note: When num_args=0, we need an empty list (not all columns via [-0:])
Expand Down Expand Up @@ -104,8 +107,12 @@ def convert_non_sortable_columns(dframe: pd.DataFrame) -> pd.DataFrame:
)(col),
)

# Ensure Value column is numeric (explode can leave it as object dtype
# even though the underlying values are always numeric from NCU metrics)
df["Value"] = pd.to_numeric(df["Value"])

# Apply aggregation with named aggregation
groupby_df = df.groupby(groupby_columns + func_fields)
groupby_df = df.groupby(groupby_columns + func_fields, dropna=False)
agg_df = groupby_df.agg(**named_aggs).reset_index()

# Compute 95% confidence intervals
Expand Down
Loading
Loading