diff --git a/docs/source/overview/core_concepts.rst b/docs/source/overview/core_concepts.rst index 8c937c6..b18deeb 100644 --- a/docs/source/overview/core_concepts.rst +++ b/docs/source/overview/core_concepts.rst @@ -58,6 +58,28 @@ Use :func:`nsight.analyze.kernel` to annotate a benchmark function. Nsight Pytho benchmark(configs=[(1024,), (2048,)]) +Config values are mapped to function parameters in declaration order. The following +parameter kinds are supported: + +- **Regular parameters** (``POSITIONAL_OR_KEYWORD``): fully supported. +- **Keyword-only parameters** (after ``*`` or ``*args``): fully supported — config values + are automatically passed as keyword arguments. +- **Parameters with default values**: configs may omit trailing parameters that have + defaults. The defaults are filled in automatically. +- ``*args`` and ``**kwargs``: tolerated in the signature but ignored — they will always + be empty during profiling and will not appear in the results. + +.. code-block:: python + + @nsight.analyze.kernel + def benchmark(x, y, *, mode="fast"): + ... + + # mode has a default, so it can be omitted from configs. + # These two calls are equivalent: + benchmark(configs=[(1024, 2048)]) + benchmark(configs=[(1024, 2048, "fast")]) + **3. Plot Decorator** Add :func:`nsight.analyze.plot` to automatically generate plots from your profiling runs. diff --git a/docs/source/release_notes/known_issues.rst b/docs/source/release_notes/known_issues.rst index b97d061..0687c3b 100644 --- a/docs/source/release_notes/known_issues.rst +++ b/docs/source/release_notes/known_issues.rst @@ -43,3 +43,4 @@ Known Issues - Kernels launched from a subprocess which is created within the annotated region will not be profiled. - For the ``nsight.analyze.kernel``'s ``replay_mode="range"`` option, only a subset of CUDA APIs are supported within the annotated range. If an unsupported API call is detected, an error will be reported. 
For details on supported APIs, refer to the `NVIDIA Nsight Compute Profiling Guide `_. In such cases, you can either switch to ``replay_mode="kernel"`` or modify the code to exclude the unsupported API from the annotated range. - Nested annotations (using ``nsight.annotate`` within another ``nsight.annotate`` context) are not supported. nsight-python errors out when nested annotations are used. +- ``*args`` and ``**kwargs`` in decorated function signatures are tolerated but ignored — they will always be empty and will not appear in the profiling output. diff --git a/nsight/collection/core.py b/nsight/collection/core.py index 450d049..9e3997b 100644 --- a/nsight/collection/core.py +++ b/nsight/collection/core.py @@ -19,6 +19,87 @@ from nsight import annotation, exceptions, thermovision, transformation, utils +def _get_regular_params( + sig: inspect.Signature, +) -> list[inspect.Parameter]: + """Return the list of regular (non-variadic) parameters from a signature.""" + return [ + p for p in sig.parameters.values() + if p.kind not in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD) + ] + + +def _count_params( + sig: inspect.Signature, +) -> tuple[int, int]: + """Count (required, total) regular parameters in a signature. + + Returns: + A (required_count, total_count) tuple. required_count excludes + parameters with defaults, total_count includes them. + """ + params = _get_regular_params(sig) + required = sum(1 for p in params if p.default is inspect.Parameter.empty) + return required, len(params) + + +def _pad_config_with_defaults( + sig: inspect.Signature, config: Sequence[Any] +) -> tuple[Any, ...]: + """Pad a config tuple with default values for any missing trailing parameters. + + If the config provides fewer values than the function has parameters, the + remaining parameters must have defaults, which are appended to the config. + This ensures the rest of the pipeline always sees full-length configs. 
+ + Args: + sig: The function's inspect.Signature. + config: The (possibly short) config tuple. + + Returns: + A full-length config tuple with defaults filled in. + """ + params = _get_regular_params(sig) + if len(config) == len(params): + return tuple(config) + padded = list(config) + for param in params[len(config):]: + padded.append(param.default) + return tuple(padded) + + +def _bind_config_to_signature( + sig: inspect.Signature, config: Sequence[Any] +) -> tuple[list[Any], dict[str, Any]]: + """Split a config tuple into positional args and keyword-only kwargs. + + Maps config values to function parameters in declaration order, separating + them into positional arguments (for POSITIONAL_OR_KEYWORD params) and + keyword arguments (for KEYWORD_ONLY params). VAR_POSITIONAL and VAR_KEYWORD + parameters are skipped. The config must supply a value for every regular + parameter (pad short configs first); otherwise next() raises StopIteration. + + Args: + sig: The function's inspect.Signature. + config: The config tuple to bind. + + Returns: + A (positional_args, keyword_args) tuple ready for func(*pos, **kw). + """ + positional: list[Any] = [] + keyword: dict[str, Any] = {} + config_iter = iter(config) + for param in sig.parameters.values(): + if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD): + continue + val = next(config_iter) + if param.kind == inspect.Parameter.KEYWORD_ONLY: + keyword[param.name] = val + else: + positional.append(val) + return positional, keyword + + def _sanitize_configs( func: Callable[..., Any], *args: Any, @@ -60,6 +141,10 @@ def _sanitize_configs( - For functions with no parameters, an empty config [()] is created automatically. - The function combines `args` and `kwargs` into a single list if `args` are provided. - The function assumes that `kwargs` keys are in the expected order when combining. + - Config values are mapped to parameters in declaration order. Both + regular and keyword-only parameters are supported. 
Parameters with + defaults may be omitted from configs. ``*args`` and ``**kwargs`` + are tolerated but ignored. """ if len(args) > 0: # We do not expect any configs in this case @@ -76,8 +161,8 @@ def _sanitize_configs( if decorator_configs is None: # Check if function takes no arguments sig = inspect.signature(func) - expected_arg_count = len(sig.parameters) - if expected_arg_count == 0: + required_count, total_count = _count_params(sig) + if required_count == 0: # For functions with no arguments, create a single empty config # This allows calling the function without requiring explicit configs configs = [()] @@ -105,8 +190,8 @@ def _sanitize_configs( # If function takes exactly one argument, allow scalar configs sig = inspect.signature(func) - expected_arg_count = len(sig.parameters) - if expected_arg_count == 1: + required_count, total_count = _count_params(sig) + if total_count == 1: normalized_configs: list[Sequence[Any]] = [] for config in configs: if utils.is_scalar(config): @@ -122,11 +207,21 @@ def _sanitize_configs( ) first_config_arg_count = config_lengths[0] - # Validate that the number of args matches the number of function parameters - if first_config_arg_count != expected_arg_count: - raise exceptions.ProfilerException( - f"Configs have {first_config_arg_count} arguments, but function expects {expected_arg_count}" - ) + # Validate that the number of args is between required and total parameters + if not (required_count <= first_config_arg_count <= total_count): + if required_count == total_count: + raise exceptions.ProfilerException( + f"Configs have {first_config_arg_count} arguments, but function expects {total_count}" + ) + else: + raise exceptions.ProfilerException( + f"Configs have {first_config_arg_count} arguments, but function expects " + f"between {required_count} and {total_count}" + ) + + # Pad configs with default values for any missing trailing parameters + if first_config_arg_count < total_count: + configs = 
[_pad_config_with_defaults(sig, config) for config in configs] return configs # type: ignore[return-value] @@ -176,15 +271,16 @@ def run_profile_session( config_lengths: list[int] = list() for c in configs: - expected_arg_count = len(inspect.signature(func).parameters) + sig = inspect.signature(func) + required_count, total_count = _count_params(sig) # Handle scalar values - if expected_arg_count == 1: + if total_count == 1: if utils.is_scalar(c): c = (c,) # Check if func supports the input configs - if expected_arg_count != len(c): + if not (required_count <= len(c) <= total_count): raise exceptions.ProfilerException( f"Function '{func.__name__}' does not support the input configuration" ) @@ -211,8 +307,10 @@ def run_profile_session( # Clear active annotations before each run annotation.clear_active_annotations() - # Run the function with the config - result = func(*c) # type: ignore[func-returns-value] + # Run the function with the config, splitting into positional + # and keyword-only args based on the function signature + pos_args, kw_args = _bind_config_to_signature(sig, c) + result = func(*pos_args, **kw_args) # type: ignore[func-returns-value] if result is not None: show_return_type_warning = True diff --git a/nsight/extraction.py b/nsight/extraction.py index f715301..2d8b3ab 100644 --- a/nsight/extraction.py +++ b/nsight/extraction.py @@ -123,8 +123,11 @@ def extract_df_from_report( sig = inspect.signature(func) - # Create a new array for each argument in the signature - arg_arrays: dict[str, list[Any]] = {name: [] for name in sig.parameters.keys()} + # Create a new array for each regular argument in the signature (exclude *args/**kwargs) + arg_arrays: dict[str, list[Any]] = { + name: [] for name, p in sig.parameters.items() + if p.kind not in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD) + } # Extract all profiling data if output_progress: @@ -255,9 +258,11 @@ def extract_df_from_report( all_metrics.append(tuple(metrics)) 
hostnames.append(socket.gethostname()) # Add a field for every config argument - bound_args = sig.bind(*conf) - for name, val in bound_args.arguments.items(): - arg_arrays[name].append(val) + config_iter = iter(conf) + for name, param in sig.parameters.items(): + if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD): + continue + arg_arrays[name].append(next(config_iter)) # Create the DataFrame with the initial columns df_data = { diff --git a/nsight/transformation.py b/nsight/transformation.py index 661d94a..a432ac2 100644 --- a/nsight/transformation.py +++ b/nsight/transformation.py @@ -37,8 +37,11 @@ def aggregate_data( if output_progress: print("[NSIGHT-PYTHON] Processing profiled data") - # Get the number of arguments in the signature of func - num_args = len(inspect.signature(func).parameters) + # Get the number of arguments in the signature of func (exclude *args/**kwargs) + num_args = sum( + 1 for p in inspect.signature(func).parameters.values() + if p.kind not in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD) + ) # Get the last N fields of the dataframe where N is the number of arguments # Note: When num_args=0, we need an empty list (not all columns via [-0:]) @@ -104,8 +107,12 @@ def convert_non_sortable_columns(dframe: pd.DataFrame) -> pd.DataFrame: )(col), ) + # Ensure Value column is numeric (explode can leave it as object dtype + # even though the underlying values are always numeric from NCU metrics) + df["Value"] = pd.to_numeric(df["Value"]) + # Apply aggregation with named aggregation - groupby_df = df.groupby(groupby_columns + func_fields) + groupby_df = df.groupby(groupby_columns + func_fields, dropna=False) agg_df = groupby_df.agg(**named_aggs).reset_index() # Compute 95% confidence intervals diff --git a/tests/test_profiler.py b/tests/test_profiler.py index 9ec81e3..130e231 100644 --- a/tests/test_profiler.py +++ b/tests/test_profiler.py @@ -431,9 +431,13 @@ def function_with_default_parameter(x: 
int, y: Any = None) -> None: def test_function_with_default_parameter() -> None: - """Test that calling function with defaults without providing all args raises error.""" - with pytest.raises(exceptions.ProfilerException): - function_with_default_parameter() + """Test that configs can omit parameters with defaults — they get padded automatically.""" + result = function_with_default_parameter() + df = result.to_dataframe() + + assert len(df) == 2, f"Expected 2 rows (2 configs), got {len(df)}" + # y has default None, which becomes NaN in pandas + assert df["y"].isna().all(), f"y should use default None/NaN, got {df['y'].tolist()}" # ============================================================================ @@ -1231,3 +1235,154 @@ def profiled_func(x: int, y: int) -> None: assert all( df["AvgValue"].notna() & (df["AvgValue"] > 0) ), f"Invalid AvgValue for metric {metrics}" + + +# ============================================================================ +# Functions with **kwargs +# ============================================================================ + + +def test_function_with_kwargs() -> None: + """Test that functions with **kwargs in their signature work correctly. + + Regression test: _sanitize_configs counted **kwargs via len(sig.parameters), + causing a spurious validation error because the config arg count didn't + match the inflated parameter count. + """ + + @nsight.analyze.kernel(output="quiet") + def kernel_with_kwargs(x: int, y: int, **kwargs: Any) -> None: + a = torch.randn(x, y, device="cuda") + b = torch.randn(x, y, device="cuda") + with nsight.annotate("test_kwargs"): + _ = a + b + + result = kernel_with_kwargs(configs=[(32, 32)]) + df = result.to_dataframe() + + assert len(df) == 1, f"Expected 1 row, got {len(df)}" + assert df["x"].iloc[0] == 32 + assert df["y"].iloc[0] == 32 + + +def test_function_with_keyword_only_params() -> None: + """Test that functions with keyword-only parameters work correctly. 
+ + Keyword-only parameters (after * or *args) should be supported — + config values are mapped to parameters in declaration order and + passed as keyword arguments for keyword-only params. + """ + + @nsight.analyze.kernel(output="quiet") + def kernel_with_kw_only(x: int, *, y: int) -> None: + a = torch.randn(x, y, device="cuda") + b = torch.randn(x, y, device="cuda") + with nsight.annotate("test_kw_only"): + _ = a + b + + result = kernel_with_kw_only(configs=[(32, 64)]) + df = result.to_dataframe() + + assert len(df) == 1, f"Expected 1 row, got {len(df)}" + assert df["x"].iloc[0] == 32 + assert df["y"].iloc[0] == 64 + + +def test_function_with_args_and_keyword_only() -> None: + """Test that functions with *args and keyword-only params after it work.""" + + @nsight.analyze.kernel(output="quiet") + def kernel_mixed(x: int, *args: Any, y: int, **kwargs: Any) -> None: + a = torch.randn(x, y, device="cuda") + b = torch.randn(x, y, device="cuda") + with nsight.annotate("test_mixed"): + _ = a + b + + result = kernel_mixed(configs=[(32, 64)]) + df = result.to_dataframe() + + assert len(df) == 1, f"Expected 1 row, got {len(df)}" + assert df["x"].iloc[0] == 32 + assert df["y"].iloc[0] == 64 + + +def test_default_values_omitted() -> None: + """Test that configs can omit parameters that have default values.""" + + @nsight.analyze.kernel(output="quiet") + def kernel_with_defaults(x: int, y: int, z: int = 64) -> None: + a = torch.randn(x, y, device="cuda") + b = torch.randn(x, y, device="cuda") + with nsight.annotate("test_defaults"): + _ = a + b + + result = kernel_with_defaults(configs=[(32, 32)]) + df = result.to_dataframe() + + assert len(df) == 1, f"Expected 1 row, got {len(df)}" + assert df["x"].iloc[0] == 32 + assert df["y"].iloc[0] == 32 + assert df["z"].iloc[0] == 64, f"z should use default 64, got {df['z'].iloc[0]}" + + +def test_default_values_overridden() -> None: + """Test that configs can explicitly provide values for defaulted params.""" + + 
@nsight.analyze.kernel(output="quiet") + def kernel_with_defaults(x: int, y: int, z: int = 64) -> None: + a = torch.randn(x, y, device="cuda") + b = torch.randn(x, y, device="cuda") + with nsight.annotate("test_defaults_override"): + _ = a + b + + result = kernel_with_defaults(configs=[(32, 32, 128)]) + df = result.to_dataframe() + + assert len(df) == 1, f"Expected 1 row, got {len(df)}" + assert df["z"].iloc[0] == 128, f"z should be overridden to 128, got {df['z'].iloc[0]}" + + +def test_keyword_only_default_omitted() -> None: + """Test keyword-only params with defaults can be omitted from configs.""" + + @nsight.analyze.kernel(output="quiet") + def kernel_kw_default(x: int, *, y: int = 32) -> None: + a = torch.randn(x, y, device="cuda") + b = torch.randn(x, y, device="cuda") + with nsight.annotate("test_kw_default"): + _ = a + b + + result = kernel_kw_default(configs=[(32,)]) + df = result.to_dataframe() + + assert len(df) == 1, f"Expected 1 row, got {len(df)}" + assert df["x"].iloc[0] == 32 + assert df["y"].iloc[0] == 32, f"y should use default 32, got {df['y'].iloc[0]}" + + +def test_too_few_config_args_rejected() -> None: + """Test that configs with fewer args than required params are rejected.""" + + @nsight.analyze.kernel(output="quiet") + def kernel_two_required(x: int, y: int, z: int = 64) -> None: + a = torch.randn(x, y, device="cuda") + b = torch.randn(x, y, device="cuda") + with nsight.annotate("test"): + _ = a + b + + with pytest.raises(exceptions.ProfilerException, match="between 2 and 3"): + kernel_two_required(configs=[(32,)]) + + +def test_too_many_config_args_rejected() -> None: + """Test that configs with more args than total params are rejected.""" + + @nsight.analyze.kernel(output="quiet") + def kernel_two_params(x: int, y: int) -> None: + a = torch.randn(x, y, device="cuda") + b = torch.randn(x, y, device="cuda") + with nsight.annotate("test"): + _ = a + b + + with pytest.raises(exceptions.ProfilerException, match="function expects 2"): + 
kernel_two_params(configs=[(32, 32, 99)])