# Source code for nvbenjo.cfg

import logging
import os
import typing as ty
from abc import ABC
from contextlib import nullcontext
from dataclasses import dataclass, field
from pathlib import Path
from hydra.utils import instantiate
from omegaconf import DictConfig, OmegaConf, open_dict

from .utils import PrecisionType, ProviderType, CompileMode


def _default_cache_dir() -> str:
    """Return the default on-disk location of the torch compile cache.

    The root is taken from the ``XDG_CACHE_HOME`` environment variable; when
    that variable is unset or empty, ``~/.cache`` is used instead. A leading
    ``~`` is expanded, and ``nvbenjo/torchcache`` is appended to the root.

    Returns
    -------
    str
        The cache directory path as a string. The directory is not created.
    """
    cache_root = os.environ.get("XDG_CACHE_HOME")
    if not cache_root:
        # Covers both "variable missing" and "variable set to an empty string".
        cache_root = "~/.cache"
    cache_path = Path(cache_root).expanduser().joinpath("nvbenjo", "torchcache")
    return str(cache_path)


logger = logging.getLogger(__name__)


@dataclass
class BaseModelConfig(ABC):
    """Backend-independent settings describing one model to benchmark.

    Parameters
    ----------
    name : str
        Name of the model.
    type_or_path : str
        Model type or path: either a local file path or a model identifier.
    kwargs : dict
        Extra keyword arguments passed when the model is instantiated.
    shape : tuple
        Input shape of the model. The batch dimension is written as ``"B"``
        or ``"batch_size"``.
    num_warmup_batches : int
        How many warm-up batches are run before measurement starts.
    num_batches : int
        How many batches are run for the actual measurement.
    batch_sizes : tuple
        Batch sizes to benchmark.
    devices : tuple of str
        Device names to benchmark on (e.g. ``'cpu'``, ``'cuda:0'``).
    runtime_options : dict[str, ty.Any]
        Maps a runtime name to the configuration of that runtime.
    custom_batchmetrics : dict[str, float]
        Custom per-batch metrics added to the benchmark results. Each one is
        computed as ``value / time_total_batch_normalized``.

        If every batch is one frame, FPS can be obtained with::

            custom_batchmetrics:
                fps: 1.0
                batch-per-second: 1.0

        If every sample in the batch is 3s of audio, the real time factor
        can be obtained with::

            custom_batchmetrics:
                real-time-factor: 3
    """

    # --- model identity and construction ---
    name: str = "resnet"
    type_or_path: str = "torchvision:wide_resnet101_2"
    kwargs: dict = field(default_factory=dict)

    # --- input description and benchmark sizing ---
    shape: tuple = ("B", 3, 224, 224)
    num_warmup_batches: int = 5
    num_batches: int = 50
    batch_sizes: tuple = (16, 32)
    devices: tuple[str] = ("cpu",)

    # --- per-runtime settings and derived metrics ---
    runtime_options: dict[str, ty.Any] = field(default_factory=dict)
    custom_batchmetrics: dict[str, float] = field(default_factory=dict)



@dataclass
class NvbenjoConfig:
    """
    Top-level nvbenjo benchmarking options.

    Parameters
    ----------
    measure_memory : bool
        Whether GPU memory allocation is measured while benchmarking.
    models: dict[str, TorchModelConfig | OnnxModelConfig]
        Maps each model name to its configuration.
        See :class:`TorchModelConfig` and :class:`OnnxModelConfig` for details.
    """

    measure_memory: bool = True
    # Every instance starts with its own empty mapping.
    models: dict[str, ty.Any] = field(default_factory=dict)




@dataclass
class BenchConfig:
    """
    Outermost container of a benchmark configuration.

    Parameters
    ----------
    nvbenjo : NvbenjoConfig
        The nvbenjo-specific settings.
    output_dir : str or None
        Directory in which benchmark results are saved.
        If None, uses Hydra's default output directory.
    """

    nvbenjo: NvbenjoConfig = field(default_factory=NvbenjoConfig)
    output_dir: ty.Optional[str] = field(default=None)




@dataclass
class TorchRuntimeConfig:
    """Runtime settings for benchmarking a model with PyTorch.

    Parameters
    ----------
    compile : str
        Model compilation mode:

        - ``false`` -- No compilation (default)
        - ``torch_compile`` -- Compile with ``torch.compile`` (PyTorch 2.0+)
        - ``aot_compile`` -- Ahead-of-time compilation via ``torch._inductor``

        Booleans and the strings ``true``/``false`` (any letter case) are
        accepted as shorthands: true is rewritten to ``torch_compile`` and
        false to ``none``.
    compile_kwargs : dict
        Extra keyword arguments forwarded to ``torch.compile`` or
        ``aoti_compile_and_package``.
    precision : PrecisionType
        Precision used for model inference (e.g., fp32, fp16, amp).
    matmul_precision : str or None
        Precision of float32 matrix multiplications on GPUs with tensor
        cores (``torch.set_float32_matmul_precision``).
        One of ``"highest"``, ``"high"``, or ``"medium"``.
        With ``None`` (default) the current PyTorch global setting is left
        unchanged.
    cuda_graphs : bool
        Wrap inference in a CUDA Graph capture/replay. Eliminates per-launch
        CPU dispatch overhead at the cost of one captured graph per
        (model, batch_size, shape, dtype). Requires a CUDA device; ignored
        on CPU.
    cuda_graph_kwargs : dict
        Extra keyword arguments forwarded to ``torch.cuda.graph``.
    enable_profiling : bool
        Whether the PyTorch profiler is enabled during inference.
    profiling_prefix : str or None
        Prefix of the profiler output files. If None, a default path will
        be used.
    profiler_kwargs : dict
        Extra keyword arguments for ``torch.profiler.profile``.
    cache_dir : str or None
        Directory for caching AOT-compiled packages. When set, the AOT compile
        step is skipped on cache hits keyed by (torch/cuda version, model
        identity, file size+mtime for path-based models, shape, batch_size,
        precision, compile_kwargs, device type, GPU compute capability).
        Defaults to ``$XDG_CACHE_HOME/nvbenjo/torchcache`` (or
        ``~/.cache/nvbenjo/torchcache`` if ``XDG_CACHE_HOME`` is unset).
        Set to ``None`` to disable caching.
    """

    # --- compilation ---
    compile: str = "False"
    compile_kwargs: dict = field(default_factory=dict)

    # --- numerics ---
    precision: PrecisionType = PrecisionType.FP32
    matmul_precision: ty.Optional[ty.Literal["highest", "high", "medium"]] = None

    # --- CUDA graphs ---
    cuda_graphs: bool = False
    cuda_graph_kwargs: dict = field(default_factory=dict)

    # --- profiling ---
    enable_profiling: bool = False
    profiling_prefix: ty.Optional[str] = None
    profiler_kwargs: dict = field(default_factory=dict)

    # --- AOT package cache ---
    cache_dir: ty.Optional[str] = field(default_factory=_default_cache_dir)

    def __post_init__(self):
        """Normalize ``compile`` and derive the private ``_compile_mode``."""
        # Hydra hands values over as primitives, so ``compile`` may arrive as
        # a real bool or as its textual form. ``str(True).lower()`` is
        # "true", hence one textual lookup covers both spellings.
        boolean_aliases = {"true": "torch_compile", "false": "none"}
        normalized = boolean_aliases.get(str(self.compile).lower())
        if normalized is not None:
            self.compile = normalized
        self._compile_mode = CompileMode(self.compile.lower())




@dataclass
class OnnxRuntimeConfig:
    """Runtime settings for benchmarking a model with ONNX Runtime.

    Parameters
    ----------
    execution_providers : list of ProviderType or None
        Execution providers to use (e.g., 'CPUExecutionProvider',
        'CUDAExecutionProvider'). If None, uses the default provider.
    graph_optimization_level : str
        Graph optimization level for ONNX Runtime. Options are
        'ORT_ENABLE_ALL', 'ORT_ENABLE_LAYOUT', 'ORT_ENABLE_BASIC',
        'ORT_DISABLE_ALL'.
    intra_op_num_threads : int
        Number of threads used to parallelize the execution within nodes.
    inter_op_num_threads : int
        Number of threads used to parallelize the execution of the graph
        (between nodes).
    log_severity_level : int
        Logging severity level (0=VERBOSE, 1=INFO, 2=WARNING, 3=ERROR, 4=FATAL).
    enable_profiling : bool
        Whether profiling is enabled in ONNX Runtime.
    profiling_prefix : str or None
        Prefix of the profiling output files. If None, a default path will
        be used.
    provider_options : sequence of dict or None
        Additional options for each execution provider.
    """

    execution_providers: ty.Optional[ty.List[ProviderType]] = None
    # Numeric equivalents of the level names:
    # 99 ORT_ENABLE_ALL, 3 ORT_ENABLE_LAYOUT, 1 ORT_ENABLE_BASIC, 0 ORT_DISABLE_ALL
    graph_optimization_level: str = "ORT_ENABLE_ALL"
    intra_op_num_threads: int = 1
    inter_op_num_threads: int = 0
    # 3 == ERROR
    log_severity_level: int = 3
    enable_profiling: bool = False
    profiling_prefix: ty.Optional[str] = None
    provider_options: ty.Optional[ty.Sequence[dict[ty.Any, ty.Any]]] = None




@dataclass
class TorchModelConfig(BaseModelConfig):
    """Configuration of a model that is benchmarked with PyTorch.

    Parameters
    ----------
    name : str
        Name of the model.
    type_or_path : str
        Model type or path. A prefix selects the model source:

        - ``torchvision:<name>`` -- Load a torchvision model (e.g. ``torchvision:resnet50``)
        - ``huggingface:<name>`` -- Load a HuggingFace AutoModel (e.g. ``huggingface:bert-base-uncased``)
        - ``jit:<path>`` -- Load a TorchScript/JIT model
        - ``torchexport:<path>`` -- Load a ``torch.export`` saved model
        - ``aot:<path>`` -- Load a pre-compiled AOT model

        .. note::

            For ``torchexport`` and ``aot`` models, precision is baked in at export time
            and cannot be changed at runtime.

        - *(no prefix)* -- Path to a model saved with ``torch.save`` or ``torch.jit.save``
    kwargs : dict
        Extra keyword arguments passed when the model is instantiated.
    shape : tuple
        Input shape of the model. The batch dimension is written as "B".
        Examples::

            # Single input shape
            ("B", 3, 224, 224)

            # Multiple input shapes
            (("B", 3, 224, 224), ("B", 10))

            # Dictionary with metadata
            ({"name": "input1", "type": "float", "shape": ("B", 3, 224, 224), "min_max": (0, 1)},)

            # Multiple dictionary inputs
            (
                {"name": "input1", "type": "float", "shape": ("B", 3, 224, 224), "min_max": (0, 1)},
                {"name": "input2", "type": "int", "shape": (1, 3)},
                {"name": "input3", "type": "int", "shape": (), "value": 42},
            )
    num_warmup_batches : int
        How many warm-up batches are run before measurement starts.
    num_batches : int
        How many batches are run for the actual measurement.
    batch_sizes : tuple
        Batch sizes to benchmark.
    devices : tuple of str
        Device names to benchmark on (e.g., 'cpu', 'cuda:0').
    model_kwargs : dict
        Separate dictionary field declared on this class, empty by default.
        How it relates to ``kwargs`` is not defined in this module.
    runtime_options : dict[str, :class:`~nvbenjo.cfg.TorchRuntimeConfig`]
        Maps a runtime name to the configuration of that runtime. Defaults
        to a single ``"default"`` entry holding a default
        :class:`~nvbenjo.cfg.TorchRuntimeConfig`.
    """

    model_kwargs: dict = field(default_factory=dict)
    runtime_options: dict[str, TorchRuntimeConfig] = field(default_factory=lambda: {"default": TorchRuntimeConfig()})

    def __post_init__(self):
        """Convert ``DictConfig`` runtime entries and check pre-exported models.

        Raises
        ------
        ValueError
            If the model is pre-exported (``aot:`` / ``torchexport:``) and its
            runtime options name more than one distinct precision.
        """
        # Only existing keys are reassigned, so the mapping keeps its size
        # while it is being iterated.
        for runtime_name, runtime in self.runtime_options.items():
            if not isinstance(runtime, DictConfig):
                continue
            rebuilt = TorchRuntimeConfig(**OmegaConf.to_container(runtime))  # type: ignore
            self.runtime_options[runtime_name] = OmegaConf.structured(rebuilt)  # type: ignore

        # The remaining checks only concern pre-exported models.
        if not self.type_or_path.startswith(("aot:", "torchexport:")):
            return

        # Entries that are not TorchRuntimeConfig instances are left out of both checks.
        typed_runtimes = [rt for rt in self.runtime_options.values() if isinstance(rt, TorchRuntimeConfig)]

        precisions = {rt.precision for rt in typed_runtimes}
        if len(precisions) > 1:
            raise ValueError(
                f"Model '{self.name}' is pre-exported — precision is baked in at export time. "
                f"Got multiple different precisions in runtime_options: {precisions}"
            )

        compile_modes = {rt._compile_mode for rt in typed_runtimes}
        if compile_modes.difference({CompileMode.NONE}):
            logger.warning(f"Model '{self.name}' is pre-exported — setting compile has no effect.")




@dataclass
class OnnxModelConfig(BaseModelConfig):
    """Configuration of a model that is benchmarked with ONNX Runtime.

    Parameters
    ----------
    name : str
        Name of the model.
    type_or_path : str
        Model type or path: either a local file path or a model identifier.
    kwargs : dict
        Extra keyword arguments passed when the model is instantiated.
    shape : tuple
        Input shape of the model. The batch dimension is written as "B".

        Examples::

            # Single input shape
            ("B", 3, 224, 224)

            # Multiple input shapes
            (("B", 3, 224, 224), ("B", 10))

            # Dictionary with metadata
            ({"name": "input1", "type": "float", "shape": ("B", 3, 224, 224), "min_max": (0, 1)},)

            # Multiple dictionary inputs
            (
                {"name": "input1", "type": "float", "shape": ("B", 3, 224, 224), "min_max": (0, 1)},
                {"name": "input2", "type": "int", "shape": (1, 3)},
                {"name": "input3", "type": "int", "shape": (), "value": 42},
            )
    num_warmup_batches : int
        How many warm-up batches are run before measurement starts.
    num_batches : int
        How many batches are run for the actual measurement.
    batch_sizes : tuple
        Batch sizes to benchmark.
    devices : tuple of str
        Device names to benchmark on (e.g., 'cpu', 'cuda:0').
    runtime_options : dict[str, :class:`~nvbenjo.cfg.OnnxRuntimeConfig`]
        Maps a runtime name to the configuration of that runtime. Defaults
        to a single ``"default"`` entry holding a default
        :class:`~nvbenjo.cfg.OnnxRuntimeConfig`.
    """

    runtime_options: dict[str, OnnxRuntimeConfig] = field(default_factory=lambda: {"default": OnnxRuntimeConfig()})

    def __post_init__(self):
        """Replace every ``DictConfig`` runtime entry by a structured ``OnnxRuntimeConfig``."""
        # Only existing keys are reassigned, so the mapping keeps its size
        # while it is being iterated.
        for runtime_name, runtime in self.runtime_options.items():
            if not isinstance(runtime, DictConfig):
                continue
            rebuilt = OnnxRuntimeConfig(**OmegaConf.to_container(runtime))  # type: ignore
            self.runtime_options[runtime_name] = OmegaConf.structured(rebuilt)  # type: ignore



def _qualified_name(cls: type) -> str:
    """Return the dotted import path of *cls*, as used for Hydra's ``_target_`` key."""
    return f"{cls.__module__}.{cls.__qualname__}"


def _instantiate_runtimes(model: ty.Any, runtime_cls: type) -> dict[str, ty.Any]:
    """Tag every ``runtime_options`` entry of *model* with ``_target_`` and instantiate it.

    Parameters
    ----------
    model
        Mapping-like model configuration that contains ``runtime_options``.
        It is modified in place: each runtime entry receives a ``_target_`` key.
    runtime_cls
        Runtime configuration class every entry is instantiated as.

    Returns
    -------
    dict
        Maps each runtime name to its instantiated runtime configuration.
    """
    target = _qualified_name(runtime_cls)
    instantiated = {}
    for runtime_name in model["runtime_options"].keys():
        runtime_cfg = model["runtime_options"][runtime_name]
        runtime_cfg["_target_"] = target
        runtime = instantiate(runtime_cfg)
        # The config carries the precision as text; turn it into the enum
        # member. An entry that does not set ``precision`` keeps the
        # dataclass default instead of failing on the missing key.
        if runtime_cls is TorchRuntimeConfig and "precision" in runtime_cfg:
            precision = runtime_cfg["precision"]
            if isinstance(precision, str):
                runtime.precision = PrecisionType[precision.upper()]
        instantiated[runtime_name] = runtime
    return instantiated


def instantiate_model_configs(cfg: ty.Union[BenchConfig, DictConfig]) -> dict[str, BaseModelConfig]:
    """Turn every entry of ``cfg.nvbenjo.models`` into a model configuration object.

    Entries without a ``_target_`` key get one injected: models whose
    ``type_or_path`` ends in ``.onnx`` or starts with ``onnx:`` become
    :class:`OnnxModelConfig`, everything else becomes :class:`TorchModelConfig`.
    Their ``runtime_options`` entries are instantiated as the matching runtime
    configuration class. Entries that already are :class:`BaseModelConfig`
    instances are passed through unchanged.

    For every runtime with profiling enabled, ``profiling_prefix`` is resolved
    to a location inside ``cfg.output_dir`` and its parent directory is created.

    Parameters
    ----------
    cfg
        Benchmark configuration. ``DictConfig`` model entries are modified in
        place (``_target_`` / ``_convert_`` keys are added).

    Returns
    -------
    dict[str, BaseModelConfig]
        Maps each model name to its instantiated configuration.

    Raises
    ------
    ValueError
        If a runtime enables profiling while ``cfg.output_dir`` is None.
    """
    models = {}
    for model_name, model in cfg.nvbenjo.models.items():
        if isinstance(model, BaseModelConfig):
            # Already instantiated; a membership test on a dataclass
            # instance would raise TypeError, so pass it through as is.
            models[model_name] = model
            continue

        runtimes = None
        if "_target_" not in model:
            # A DictConfig may be in struct mode; open it so keys can be added.
            ctxt = open_dict(model) if isinstance(model, DictConfig) else nullcontext()
            with ctxt:
                type_or_path = model["type_or_path"]
                is_onnx = type_or_path.endswith(".onnx") or type_or_path.startswith("onnx:")
                if is_onnx:
                    model_cls, runtime_cls = OnnxModelConfig, OnnxRuntimeConfig
                else:
                    model_cls, runtime_cls = TorchModelConfig, TorchRuntimeConfig

                model["_target_"] = _qualified_name(model_cls)
                model["_convert_"] = "all"
                if "runtime_options" in model:
                    runtimes = _instantiate_runtimes(model, runtime_cls)

        models[model_name] = instantiate(model) if isinstance(model, DictConfig) else model
        if runtimes is not None:
            # Use the runtime objects built above rather than whatever the
            # model instantiation produced for ``runtime_options``.
            models[model_name].runtime_options = runtimes

    # For ONNX and Torch profiling, make sure a valid profiling prefix inside
    # the output directory exists where needed.
    for model_name, model in models.items():
        if not isinstance(model, (OnnxModelConfig, TorchModelConfig)):
            continue
        for runtime_name, runtime in model.runtime_options.items():
            if not runtime.enable_profiling:
                continue
            if cfg.output_dir is None:
                raise ValueError("output_dir must be set when profiling is enabled.")

            if runtime.profiling_prefix is None:
                runtime.profiling_prefix = os.path.join(
                    cfg.output_dir, model_name, f"{model_name}_{runtime_name}_profile"
                )
            elif os.path.abspath(runtime.profiling_prefix) != runtime.profiling_prefix:
                # Not already a normalized absolute path: anchor a relative
                # prefix inside the output dir (an absolute one is only
                # normalized, since os.path.join keeps an absolute second part).
                runtime.profiling_prefix = os.path.abspath(os.path.join(cfg.output_dir, runtime.profiling_prefix))
            os.makedirs(os.path.dirname(runtime.profiling_prefix), exist_ok=True)

    return models