Source code for nvbenjo.system_info

from __future__ import annotations

from platform import uname
from typing import Any
import logging

import pynvml
from cpuinfo import get_cpu_info
import psutil

from .utils import format_num

logger = logging.getLogger(__name__)


def _get_architecture_name_from_version(version: int) -> str:
    version_names = {
        pynvml.NVML_DEVICE_ARCH_KEPLER: "Kepler",
        pynvml.NVML_DEVICE_ARCH_MAXWELL: "Maxwell",
        pynvml.NVML_DEVICE_ARCH_PASCAL: "Pascal",
        pynvml.NVML_DEVICE_ARCH_VOLTA: "Volta",
        pynvml.NVML_DEVICE_ARCH_TURING: "Truing",
        pynvml.NVML_DEVICE_ARCH_AMPERE: "Ampere",
        pynvml.NVML_DEVICE_ARCH_ADA: "Ada",
        pynvml.NVML_DEVICE_ARCH_HOPPER: "Hopper",
        pynvml.NVML_DEVICE_ARCH_BLACKWELL: "Blackwell",
    }
    return f"Version {version} ({version_names.get(version, 'Unknown')})"


[docs] def get_gpu_info() -> dict[str, Any]: """Retrieve information about GPUs in the system. Includes information such as name, architecture, memory, clock speeds, CUDA capability, and driver version. Returns ------- list[dict[str, Any]] A list of dictionaries containing GPU information. """ cuda_info = {} pynvml.nvmlInit() device_count = pynvml.nvmlDeviceGetCount() cuda_info["driver_version"] = str(pynvml.nvmlSystemGetDriverVersion()) try: import torch cuda_info["torch_version"] = str(torch.__version__) cuda_info["cudnn_version"] = str(torch.backends.cudnn.version()) except ImportError: pass gpus = [] for i in range(device_count): handle = pynvml.nvmlDeviceGetHandleByIndex(i) compute_capability = pynvml.nvmlDeviceGetCudaComputeCapability(handle) clock_gpu = pynvml.nvmlDeviceGetClockInfo(handle, pynvml.NVML_CLOCK_GRAPHICS) clock_mem = pynvml.nvmlDeviceGetClockInfo(handle, pynvml.NVML_CLOCK_MEM) gpus.append( { "idx": i, "name": pynvml.nvmlDeviceGetName(handle), "architecture": _get_architecture_name_from_version(pynvml.nvmlDeviceGetArchitecture(handle)), "memory": format_num(pynvml.nvmlDeviceGetMemoryInfo(handle).total, bytes=True), "clock_gpu": f"{clock_gpu} Mhz", "clock_mem": f"{clock_mem} Mhz", "cuda_capability": f"{compute_capability[0]}.{compute_capability[1]}", } ) pynvml.nvmlShutdown() cuda_info["gpus"] = gpus return cuda_info
def get_gpu_power_usage(device_index: int) -> float: pynvml.nvmlInit() usage = pynvml.nvmlDeviceGetPowerUsage(pynvml.nvmlDeviceGetHandleByIndex(device_index)) pynvml.nvmlShutdown() return usage
[docs] def get_system_info() -> dict[str, Any]: """Retrieve system information. Collects information about the operating system, CPU, memory, and GPU. Returns ------- dict[str, Any] A dictionary containing system information. """ sys = uname() cpu = get_cpu_info() svmem = psutil.virtual_memory() try: cuda_info = get_gpu_info() except pynvml.NVMLError_LibraryNotFound: # type: ignore logger.warning("NVIDIA driver not found") cuda_info = {} if hasattr(psutil, "cpu_freq") and psutil.cpu_freq() is not None: cpufreq = psutil.cpu_freq().max else: cpufreq = 0.0 return { "os": {"system": sys.system, "node": sys.node, "release": sys.release, "version": sys.version}, "cpu": { "model": cpu["brand_raw"], "architecture": cpu["arch_string_raw"], "cores": { "physical": psutil.cpu_count(logical=False), "total": psutil.cpu_count(logical=True), }, "frequency": f"{(cpufreq / 1000):.2f} GHz", }, "memory": format_num(svmem.total, bytes=True), "cuda": cuda_info, }