diff --git a/cuda_core/cuda/core/_host.py b/cuda_core/cuda/core/_host.py index 79da81cae2..c4bef88649 100644 --- a/cuda_core/cuda/core/_host.py +++ b/cuda_core/cuda/core/_host.py @@ -62,10 +62,12 @@ def _get_or_create(cls, numa_id: int | None, is_numa_current: bool) -> Host: @property def numa_id(self) -> int | None: + """NUMA node ID, or ``None`` if not pinned to a specific NUMA node.""" return self._numa_id @property def is_numa_current(self) -> bool: + """Whether this ``Host`` represents the calling thread's NUMA node (constructed via :meth:`numa_current`).""" return self._is_numa_current @classmethod diff --git a/cuda_core/cuda/core/typing.py b/cuda_core/cuda/core/typing.py index 1a6d377579..33ae414007 100644 --- a/cuda_core/cuda/core/typing.py +++ b/cuda_core/cuda/core/typing.py @@ -43,12 +43,28 @@ class SourceCodeType(StrEnum): + """Source language passed to :class:`~cuda.core.Program`. + + ``CXX`` selects CUDA C++, ``PTX`` selects PTX assembly text, and + ``NVVM`` selects NVVM IR (LLVM bitcode). + """ + CXX = "c++" PTX = "ptx" NVVM = "nvvm" class ObjectCodeFormatType(StrEnum): + """Output format produced by :meth:`~cuda.core.Program.compile`. + + ``PTX`` — PTX assembly text. + ``CUBIN`` — device-native CUDA binary. + ``LTOIR`` — LTO (link-time optimization) IR for later linking. + ``FATBIN`` — fat binary bundling multiple device images. + ``OBJECT`` — relocatable device object. + ``LIBRARY`` — device code library. + """ + PTX = "ptx" CUBIN = "cubin" LTOIR = "ltoir" @@ -58,6 +74,14 @@ class ObjectCodeFormatType(StrEnum): class CompilerBackendType(StrEnum): + """Compiler backend selected via :class:`~cuda.core.ProgramOptions`. + + ``NVRTC`` — NVIDIA Runtime Compilation. + ``NVVM`` — NVVM LLVM backend. + ``NVJITLINK`` — nvJitLink device-side linker. + ``DRIVER`` — CUDA driver PTX JIT compiler. 
+ """ + NVRTC = "NVRTC" NVVM = "NVVM" NVJITLINK = "nvJitLink" @@ -65,36 +89,80 @@ class CompilerBackendType(StrEnum): class PCHStatusType(StrEnum): + """Precompiled-header (PCH) outcome reported by :meth:`~cuda.core.Program.compile`. + + ``CREATED`` — PCH was successfully written. + ``NOT_ATTEMPTED`` — PCH creation was skipped (backend does not support it or + the option was not requested). + ``FAILED`` — PCH creation was attempted but failed. + """ + CREATED = "created" NOT_ATTEMPTED = "not_attempted" FAILED = "failed" class GraphConditionalType(StrEnum): + """Conditional node flavor for :class:`~cuda.core.graph.GraphBuilder`. + + ``IF`` — body graph executes at most once based on a condition. + ``WHILE`` — body graph loops while the condition is true. + ``SWITCH`` — selects one child graph by an integer index. + """ + IF = "if" WHILE = "while" SWITCH = "switch" class GraphMemoryType(StrEnum): + """Memory space for a graph memory-allocation or free node. + + ``DEVICE`` — GPU device memory. + ``HOST`` — pinned host memory. + ``MANAGED`` — CUDA managed (unified) memory. + """ + DEVICE = "device" HOST = "host" MANAGED = "managed" class ManagedMemoryLocationType(StrEnum): + """Destination type for managed-memory prefetch and advise operations. + + ``DEVICE`` — target a GPU device. + ``HOST`` — target the CPU host (any NUMA node). + ``HOST_NUMA`` — target a specific host NUMA node (CUDA 13+ only). + """ + DEVICE = "device" HOST = "host" HOST_NUMA = "host_numa" class VirtualMemoryHandleType(StrEnum): + """OS handle type for exporting virtual memory allocations across processes. + + ``POSIX_FD`` — POSIX file descriptor (Linux). + ``WIN32_KMT`` — Win32 D3DKMT handle (Windows). + ``FABRIC`` — NVLink/NVSwitch fabric handle for multi-node topologies. + """ + POSIX_FD = "posix_fd" WIN32_KMT = "win32_kmt" FABRIC = "fabric" class VirtualMemoryLocationType(StrEnum): + """Physical backing location for a virtual memory allocation. + + ``DEVICE`` — GPU device memory. 
+ ``HOST`` — pinned host memory. + ``HOST_NUMA`` — host memory pinned to a specific NUMA node. + ``HOST_NUMA_CURRENT`` — host memory on the calling thread's NUMA node. + """ + DEVICE = "device" HOST = "host" HOST_NUMA = "host_numa" @@ -102,16 +170,34 @@ class VirtualMemoryLocationType(StrEnum): class VirtualMemoryGranularityType(StrEnum): + """Granularity query type for virtual memory allocations. + + ``MINIMUM`` — minimum granularity required for an allocation on the device. + ``RECOMMENDED`` — granularity that yields best performance on the device. + """ + MINIMUM = "minimum" RECOMMENDED = "recommended" class VirtualMemoryAccessType(StrEnum): + """Access permissions for a virtual memory mapping. + + ``READ_WRITE`` — both read and write access. + ``READ`` — read-only access. + """ + READ_WRITE = "rw" READ = "r" class VirtualMemoryAllocationType(StrEnum): + """Physical memory type for a virtual memory backing allocation. + + ``PINNED`` — pinned allocation that cannot migrate from its current location while in use. + ``MANAGED`` — CUDA managed (unified) memory. + """ + PINNED = "pinned" MANAGED = "managed"