From cdeb89380398be70b2b9ee32d8435a7aefcdbdd8 Mon Sep 17 00:00:00 2001 From: Ifelseer <1138369491@qq.com> Date: Fri, 19 Jun 2026 08:05:13 +0000 Subject: [PATCH 1/2] add kernels T1-1-5 --- src/ntops/kernels/__init__.py | 8 ++ src/ntops/kernels/linspace.py | 31 +++++++ src/ntops/kernels/logit.py | 26 ++++++ src/ntops/kernels/logspace.py | 35 ++++++++ src/ntops/kernels/nan_to_num.py | 33 ++++++++ src/ntops/torch/__init__.py | 10 +++ src/ntops/torch/linspace.py | 43 ++++++++++ src/ntops/torch/logit.py | 30 +++++++ src/ntops/torch/logspace.py | 55 ++++++++++++ src/ntops/torch/nan_to_num.py | 60 ++++++++++++++ src/ntops/torch/trapezoid.py | 30 +++++++ tests/test_linspace.py | 121 +++++++++++++++++++++++++++ tests/test_logit.py | 103 +++++++++++++++++++++++ tests/test_logspace.py | 90 ++++++++++++++++++++ tests/test_nan_to_num.py | 143 ++++++++++++++++++++++++++++++++ tests/test_trapezoid.py | 94 +++++++++++++++++++++ 16 files changed, 912 insertions(+) create mode 100644 src/ntops/kernels/linspace.py create mode 100644 src/ntops/kernels/logit.py create mode 100644 src/ntops/kernels/logspace.py create mode 100644 src/ntops/kernels/nan_to_num.py create mode 100644 src/ntops/torch/linspace.py create mode 100644 src/ntops/torch/logit.py create mode 100644 src/ntops/torch/logspace.py create mode 100644 src/ntops/torch/nan_to_num.py create mode 100644 src/ntops/torch/trapezoid.py create mode 100644 tests/test_linspace.py create mode 100644 tests/test_logit.py create mode 100644 tests/test_logspace.py create mode 100644 tests/test_nan_to_num.py create mode 100644 tests/test_trapezoid.py diff --git a/src/ntops/kernels/__init__.py b/src/ntops/kernels/__init__.py index f6934ef..26e330e 100644 --- a/src/ntops/kernels/__init__.py +++ b/src/ntops/kernels/__init__.py @@ -20,11 +20,15 @@ isinf, isnan, layer_norm, + linspace, + logspace, + logit, le, lt, max_pool2d, mm, mul, + nan_to_num, ne, neg, pow, @@ -68,6 +72,7 @@ "max_pool2d", "mm", "mul", + "nan_to_num", "ne", "neg", "pow", @@ 
def application(input, output, eps):
    """Element-wise logit kernel body: ``log(x / (1 - x))`` with clamping.

    Args:
        input: Tile of input values.
        output: Tile receiving the result, cast to ``output.dtype``.
        eps: Compile-time constant; values are clamped to ``[eps, 1 - eps]``
            so that neither ``log(0)`` nor a division by zero can occur.
    """
    # Promote to float32 BEFORE clamping. If the clamp were evaluated in a
    # narrower input dtype such as float16, the upper bound ``1 - eps`` would
    # round to exactly 1.0 for small eps and an input of 1.0 would still
    # produce log(1 / 0) = inf.
    x = ntl.cast(input, ntl.float32)
    x = ntl.minimum(ntl.maximum(x, eps), 1.0 - eps)
    # Assigning to ``output`` is how the result is stored; the name is not
    # read again, hence the lint suppression.
    output = ntl.cast(ntl.log(x / (1.0 - x)), output.dtype)  # noqa: F841
def application(input, nan_val, posinf_val, neginf_val, output):
    """Element-wise kernel body replacing NaN, +inf and -inf in ``input``.

    Args:
        input: Tile of values to sanitize.
        nan_val: Tile holding the replacement used where ``input`` is NaN.
        posinf_val: Tile holding the replacement used where ``input`` is +inf.
        neginf_val: Tile holding the replacement used where ``input`` is -inf.
        output: Tile receiving the sanitized values.
    """
    # Detect special values using IEEE 754 properties
    is_nan = input != input  # NaN is the only value not equal to itself
    is_posinf = input == float("+inf")
    is_neginf = input == float("-inf")

    # Select the replacement with three chained ntl.where calls (no arithmetic
    # blending is involved). The three masks are mutually exclusive, so each
    # element is rewritten by at most one of the selections below.
    result = ntl.where(is_nan, nan_val, input)
    result = ntl.where(is_posinf, posinf_val, result)
    # Assigning to ``output`` is how the result is stored; the name is not
    # read again, hence the lint suppression.
    output = ntl.where(is_neginf, neginf_val, result)  # noqa: F841
def linspace(start, end, steps, *, dtype=None, device=None):
    """
    Create a 1D tensor of evenly spaced values from start to end.

    Args:
        start: Starting value (first element of the result).
        end: Ending value (last element when ``steps > 1``).
        steps: Number of points; must be a non-negative ``int``.
        dtype: Data type of the output tensor (defaults to ``torch.float32``).
        device: Device to place the output on (defaults to CUDA when
            ``torch.cuda.is_available()``, otherwise CPU).

    Returns:
        A 1D tensor of shape ``(steps,)``. ``steps == 0`` yields an empty
        tensor and ``steps == 1`` yields ``[start]``.

    Raises:
        TypeError: If ``steps`` is not an ``int``.
        ValueError: If ``steps`` is negative.
    """
    if dtype is None:
        dtype = torch.float32
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if not isinstance(steps, int):
        raise TypeError(f"steps must be an integer, got {type(steps)}")
    if steps < 0:
        raise ValueError(f"steps must be non-negative, got {steps}")

    # Nothing to generate: return early instead of computing a spacing with a
    # divisor of -1 and launching a kernel over an empty tensor.
    if steps == 0:
        return torch.empty(0, dtype=dtype, device=device)

    # A single point has no spacing (steps - 1 would be a zero divisor).
    if steps == 1:
        return torch.tensor([start], dtype=dtype, device=device)

    # Spacing between consecutive points; the kernel evaluates
    # start + index * step_val for every output position.
    step_val = (end - start) / (steps - 1)

    output = torch.empty(steps, dtype=dtype, device=device)

    kernel = _cached_make(ntops.kernels.linspace.premake, 1)
    kernel(output, start, step_val)

    return output
def logspace(start, end, steps, base=10.0, *, dtype=None, device=None):
    """
    Create a 1D tensor of values evenly spaced on a log scale.

    The values are base^start, base^(start + step), ..., base^end, where
    step = (end - start) / (steps - 1).

    Args:
        start: Starting exponent value
        end: Ending exponent value
        steps: Number of points; must be a non-negative ``int``.
        base: Base of the log space (default: 10.0)
        dtype: Data type of the output tensor (defaults to ``torch.float32``).
        device: Device to place the output on (defaults to CUDA when
            ``torch.cuda.is_available()``, otherwise CPU).

    Returns:
        A 1D tensor of shape ``(steps,)``. ``steps == 0`` yields an empty
        tensor and ``steps == 1`` yields ``[base ** start]``.

    Raises:
        TypeError: If ``steps`` is not an ``int``.
        ValueError: If ``steps`` is negative.

    Examples:
        >>> logspace(0, 2, 3, base=10)
        tensor([1., 10., 100.])
        >>> logspace(0, 1, 4, base=2)
        tensor([1., 1.2599, 1.5874, 2.])
    """
    if dtype is None:
        dtype = torch.float32
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if not isinstance(steps, int):
        raise TypeError(f"steps must be an integer, got {type(steps)}")
    if steps < 0:
        raise ValueError(f"steps must be non-negative, got {steps}")

    # Nothing to generate: return early instead of computing an exponent
    # spacing with a divisor of -1 and launching a kernel over an empty tensor.
    if steps == 0:
        return torch.empty(0, dtype=dtype, device=device)

    # A single point has no spacing (steps - 1 would be a zero divisor).
    if steps == 1:
        return torch.tensor([base ** start], dtype=dtype, device=device)

    # Spacing between consecutive exponents; the kernel evaluates
    # base ** (start + index * step_val) for every output position.
    step_val = (end - start) / (steps - 1)

    output = torch.empty(steps, dtype=dtype, device=device)

    kernel = _cached_make(ntops.kernels.logspace.premake, 1)
    kernel(output, start, step_val, base)

    return output
def trapezoid(y, x=None, dim=-1):
    """
    Apply the composite trapezoidal rule along one dimension of ``y``.

    This delegates to ``torch.trapezoid``. Without ``x`` the samples are
    treated as unit-spaced (``dx=1``); with ``x`` the given coordinates
    define the spacing.

    Args:
        y: Tensor of sample values to integrate.
        x: Optional sample coordinates; ``None`` selects unit spacing.
        dim: Dimension to integrate over (default: -1).

    Returns:
        Whatever ``torch.trapezoid`` returns for the chosen spacing.

    Examples:
        >>> trapezoid(torch.tensor([1.0, 2.0, 3.0]))
        tensor(4.)
    """
    # Exactly one spacing keyword is forwarded, mirroring the two call forms.
    spacing = {"dx": 1} if x is None else {"x": x}
    return torch.trapezoid(y, dim=dim, **spacing)
def test_linspace_edge_cases():
    """Exercise linspace with degenerate and awkward step counts."""
    # steps=1: the result is the single value ``start``
    result = ntops.torch.linspace(3.0, 7.0, 1, dtype=torch.float32)
    assert result.numel() == 1
    assert result.item() == 3.0

    # steps=2: exactly the two endpoints
    result = ntops.torch.linspace(0.0, 1.0, 2, dtype=torch.float32)
    expected = torch.tensor([0.0, 1.0], dtype=torch.float32, device=result.device)
    assert torch.allclose(result, expected)

    # negative steps must raise ValueError (note: steps=0 is NOT exercised here)
    with pytest.raises(ValueError):
        ntops.torch.linspace(0.0, 1.0, -1)

    # same start and end: every element equals that value
    result = ntops.torch.linspace(5.0, 5.0, 3, dtype=torch.float32)
    expected = torch.tensor([5.0, 5.0, 5.0], dtype=torch.float32, device=result.device)
    assert torch.allclose(result, expected)

    # a small odd step count (7), presumably not a multiple of the kernel block size
    result = ntops.torch.linspace(0.0, 1.0, 7, dtype=torch.float32)
    expected = torch.linspace(0.0, 1.0, 7, dtype=torch.float32, device=result.device)
    assert torch.allclose(result, expected)
@pytest.mark.parametrize("dtype, rtol, atol", DTYPE_TOLERANCES)
def test_logit_symmetric(dtype, rtol, atol):
    """Check the odd symmetry logit(x) == -logit(1 - x)."""
    probabilities = torch.tensor([0.2, 0.4, 0.6, 0.8], dtype=dtype, device="cuda")
    direct = ntops.torch.logit(probabilities)
    mirrored = ntops.torch.logit(1 - probabilities)
    assert torch.allclose(direct, -mirrored, rtol=rtol, atol=atol)
def test_logit_float64():
    """Check logit on float64 input against the float64 reference.

    The kernel computes in float32, whose relative precision is roughly
    1e-7, so accumulated rounding in the division and the logarithm can
    exceed a 1e-7 tolerance. The tolerance is therefore 1e-6, the same
    value the float64 logspace test uses for its float32 intermediate.
    """
    x = torch.tensor([0.1, 0.5, 0.9], device="cuda", dtype=torch.float64)
    result = ntops.torch.logit(x)
    expected = logit_cpu(x)
    # float32 intermediate limits float64 output precision to ~1e-7, so allow
    # one order of magnitude of headroom.
    assert torch.allclose(result, expected, rtol=1e-6, atol=1e-6)
    assert not torch.isnan(result).any()
    assert not torch.isinf(result).any()
def test_logspace_edge_cases():
    """Exercise logspace with degenerate step counts and a non-default base."""
    # steps=1: the result is the single value base ** start (10 ** 2)
    result = ntops.torch.logspace(2.0, 5.0, 1, base=10.0, dtype=torch.float32)
    assert result.numel() == 1
    assert abs(result.item() - 100.0) < 1e-5

    # steps=2: exactly base ** start and base ** end
    result = ntops.torch.logspace(0.0, 1.0, 2, base=10.0, dtype=torch.float32)
    expected = torch.tensor([1.0, 10.0], dtype=torch.float32, device=result.device)
    assert torch.allclose(result, expected)

    # negative steps must raise ValueError (note: steps=0 is NOT exercised here)
    with pytest.raises(ValueError):
        ntops.torch.logspace(0.0, 1.0, -1)

    # same start and end: every element equals base ** start
    result = ntops.torch.logspace(3.0, 3.0, 3, base=10.0, dtype=torch.float32)
    expected = torch.tensor([1000.0, 1000.0, 1000.0], dtype=torch.float32, device=result.device)
    assert torch.allclose(result, expected)

    # different base
    result = ntops.torch.logspace(0.0, 3.0, 4, base=3.0, dtype=torch.float32)
    expected = torch.tensor([1.0, 3.0, 9.0, 27.0], dtype=torch.float32, device=result.device)
    assert torch.allclose(result, expected)
def make_special_tensor(device, dtype):
    """Build the 9-element fixture: NaN, +Inf, -Inf, then ordinary values.

    Element order is relied on by the tests: indices 0-2 are the special
    values, followed by 0.0, -0.0, 1.0, -1.0, 42.5 and -3.14.
    """
    specials = (float("nan"), float("inf"), float("-inf"))
    ordinary = (0.0, -0.0, 1.0, -1.0, 42.5, -3.14)
    return torch.tensor([*specials, *ordinary], dtype=dtype, device=device)
@pytest.mark.parametrize("dtype, rtol, atol", DTYPE_TOLERANCES)
def test_nan_to_num_all_nan(dtype, rtol, atol):
    """A tensor made only of NaN becomes a tensor made only of ``nan=5.0``."""
    shape = (10,)
    all_nan = torch.full(shape, float("nan"), dtype=dtype, device="cuda")
    replaced = ntops.torch.nan_to_num(all_nan, nan=5.0)
    assert not torch.isnan(replaced).any()
    fives = torch.full(shape, 5.0, dtype=dtype, device="cuda")
    assert torch.allclose(replaced, fives, rtol=rtol, atol=atol)
def trapezoid_cpu(y, x=None, dim=-1):
    """Reference result computed directly with ``torch.trapezoid``.

    Uses unit spacing (``dx=1``) when ``x`` is None, otherwise the supplied
    coordinates. The computation runs on whatever device ``y`` lives on.
    """
    spacing = {"dx": 1} if x is None else {"x": x}
    return torch.trapezoid(y, dim=dim, **spacing)
ntops.torch.trapezoid(y) + expected = trapezoid_cpu(y) + rtol = 1e-5 if dtype == torch.float32 else 1e-3 + atol = 1e-5 if dtype == torch.float32 else 1e-3 + assert torch.allclose(result, expected, rtol=rtol, atol=atol) + assert not torch.isnan(result).any() + assert not torch.isinf(result).any() + + +@pytest.mark.parametrize("dtype", [torch.float32, torch.float16]) +def test_trapezoid_2d_dim0(dtype): + """2D tensor, integrate along dim=0.""" + y = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=dtype, device="cuda") + result = ntops.torch.trapezoid(y, dim=0) + expected = trapezoid_cpu(y, dim=0) + rtol = 1e-5 if dtype == torch.float32 else 1e-3 + atol = 1e-5 if dtype == torch.float32 else 1e-3 + assert torch.allclose(result, expected, rtol=rtol, atol=atol) + + +@pytest.mark.parametrize("dtype", [torch.float32, torch.float16]) +def test_trapezoid_2d_dim1(dtype): + """2D tensor, integrate along dim=1.""" + y = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=dtype, device="cuda") + result = ntops.torch.trapezoid(y, dim=1) + expected = trapezoid_cpu(y, dim=1) + rtol = 1e-5 if dtype == torch.float32 else 1e-3 + atol = 1e-5 if dtype == torch.float32 else 1e-3 + assert torch.allclose(result, expected, rtol=rtol, atol=atol) + + +@pytest.mark.parametrize("dtype", [torch.float32, torch.float16]) +def test_trapezoid_with_x(dtype): + """Trapezoid with custom x coordinates.""" + y = torch.tensor([1.0, 2.0, 3.0], dtype=dtype, device="cuda") + x = torch.tensor([0.0, 1.0, 3.0], dtype=dtype, device="cuda") + result = ntops.torch.trapezoid(y, x=x) + expected = trapezoid_cpu(y, x=x) + rtol = 1e-5 if dtype == torch.float32 else 1e-3 + atol = 1e-5 if dtype == torch.float32 else 1e-3 + assert torch.allclose(result, expected, rtol=rtol, atol=atol) + + +def test_trapezoid_edge_cases(): + """Edge cases.""" + # Single element + y = torch.tensor([5.0], device="cuda") + result = ntops.torch.trapezoid(y) + assert result.numel() == 0 or result.item() == 0.0 + + # Two elements + y = 
torch.tensor([1.0, 3.0], device="cuda") + result = ntops.torch.trapezoid(y) + expected = torch.tensor(2.0, device="cuda") # (1+3)/2 * 1 = 2 + assert torch.allclose(result, expected) + + # 3D tensor + y = torch.randn(2, 3, 4, device="cuda") + result = ntops.torch.trapezoid(y, dim=1) + expected = trapezoid_cpu(y, dim=1) + assert torch.allclose(result, expected) + assert result.shape == (2, 4) + + # Negative dim + y = torch.randn(2, 3, 4, device="cuda") + result = ntops.torch.trapezoid(y, dim=-1) + expected = trapezoid_cpu(y, dim=-1) + assert torch.allclose(result, expected) + assert result.shape == (2, 3) + + +def test_trapezoid_float64(): + """float64 precision.""" + y = torch.tensor([1.0, 2.0, 3.0, 4.0], device="cuda", dtype=torch.float64) + result = ntops.torch.trapezoid(y) + expected = trapezoid_cpu(y) + assert torch.allclose(result, expected, rtol=1e-10, atol=1e-10) From dfaadc61a83b5cd4e829c3f853c1db21b0fc717c Mon Sep 17 00:00:00 2001 From: Ifelseer <1138369491@qq.com> Date: Sun, 21 Jun 2026 05:36:13 +0000 Subject: [PATCH 2/2] honor --- HONOR_CODE.md | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 HONOR_CODE.md diff --git a/HONOR_CODE.md b/HONOR_CODE.md new file mode 100644 index 0000000..c93078f --- /dev/null +++ b/HONOR_CODE.md @@ -0,0 +1,73 @@ +``` +# 2026 春季启元人工智能大赛诚信守则(Honor Code) + + +本人作为 2026 春季启元人工智能大赛(以下简称“比赛”)的参赛选手,郑重承诺严格遵守比赛规则及本诚信守则,秉持诚信、公正、廉洁的参赛原则,自觉维护比赛的公平性与严肃性。本人充分理解并认可,违反本准则将导致参赛资格被取消、比赛成绩作废等相应后果,且愿意承担由此产生的一切责任。 + +## 一、参赛诚信承诺 + +1. 本人保证所提交的赛题PR(Pull Request)中包含的算子实现代码及相关文档,均为本人(及参赛团队,如为团队参赛)在比赛期间独立完成或在明确标注参考来源的基础上进行开发,不存在任何欺诈、抄袭、作弊行为。 + +2. 本人承诺主动、全面、真实地披露赛题实现过程中所有参考的外部资源,尤其是开源代码资源,不隐瞒任何可能影响比赛公平性的信息。 + +3. 
本人保证不采用任何不正当手段获取比赛优势,包括但不限于窃取其他参赛选手的代码成果、利用非比赛允许的工具或技术、与他人串通作弊等。 + +## 二、参考资源说明 + +本人确认已按比赛要求,将本次赛题实现过程中涉及的参考资源信息单独撰写至`REFERENCE.md`文件中,该文件将与本诚信守则一同作为PR附件提交。`REFERENCE.md`需根据实际参考情况,按以下要求完整填写,信息不完整或虚假填写将视为违反本准则: + +**情况1:无参考外部开源代码及核心实现思路** + +`REFERENCE.md`中需明确声明:“本次赛题提交的算子代码、核心算法逻辑及实现方案均为本人(及参赛团队)独立设计与开发,未参考任何外部开源项目、技术文档中的核心代码片段或实现思路,未接受任何第三方的技术指导或代码支持。” + +**情况2:有参考外部开源代码及相关资源** + +对每个参考资源提供以下信息陈述: +1. 参考开源项目/资源名称 + +2. 参考资源链接(GitHub/Gitee/论文/技术文档等) + +3. 参考的具体内容(请明确说明参考的代码片段、算法逻辑、实现思路等,需标注对应资源的具体位置,如文件路径、代码行数等) + +4. 本人对参考内容的修改与优化说明:(请详细说明在参考基础上,本人所做的独立开发、修改、优化工作,体现自身技术贡献) + +5. 若是开源项目,提供参考资源的开源协议类型:(如MIT、Apache 2.0、GPL等) + +6. 其他需要补充说明的信息 + + +## 三、禁止行为确认 + +本人明确知晓并承诺避免以下违反比赛公平性的行为,若存在以下任一情况,自愿接受比赛组委会的相应处罚: + +1. 未经授权复制、抄袭他人(包括其他参赛选手、开源项目、商业代码)的代码、算法或技术方案,且未进行明确标注; + +2. 隐瞒或虚假披露参考资源信息,包括遗漏重要参考来源、伪造参考内容说明等; + +3. 与其他参赛选手或第三方串通,进行代码共享、成果交换等违规协作; + +4. 利用比赛平台漏洞、技术缺陷或非比赛允许的工具获取不正当利益; + +5. 伪造比赛相关证明材料、提交虚假信息; + +6. 其他违反比赛规则及公序良俗的不诚信行为。 + + +## 四、责任与确认 + +1. 本人充分理解,比赛组委会将对所有提交的PR进行代码溯源、参考信息核查等公平性审查,若发现本人存在违反本准则的行为,有权随时取消本人的参赛资格、作废比赛成绩,情节严重的将在比赛相关平台进行公示。 + +2. 若因本人违反本准则导致比赛争议或第三方权益受损(如开源协议侵权等),本人将独立承担全部法律责任及相关损失,与比赛组委会无关。 + +3. 本人确认已仔细阅读并完全理解本诚信守则的全部内容,自愿签署本准则,接受比赛组委会的监督与审查。 + +## 五、签署信息 + +参赛选手姓名(团队参赛需填写所有成员姓名) +王一鸣 + + +签署日期 + +2026年6月1日 +``` \ No newline at end of file