From 605d81f64f346ab224a292b3782e17e3209eed60 Mon Sep 17 00:00:00 2001 From: Ifelseer <1138369491@qq.com> Date: Fri, 19 Jun 2026 11:55:57 +0000 Subject: [PATCH 1/2] add kernels T1-1-8 --- src/ntops/kernels/__init__.py | 2 + src/ntops/kernels/kl_div.py | 44 +++++++++++ src/ntops/torch/__init__.py | 10 +++ src/ntops/torch/combinations.py | 39 ++++++++++ src/ntops/torch/corrcoef.py | 23 ++++++ src/ntops/torch/count_nonzero.py | 33 +++++++++ src/ntops/torch/kl_div.py | 44 +++++++++++ src/ntops/torch/narrow.py | 29 ++++++++ tests/test_combinations.py | 100 +++++++++++++++++++++++++ tests/test_corrcoef.py | 73 ++++++++++++++++++ tests/test_count_nonzero.py | 109 +++++++++++++++++++++++++++ tests/test_kl_div.py | 122 +++++++++++++++++++++++++++++++ tests/test_narrow.py | 15 ++++ 13 files changed, 643 insertions(+) create mode 100644 src/ntops/kernels/kl_div.py create mode 100644 src/ntops/torch/combinations.py create mode 100644 src/ntops/torch/corrcoef.py create mode 100644 src/ntops/torch/count_nonzero.py create mode 100644 src/ntops/torch/kl_div.py create mode 100644 src/ntops/torch/narrow.py create mode 100644 tests/test_combinations.py create mode 100644 tests/test_corrcoef.py create mode 100644 tests/test_count_nonzero.py create mode 100644 tests/test_kl_div.py create mode 100644 tests/test_narrow.py diff --git a/src/ntops/kernels/__init__.py b/src/ntops/kernels/__init__.py index f6934ef..ee12cbe 100644 --- a/src/ntops/kernels/__init__.py +++ b/src/ntops/kernels/__init__.py @@ -19,6 +19,7 @@ gt, isinf, isnan, + kl_div, layer_norm, le, lt, @@ -62,6 +63,7 @@ "gt", "isinf", "isnan", + "kl_div", "layer_norm", "le", "lt", diff --git a/src/ntops/kernels/kl_div.py b/src/ntops/kernels/kl_div.py new file mode 100644 index 0000000..2d55d02 --- /dev/null +++ b/src/ntops/kernels/kl_div.py @@ -0,0 +1,44 @@ +import functools + +import ninetoothed +import ninetoothed.language as ntl +from ninetoothed import Tensor + +from ntops.kernels.element_wise import arrangement + + +def 
def application(log_q, log_or_p_target, output, eps, log_target):
    # Element-wise KL-divergence term: output = p * (log_p - log_q).
    #
    # log_q            -- log-probabilities of the predicted distribution.
    # log_or_p_target  -- target distribution, either probabilities (p) or
    #                     log-probabilities (log p), selected by log_target.
    # eps, log_target  -- compile-time constants supplied by premake.
    #
    # All arithmetic is carried out in float32; only the final store is cast
    # back to the dtype of `output`.
    eps_f32 = ntl.cast(eps, ntl.float32)
    one_f32 = ntl.cast(1.0, ntl.float32)
    target_f32 = ntl.cast(log_or_p_target, ntl.float32)

    if log_target:
        # Target already lives in log space: recover p by exponentiation.
        log_p = target_f32
        p = ntl.exp(log_p)
    else:
        # Target is a probability: clip it into [eps, 1] before taking the
        # logarithm so that log(0) can never be evaluated.
        p = ntl.minimum(ntl.maximum(target_f32, eps_f32), one_f32)
        log_p = ntl.log(ntl.maximum(p, eps_f32))

    # Safety clip applied on both paths (it is what bounds exp(log_p) in the
    # log-space branch), followed by the KL term itself.
    p = ntl.minimum(ntl.maximum(p, eps_f32), one_f32)
    log_ratio = log_p - ntl.cast(log_q, ntl.float32)
    output = ntl.cast(p * log_ratio, output.dtype)  # noqa: F841
from ntops.torch.isinf import isinf from ntops.torch.isnan import isnan +from ntops.torch.kl_div import kl_div from ntops.torch.layer_norm import layer_norm from ntops.torch.le import le from ntops.torch.lt import lt @@ -25,6 +29,7 @@ from ntops.torch.max_pool2d import max_pool2d from ntops.torch.mm import mm from ntops.torch.mul import mul +from ntops.torch.narrow import narrow from ntops.torch.ne import ne from ntops.torch.neg import neg from ntops.torch.pow import pow @@ -47,11 +52,14 @@ "avg_pool2d", "bitwise_and", "bitwise_not", + "corrcoef", "bitwise_or", "bmm", "clamp", + "combinations", "conv2d", "cos", + "count_nonzero", "div", "dropout", "eq", @@ -61,6 +69,7 @@ "gt", "isinf", "isnan", + "kl_div", "layer_norm", "le", "lt", @@ -68,6 +77,7 @@ "max_pool2d", "mm", "mul", + "narrow", "ne", "neg", "pow", diff --git a/src/ntops/torch/combinations.py b/src/ntops/torch/combinations.py new file mode 100644 index 0000000..aebe7cc --- /dev/null +++ b/src/ntops/torch/combinations.py @@ -0,0 +1,39 @@ +import torch + + +def combinations(x, r): + """ + Generate all combinations of r elements from the 1D input tensor. + + Returns combinations in lexicographic order as rows of a 2D tensor. + + Args: + x: 1D input tensor of length n + r: Number of elements in each combination + + Returns: + 2D tensor of shape (C(n, r), r) where C(n, r) = n! / (r! * (n-r)!) 
def combinations(x, r=2, with_replacement=False):
    """
    Generate all length-``r`` combinations of the elements of a 1D tensor.

    Thin wrapper around ``torch.combinations`` that validates its arguments
    up front (raising ``ValueError``) and short-circuits the case where no
    combination can exist.

    Args:
        x: 1D input tensor of length n.
        r: Number of elements in each combination (default: 2, matching
            ``torch.combinations``).
        with_replacement: Whether an element may be picked more than once
            (default: False).

    Returns:
        2D tensor of shape (C, r) whose rows are the combinations in
        lexicographic order of element position, on the same device and
        with the same dtype as ``x``. Without replacement
        C = n! / (r! * (n - r)!), and the result is an empty ``(0, r)``
        tensor when ``r > n``. For ``r == 0`` the result is whatever
        ``torch.combinations`` returns (an empty 1D tensor on the torch
        versions this was written against).

    Raises:
        ValueError: If ``x`` is not 1D or ``r`` is negative.

    Examples:
        >>> x = torch.tensor([1, 2, 3, 4])
        >>> combinations(x, 2)
        tensor([[1, 2],
                [1, 3],
                [1, 4],
                [2, 3],
                [2, 4],
                [3, 4]])

        >>> combinations(x, 5)  # r > n returns empty
        tensor([], size=(0, 5), dtype=torch.int64)
    """
    if x.ndim != 1:
        raise ValueError(f"Input must be 1D, got {x.ndim}D tensor")
    if r < 0:
        raise ValueError(f"r must be non-negative, got {r}")

    # Without replacement there is nothing to pick once r exceeds n; return
    # the empty result directly. With replacement r > n is a valid request,
    # so it must fall through to torch.
    if not with_replacement and r > x.shape[0]:
        return torch.empty(0, r, dtype=x.dtype, device=x.device)

    return torch.combinations(x, r=r, with_replacement=with_replacement)
def count_nonzero(x, dim=None, keepdim=False):
    """
    Count the number of non-zero elements in a tensor.

    Args:
        x: Input tensor.
        dim: Dimension, or tuple/list of dimensions, along which to count.
            If None, all elements are counted.
        keepdim: Whether to keep every reduced dimension as a size-1
            dimension in the result (default: False).

    Returns:
        Tensor of counts. With ``keepdim=False`` the reduced dimensions are
        removed (a scalar tensor when ``dim`` is None). With
        ``keepdim=True`` the result has the same number of dimensions as
        ``x``, each reduced dimension having size 1.

    Examples:
        >>> x = torch.tensor([[1, 0, 3], [0, 5, 0]])
        >>> count_nonzero(x)
        tensor(3)
        >>> count_nonzero(x, dim=0)
        tensor([1, 1, 1])
        >>> count_nonzero(x, dim=1, keepdim=True)
        tensor([[2],
                [1]])
        >>> count_nonzero(x, dim=(0, 1), keepdim=True)
        tensor([[3]])
    """
    if dim is None:
        result = torch.count_nonzero(x)
    else:
        result = torch.count_nonzero(x, dim=dim)

    # A 0-dim input has no dimension to keep.
    if not keepdim or x.ndim == 0:
        return result

    # Work out which axes were reduced, normalising negative indices.
    # torch.count_nonzero has already rejected out-of-range dims above.
    if dim is None:
        reduced = set(range(x.ndim))
    elif isinstance(dim, int):
        reduced = {dim % x.ndim}
    else:
        # An empty sequence of dims reduces over everything, like None.
        reduced = {d % x.ndim for d in dim} or set(range(x.ndim))

    # Rebuild the shape with size-1 placeholders instead of chaining
    # unsqueeze calls; this handles int, negative, and multi-axis dims alike.
    kept_shape = [1 if axis in reduced else size for axis, size in enumerate(x.shape)]
    return result.reshape(kept_shape)
def kl_div(input, target, reduction="sum", log_target=False, eps=1e-10):
    """
    Compute the KL divergence loss: p * (log_p - log_q).

    The element-wise term is evaluated by the ``ntops.kernels.kl_div``
    kernel, which does its arithmetic in float32 and writes the result in
    the dtype of ``input``; the requested reduction is then applied with
    regular torch ops.

    Args:
        input: Log-probabilities (log_q). Must have the same shape as
            ``target``.
        target: Probabilities (p), or log-probabilities if
            ``log_target=True``.
        reduction: 'none' | 'sum' | 'mean' | 'batchmean'. Note the default
            here is 'sum'.
        log_target: Whether target is in log space (default: False).
        eps: Lower clipping bound applied to p for numerical stability
            (default: 1e-10).

    Returns:
        The element-wise loss tensor for ``reduction='none'``, otherwise a
        scalar tensor. 'batchmean' divides the sum by ``input.shape[0]``;
        for a 0-dim input there is no batch dimension and the plain sum is
        returned.

    Raises:
        ValueError: If ``reduction`` is not one of the supported values or
            ``input`` and ``target`` differ in shape.

    Examples:
        >>> log_q = torch.tensor([-0.6931, -0.6931])  # log(0.5)
        >>> p = torch.tensor([0.5, 0.5])
        >>> kl_div(log_q, p, reduction='sum')
        tensor(0.)
    """
    if reduction not in ("none", "sum", "mean", "batchmean"):
        raise ValueError(
            f"reduction must be one of 'none', 'sum', 'mean', 'batchmean', got '{reduction}'"
        )
    # The kernel walks all three tensors element by element, so a shape
    # mismatch must be rejected here rather than silently mis-indexed.
    if input.shape != target.shape:
        raise ValueError(
            "input and target must have the same shape, "
            f"got {tuple(input.shape)} and {tuple(target.shape)}"
        )

    output = torch.empty_like(input)

    kernel = _cached_make(ntops.kernels.kl_div.premake, input.ndim, eps, log_target)
    kernel(input, target, output, eps, log_target)

    if reduction == "none":
        return output
    if reduction == "mean":
        return output.mean()

    total = output.sum()
    # 'batchmean' needs a leading batch dimension to divide by.
    if reduction == "batchmean" and input.ndim > 0:
        return total / input.shape[0]
    return total
def narrow(x, dim, start, length):
    """
    Select ``length`` consecutive entries of ``x`` along ``dim``.

    Delegates to ``torch.narrow``; the result is a view that shares storage
    with ``x`` (no data is copied), equivalent to a basic slice
    ``start:start + length`` on the chosen dimension.

    Args:
        x: Input tensor.
        dim: Dimension along which to narrow.
        start: Index of the first selected entry.
        length: Number of entries to select.

    Returns:
        A view of ``x`` restricted to the selected range along ``dim``.

    Examples:
        >>> x = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
        >>> narrow(x, 0, 0, 2)
        tensor([[1, 2, 3],
                [4, 5, 6]])
        >>> narrow(x, 1, 1, 2)
        tensor([[2, 3],
                [5, 6],
                [8, 9]])
    """
    window = torch.narrow(x, dim, start, length)
    return window
2, 3], dtype=dtype, device="cuda") + result = ntops.torch.combinations(x, 0) + # torch.combinations(x, r=0) returns shape (0,) — 1D empty + assert result.ndim >= 1 + + +@pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) +def test_combinations_large(dtype): + """C(10, 3) = 120 combinations.""" + x = torch.arange(10, dtype=dtype, device="cuda") + result = ntops.torch.combinations(x, 3) + expected = combinations_cpu(x.cpu(), 3).to("cuda") + assert torch.equal(result, expected) + assert result.shape == (120, 3) + + +def test_combinations_edge_cases(): + """Edge cases.""" + # r > n → empty + x = torch.tensor([1, 2, 3], device="cuda") + result = ntops.torch.combinations(x, 5) + assert result.numel() == 0 + assert result.shape == (0, 5) + + # r < 0 → error + with pytest.raises(ValueError): + ntops.torch.combinations(x, -1) + + # 2D input → error + x2d = torch.tensor([[1, 2], [3, 4]], device="cuda") + with pytest.raises(ValueError): + ntops.torch.combinations(x2d, 2) + + # Single element + x = torch.tensor([42], device="cuda") + result = ntops.torch.combinations(x, 1) + assert result.item() == 42 + assert result.shape == (1, 1) + + +def test_combinations_float16(): + """float16 dtype.""" + x = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float16, device="cuda") + result = ntops.torch.combinations(x, 2) + expected = torch.combinations(x, r=2) + assert torch.equal(result, expected) + + +def test_combinations_gpu_roundtrip(): + """Verify GPU tensor stays on GPU.""" + x = torch.tensor([10, 20, 30, 40, 50], device="cuda") + result = ntops.torch.combinations(x, 3) + assert result.is_cuda + assert result.shape == (10, 3) # C(5,3) = 10 diff --git a/tests/test_corrcoef.py b/tests/test_corrcoef.py new file mode 100644 index 0000000..47942f2 --- /dev/null +++ b/tests/test_corrcoef.py @@ -0,0 +1,73 @@ +import pytest +import torch + +import ntops +from tests.skippers import skip_if_cuda_not_available + + +@skip_if_cuda_not_available +def test_corrcoef_basic(): + """Basic 
correlation coefficient computation.""" + x = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], device="cuda") + result = ntops.torch.corrcoef(x) + expected = torch.corrcoef(x) + assert torch.allclose(result, expected) + assert result.shape == (2, 2) + + +@skip_if_cuda_not_available +def test_corrcoef_identity(): + """Perfect correlation with itself — diagonal should be 1.""" + x = torch.randn(3, 100, device="cuda") + result = ntops.torch.corrcoef(x) + expected = torch.corrcoef(x) + assert torch.allclose(result, expected) + assert torch.allclose(result.diag(), torch.ones(3, device="cuda")) + + +@skip_if_cuda_not_available +def test_corrcoef_constant(): + """Constant input — should produce NaN (division by zero variance).""" + x = torch.ones(3, 5, device="cuda") + result = ntops.torch.corrcoef(x) + expected = torch.corrcoef(x) + assert torch.equal(torch.isnan(result), torch.isnan(expected)) + + +@skip_if_cuda_not_available +def test_corrcoef_float16(): + """float16 precision.""" + x = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=torch.float16, device="cuda") + result = ntops.torch.corrcoef(x) + expected = torch.corrcoef(x) + assert torch.allclose(result, expected, rtol=1e-3, atol=1e-3) + + +@skip_if_cuda_not_available +def test_corrcoef_float64(): + """float64 precision.""" + x = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=torch.float64, device="cuda") + result = ntops.torch.corrcoef(x) + expected = torch.corrcoef(x) + assert torch.allclose(result, expected) + + +@skip_if_cuda_not_available +def test_corrcoef_negative_correlation(): + """Test negative correlation.""" + x = torch.tensor([[1.0, 2.0, 3.0, 4.0], [4.0, 3.0, 2.0, 1.0]], device="cuda") + result = ntops.torch.corrcoef(x) + expected = torch.corrcoef(x) + assert torch.allclose(result, expected) + # Off-diagonal should be negative + assert result[0, 1] < 0 + + +@skip_if_cuda_not_available +def test_corrcoef_single_variable(): + """Single variable — returns a scalar 1.0 (same as 
torch.corrcoef).""" + x = torch.tensor([[1.0, 2.0, 3.0, 4.0, 5.0]], device="cuda") + result = ntops.torch.corrcoef(x) + expected = torch.corrcoef(x) + assert torch.allclose(result, expected) + assert result.ndim == 0 # torch.corrcoef returns scalar for single var diff --git a/tests/test_count_nonzero.py b/tests/test_count_nonzero.py new file mode 100644 index 0000000..88bd9a4 --- /dev/null +++ b/tests/test_count_nonzero.py @@ -0,0 +1,109 @@ +import pytest +import torch + +import ntops +from tests.skippers import skip_if_cuda_not_available + + +@skip_if_cuda_not_available +def test_count_nonzero_basic(): + """Basic counting of nonzero elements.""" + x = torch.tensor([[1, 0, 3], [0, 5, 0]], device="cuda") + result = ntops.torch.count_nonzero(x) + expected = torch.count_nonzero(x) + assert result.item() == expected.item() + assert result.item() == 3 + + +@skip_if_cuda_not_available +def test_count_nonzero_all_zero(): + """All zero input.""" + x = torch.zeros(3, 4, device="cuda") + result = ntops.torch.count_nonzero(x) + expected = torch.count_nonzero(x) + assert result.item() == expected.item() + assert result.item() == 0 + + +@skip_if_cuda_not_available +def test_count_nonzero_all_nonzero(): + """All nonzero input.""" + x = torch.ones(3, 4, device="cuda") + result = ntops.torch.count_nonzero(x) + expected = torch.count_nonzero(x) + assert result.item() == expected.item() + assert result.item() == 12 + + +@skip_if_cuda_not_available +@pytest.mark.parametrize("dim", [0, 1]) +def test_count_nonzero_dim(dim): + """Counting along a specific dimension.""" + x = torch.tensor([[1, 0, 3], [0, 5, 0]], device="cuda") + result = ntops.torch.count_nonzero(x, dim=dim) + expected = torch.count_nonzero(x, dim=dim) + assert torch.equal(result, expected) + + +@skip_if_cuda_not_available +def test_count_nonzero_keepdim(): + """Counting with keepdim=True.""" + x = torch.tensor([[1, 0, 3], [0, 5, 0]], device="cuda") + + result0 = ntops.torch.count_nonzero(x, dim=0, keepdim=True) + 
expected0 = torch.count_nonzero(x, dim=0).unsqueeze(0) + assert torch.equal(result0, expected0) + assert result0.ndim == x.ndim + + result1 = ntops.torch.count_nonzero(x, dim=1, keepdim=True) + expected1 = torch.count_nonzero(x, dim=1).unsqueeze(1) + assert torch.equal(result1, expected1) + assert result1.ndim == x.ndim + + +@skip_if_cuda_not_available +def test_count_nonzero_float(): + """Float tensor with zeros.""" + x = torch.tensor([0.0, 1.5, -2.3, 0.0, 3.14], device="cuda") + result = ntops.torch.count_nonzero(x) + expected = torch.count_nonzero(x) + assert result.item() == expected.item() + assert result.item() == 3 + + +@skip_if_cuda_not_available +def test_count_nonzero_3d(): + """3D tensor.""" + x = torch.tensor([[[1, 0], [0, 0]], [[0, 2], [3, 0]]], device="cuda") + result = ntops.torch.count_nonzero(x) + expected = torch.count_nonzero(x) + assert result.item() == expected.item() + assert result.item() == 3 + + +@skip_if_cuda_not_available +def test_count_nonzero_3d_dim(): + """3D tensor with dim.""" + x = torch.tensor([[[1, 0], [0, 0]], [[0, 2], [3, 0]]], device="cuda") + for dim in range(3): + result = ntops.torch.count_nonzero(x, dim=dim) + expected = torch.count_nonzero(x, dim=dim) + assert torch.equal(result, expected) + + +@skip_if_cuda_not_available +def test_count_nonzero_large(): + """Large random tensor.""" + x = torch.randint(0, 5, (100, 100), device="cuda") + result = ntops.torch.count_nonzero(x) + expected = torch.count_nonzero(x) + assert result.item() == expected.item() + + +@skip_if_cuda_not_available +def test_count_nonzero_empty(): + """Empty tensor.""" + x = torch.empty(0, 3, device="cuda") + result = ntops.torch.count_nonzero(x) + expected = torch.count_nonzero(x) + assert result.item() == expected.item() diff --git a/tests/test_kl_div.py b/tests/test_kl_div.py new file mode 100644 index 0000000..5187dc6 --- /dev/null +++ b/tests/test_kl_div.py @@ -0,0 +1,122 @@ +import pytest +import math +import torch + +import ntops + + +def 
def kl_div_cpu(input, target, reduction="sum", log_target=False, eps=1e-10):
    """
    Reference KL divergence built from plain torch ops, matching the spec.

    Computes ``p * (log_p - input)`` element-wise, where p is clipped into
    ``[eps, 1]``, then applies the requested reduction.

    Args:
        input: Log-probabilities (log_q).
        target: Probabilities, or log-probabilities if ``log_target=True``.
        reduction: 'none' | 'sum' | 'mean' | 'batchmean'.
        log_target: Whether ``target`` is in log space.
        eps: Lower clipping bound for p.

    Returns:
        The element-wise loss for 'none', otherwise a scalar tensor.
        'batchmean' divides the sum by the size of dimension 0 and falls
        back to the plain sum for a 0-dim loss.

    Raises:
        ValueError: If ``reduction`` is not a supported value (previously
            this silently returned None, which could hide a typo in a test).
    """
    if reduction not in ("none", "sum", "mean", "batchmean"):
        raise ValueError(
            f"reduction must be one of 'none', 'sum', 'mean', 'batchmean', got '{reduction}'"
        )

    if log_target:
        # log_p is used as given; only the recovered p is clipped.
        log_p = target
        p = torch.clamp(torch.exp(log_p), min=eps, max=1.0)
    else:
        # Clip first so the logarithm never sees zero.
        p = torch.clamp(target, min=eps, max=1.0)
        log_p = torch.log(p)

    loss = p * (log_p - input)

    if reduction == "none":
        return loss
    if reduction == "mean":
        return loss.mean()

    total = loss.sum()
    if reduction == "batchmean" and loss.ndim > 0:
        return total / loss.shape[0]
    return total
torch.tensor([0.9, 0.1], dtype=dtype, device="cuda") + result = ntops.torch.kl_div(log_q, target, reduction="sum") + expected = kl_div_cpu(log_q, target, reduction="sum") + assert torch.allclose(result, expected, rtol=rtol, atol=atol) + + +@pytest.mark.parametrize("dtype, rtol, atol", DTYPE_TOLERANCES) +def test_kl_div_reduction_none(dtype, rtol, atol): + """No reduction — return element-wise loss.""" + log_q = torch.tensor([-1.0, -0.5], dtype=dtype, device="cuda") + target = torch.tensor([0.2, 0.8], dtype=dtype, device="cuda") + result = ntops.torch.kl_div(log_q, target, reduction="none") + expected = kl_div_cpu(log_q, target, reduction="none") + assert torch.allclose(result, expected, rtol=rtol, atol=atol) + assert result.shape == log_q.shape + + +@pytest.mark.parametrize("dtype, rtol, atol", DTYPE_TOLERANCES) +def test_kl_div_reduction_mean(dtype, rtol, atol): + """Mean reduction.""" + log_q = torch.tensor([-0.6931, -0.6931, -0.5108, -0.5108], dtype=dtype, device="cuda") + target = torch.tensor([0.9, 0.1, 0.5, 0.5], dtype=dtype, device="cuda") + result = ntops.torch.kl_div(log_q, target, reduction="mean") + expected = kl_div_cpu(log_q, target, reduction="mean") + assert torch.allclose(result, expected, rtol=rtol, atol=atol) + + +@pytest.mark.parametrize("dtype, rtol, atol", DTYPE_TOLERANCES) +def test_kl_div_reduction_batchmean(dtype, rtol, atol): + """Batchmean reduction.""" + log_q = torch.randn(4, 3, dtype=dtype, device="cuda").log_softmax(dim=1) + target = torch.randn(4, 3, dtype=dtype, device="cuda").softmax(dim=1) + result = ntops.torch.kl_div(log_q, target, reduction="batchmean") + expected = kl_div_cpu(log_q, target, reduction="batchmean") + assert torch.allclose(result, expected, rtol=rtol, atol=atol) + + +def test_kl_div_edge_cases(): + """Edge cases.""" + # Empty tensor + x = torch.empty(0, 3, device="cuda") + result = ntops.torch.kl_div(x, x, reduction="sum") + assert result.item() == 0.0 + + # Target at boundaries (0 and 1) — should be clamped + 
log_q = torch.tensor([-0.6931, -0.6931], device="cuda") + target = torch.tensor([0.0, 1.0], device="cuda") + result = ntops.torch.kl_div(log_q, target, reduction="sum") + assert not torch.isnan(result).any() + assert not torch.isinf(result).any() + + # Invalid reduction + with pytest.raises(ValueError): + ntops.torch.kl_div(log_q, target, reduction="invalid") + + +def test_kl_div_float64(): + """float64 precision.""" + log_q = torch.tensor([-0.693147, -0.693147], device="cuda", dtype=torch.float64) + target = torch.tensor([0.5, 0.5], device="cuda", dtype=torch.float64) + result = ntops.torch.kl_div(log_q, target, reduction="sum") + expected = kl_div_cpu(log_q, target, reduction="sum") + assert torch.allclose(result, expected, rtol=1e-7, atol=1e-7) diff --git a/tests/test_narrow.py b/tests/test_narrow.py new file mode 100644 index 0000000..62ce8b2 --- /dev/null +++ b/tests/test_narrow.py @@ -0,0 +1,15 @@ +import pytest, torch, ntops + +def test_narrow_basic(): + x = torch.arange(12, device="cuda").reshape(3, 4) + for dim, start, length in [(0, 0, 2), (1, 1, 2), (0, 1, 1), (1, 0, 4)]: + assert torch.equal(ntops.torch.narrow(x, dim, start, length), + torch.narrow(x, dim, start, length)) + +def test_narrow_1d(): + x = torch.tensor([1, 2, 3, 4, 5], device="cuda") + assert torch.equal(ntops.torch.narrow(x, 0, 2, 2), torch.tensor([3, 4], device="cuda")) + +def test_narrow_float16(): + x = torch.randn(10, device="cuda", dtype=torch.float16) + assert torch.equal(ntops.torch.narrow(x, 0, 3, 4), torch.narrow(x, 0, 3, 4)) From 0cea4fbd211eb1596154325e83e11baa0eb45356 Mon Sep 17 00:00:00 2001 From: Ifelseer <1138369491@qq.com> Date: Sun, 21 Jun 2026 05:30:17 +0000 Subject: [PATCH 2/2] honor --- HONOR_CODE.md | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 HONOR_CODE.md diff --git a/HONOR_CODE.md b/HONOR_CODE.md new file mode 100644 index 0000000..c93078f --- /dev/null +++ b/HONOR_CODE.md @@ -0,0 +1,73 @@ +``` +# 2026 
春季启元人工智能大赛诚信守则(Honor Code) + + +本人作为 2026 春季启元人工智能大赛(以下简称“比赛”)的参赛选手,郑重承诺严格遵守比赛规则及本诚信守则,秉持诚信、公正、廉洁的参赛原则,自觉维护比赛的公平性与严肃性。本人充分理解并认可,违反本准则将导致参赛资格被取消、比赛成绩作废等相应后果,且愿意承担由此产生的一切责任。 + +## 一、参赛诚信承诺 + +1. 本人保证所提交的赛题PR(Pull Request)中包含的算子实现代码及相关文档,均为本人(及参赛团队,如为团队参赛)在比赛期间独立完成或在明确标注参考来源的基础上进行开发,不存在任何欺诈、抄袭、作弊行为。 + +2. 本人承诺主动、全面、真实地披露赛题实现过程中所有参考的外部资源,尤其是开源代码资源,不隐瞒任何可能影响比赛公平性的信息。 + +3. 本人保证不采用任何不正当手段获取比赛优势,包括但不限于窃取其他参赛选手的代码成果、利用非比赛允许的工具或技术、与他人串通作弊等。 + +## 二、参考资源说明 + +本人确认已按比赛要求,将本次赛题实现过程中涉及的参考资源信息单独撰写至`REFERENCE.md`文件中,该文件将与本诚信守则一同作为PR附件提交。`REFERENCE.md`需根据实际参考情况,按以下要求完整填写,信息不完整或虚假填写将视为违反本准则: + +**情况1:无参考外部开源代码及核心实现思路** + +`REFERENCE.md`中需明确声明:“本次赛题提交的算子代码、核心算法逻辑及实现方案均为本人(及参赛团队)独立设计与开发,未参考任何外部开源项目、技术文档中的核心代码片段或实现思路,未接受任何第三方的技术指导或代码支持。” + +**情况2:有参考外部开源代码及相关资源** + +对每个参考资源提供以下信息陈述: +1. 参考开源项目/资源名称 + +2. 参考资源链接(GitHub/Gitee/论文/技术文档等) + +3. 参考的具体内容(请明确说明参考的代码片段、算法逻辑、实现思路等,需标注对应资源的具体位置,如文件路径、代码行数等) + +4. 本人对参考内容的修改与优化说明:(请详细说明在参考基础上,本人所做的独立开发、修改、优化工作,体现自身技术贡献) + +5. 若是开源项目,提供参考资源的开源协议类型:(如MIT、Apache 2.0、GPL等) + +6. 其他需要补充说明的信息 + + +## 三、禁止行为确认 + +本人明确知晓并承诺避免以下违反比赛公平性的行为,若存在以下任一情况,自愿接受比赛组委会的相应处罚: + +1. 未经授权复制、抄袭他人(包括其他参赛选手、开源项目、商业代码)的代码、算法或技术方案,且未进行明确标注; + +2. 隐瞒或虚假披露参考资源信息,包括遗漏重要参考来源、伪造参考内容说明等; + +3. 与其他参赛选手或第三方串通,进行代码共享、成果交换等违规协作; + +4. 利用比赛平台漏洞、技术缺陷或非比赛允许的工具获取不正当利益; + +5. 伪造比赛相关证明材料、提交虚假信息; + +6. 其他违反比赛规则及公序良俗的不诚信行为。 + + +## 四、责任与确认 + +1. 本人充分理解,比赛组委会将对所有提交的PR进行代码溯源、参考信息核查等公平性审查,若发现本人存在违反本准则的行为,有权随时取消本人的参赛资格、作废比赛成绩,情节严重的将在比赛相关平台进行公示。 + +2. 若因本人违反本准则导致比赛争议或第三方权益受损(如开源协议侵权等),本人将独立承担全部法律责任及相关损失,与比赛组委会无关。 + +3. 本人确认已仔细阅读并完全理解本诚信守则的全部内容,自愿签署本准则,接受比赛组委会的监督与审查。 + +## 五、签署信息 + +参赛选手姓名(团队参赛需填写所有成员姓名) +王一鸣 + + +签署日期 + +2026年6月1日 +``` \ No newline at end of file