Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cuda_core/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ cu12 = ["cuda-bindings[all]==12.*"]
cu13 = ["cuda-bindings[all]==13.*"]

[dependency-groups]
test = ["cython>=3.2,<3.3", "setuptools", "pytest>=6.2.4", "pytest-randomly", "pytest-repeat"]
test = ["cython>=3.2,<3.3", "setuptools", "pytest>=6.2.4", "pytest-randomly", "pytest-repeat", "pytest-rerunfailures"]
ml-dtypes = ["ml-dtypes>=0.5.4,<0.6.0"]
test-cu12 = [ {include-group = "ml-dtypes" }, "cuda-core[test]", "cupy-cuda12x; python_version < '3.14'", "cuda-toolkit[cudart]==12.*"] # runtime headers needed by CuPy
test-cu13 = [ {include-group = "ml-dtypes" }, "cuda-core[test]", "cupy-cuda13x; python_version < '3.14'", "cuda-toolkit[cudart]==13.*"] # runtime headers needed by CuPy
Expand Down
6 changes: 4 additions & 2 deletions cuda_core/tests/memory_ipc/test_errors.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import multiprocessing
import pickle
import re

import pytest
from cuda.core import Buffer, Device, DeviceMemoryResource, DeviceMemoryResourceOptions
from cuda.core._utils.cuda_utils import CUDAError

CHILD_TIMEOUT_SEC = 20
CHILD_TIMEOUT_SEC = 30
NBYTES = 64
POOL_SIZE = 2097152

Expand All @@ -17,6 +18,7 @@ class ChildErrorHarness:
"""Test harness for checking errors in child processes. Subclasses override
PARENT_ACTION, CHILD_ACTION, and ASSERT (see below for examples)."""

@pytest.mark.flaky(reruns=2)
def test_main(self, ipc_device, ipc_memory_resource):
"""Parent process that checks child errors."""
# Attach fixtures to this object for convenience. These can be accessed
Expand Down
8 changes: 6 additions & 2 deletions cuda_core/tests/memory_ipc/test_event_ipc.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import multiprocessing as mp
Expand All @@ -10,13 +10,14 @@
from helpers.logging import TimestampedLogger

ENABLE_LOGGING = False # Set True for test debugging and development
CHILD_TIMEOUT_SEC = 20
CHILD_TIMEOUT_SEC = 30
NBYTES = 64


class TestEventIpc:
"""Check the basic usage of IPC-enabled events with a latch kernel."""

@pytest.mark.flaky(reruns=2)
def test_main(self, ipc_device, ipc_memory_resource):
log = TimestampedLogger(prefix="parent: ", enabled=ENABLE_LOGGING)
device = ipc_device
Expand Down Expand Up @@ -93,6 +94,7 @@ def child_main(self, log, q_in, q_out):
log("done")


@pytest.mark.flaky(reruns=2)
def test_event_is_monadic(ipc_device):
"""Check that IPC-enabled events are always bound and cannot be reset."""
device = ipc_device
Expand All @@ -108,6 +110,7 @@ def test_event_is_monadic(ipc_device):
stream.record(e)


@pytest.mark.flaky(reruns=2)
@pytest.mark.parametrize(
"options", [{"ipc_enabled": True, "enable_timing": True}, EventOptions(ipc_enabled=True, enable_timing=True)]
)
Expand All @@ -125,6 +128,7 @@ class TestIpcEventProperties:
process.
"""

@pytest.mark.flaky(reruns=2)
@pytest.mark.parametrize("busy_waited_sync", [True, False])
@pytest.mark.parametrize("use_options_cls", [True, False])
@pytest.mark.parametrize("use_option_kw", [True, False])
Expand Down
5 changes: 3 additions & 2 deletions cuda_core/tests/memory_ipc/test_ipc_duplicate_import.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""Test for duplicate IPC buffer imports.
Expand All @@ -16,7 +16,7 @@
from cuda.core import Buffer, Device
from helpers.logging import TimestampedLogger

CHILD_TIMEOUT_SEC = 20
CHILD_TIMEOUT_SEC = 30
NBYTES = 64
POOL_SIZE = 2097152

Expand Down Expand Up @@ -60,6 +60,7 @@ def _set_start_method(self):
with contextlib.suppress(RuntimeError):
mp.set_start_method("spawn", force=True)

@pytest.mark.flaky(reruns=2)
def test_main(self, ipc_device, ipc_memory_resource):
log = TimestampedLogger(prefix="parent: ", enabled=ENABLE_LOGGING)
ipc_device.set_current()
Expand Down
6 changes: 4 additions & 2 deletions cuda_core/tests/memory_ipc/test_leaks.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import contextlib
Expand All @@ -14,7 +14,7 @@
HAVE_PSUTIL = True
import pytest

CHILD_TIMEOUT_SEC = 20
CHILD_TIMEOUT_SEC = 30
NBYTES = 64

USING_FDS = platform.system() == "Linux"
Expand All @@ -23,6 +23,7 @@
)


@pytest.mark.flaky(reruns=2)
@skip_if_unrunnable
def test_alloc_handle(ipc_memory_resource):
"""Check for fd leaks in get_allocation_handle."""
Expand Down Expand Up @@ -79,6 +80,7 @@ def __reduce__(self):
raise RuntimeError("Irreducible")


@pytest.mark.flaky(reruns=2)
@skip_if_unrunnable
@pytest.mark.parametrize(
"getobject",
Expand Down
9 changes: 7 additions & 2 deletions cuda_core/tests/memory_ipc/test_memory_ipc.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import multiprocessing as mp

import pytest
from cuda.core import Buffer, DeviceMemoryResource
from helpers.buffers import PatternGen

CHILD_TIMEOUT_SEC = 20
CHILD_TIMEOUT_SEC = 30
NBYTES = 64
NWORKERS = 2
NTASKS = 2


class TestIpcMempool:
@pytest.mark.flaky(reruns=2)
def test_main(self, ipc_device, ipc_memory_resource):
"""Test IPC with memory pools."""
# Set up the IPC-enabled memory pool and share it.
Expand Down Expand Up @@ -54,6 +56,7 @@ def child_main(self, device, mr, queue):


class TestIPCMempoolMultiple:
@pytest.mark.flaky(reruns=2)
def test_main(self, ipc_device, ipc_memory_resource):
"""Test IPC with memory pools using multiple processes."""
# Construct an IPC-enabled memory resource and share it with two children.
Expand Down Expand Up @@ -104,6 +107,7 @@ def child_main(self, device, mr, seed, queue):


class TestIPCSharedAllocationHandleAndBufferDescriptors:
@pytest.mark.flaky(reruns=2)
def test_main(self, ipc_device, ipc_memory_resource):
"""
Demonstrate that a memory pool allocation handle can be reused for IPC
Expand Down Expand Up @@ -154,6 +158,7 @@ def child_main(self, device, alloc_handle, seed, queue):


class TestIPCSharedAllocationHandleAndBufferObjects:
@pytest.mark.flaky(reruns=2)
def test_main(self, ipc_device, ipc_memory_resource):
"""
Demonstrate that a memory pool allocation handle can be reused for IPC
Expand Down
6 changes: 4 additions & 2 deletions cuda_core/tests/memory_ipc/test_peer_access.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import multiprocessing as mp
Expand All @@ -8,7 +8,7 @@
from cuda.core._utils.cuda_utils import CUDAError
from helpers.buffers import PatternGen

CHILD_TIMEOUT_SEC = 20
CHILD_TIMEOUT_SEC = 30
NBYTES = 64
POOL_SIZE = 2097152

Expand All @@ -19,6 +19,7 @@ class TestPeerAccessNotPreservedOnImport:
is sent to another process via IPC, and that peer access can be set after import.
"""

@pytest.mark.flaky(reruns=2)
def test_main(self, mempool_device_x2):
dev0, dev1 = mempool_device_x2

Expand Down Expand Up @@ -57,6 +58,7 @@ class TestBufferPeerAccessAfterImport:
setting peer access on the imported memory resource, and that access can be revoked.
"""

@pytest.mark.flaky(reruns=2)
@pytest.mark.parametrize("grant_access_in_parent", [True, False])
def test_main(self, mempool_device_x2, grant_access_in_parent):
dev0, dev1 = mempool_device_x2
Expand Down
6 changes: 4 additions & 2 deletions cuda_core/tests/memory_ipc/test_send_buffers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import multiprocessing as mp
Expand All @@ -8,14 +8,15 @@
from cuda.core import Device, DeviceMemoryResource, DeviceMemoryResourceOptions
from helpers.buffers import PatternGen

CHILD_TIMEOUT_SEC = 20
CHILD_TIMEOUT_SEC = 30
NBYTES = 64
NMRS = 3
NTASKS = 7
POOL_SIZE = 2097152


class TestIpcSendBuffers:
@pytest.mark.flaky(reruns=2)
@pytest.mark.parametrize("nmrs", (1, NMRS))
def test_main(self, ipc_device, nmrs):
"""Test passing buffers sourced from multiple memory resources."""
Expand Down Expand Up @@ -67,6 +68,7 @@ class TestIpcReexport:
re-exported from B to C.
"""

@pytest.mark.flaky(reruns=2)
def test_main(self, ipc_device, ipc_memory_resource):
# Set up the device.
device = ipc_device
Expand Down
8 changes: 6 additions & 2 deletions cuda_core/tests/memory_ipc/test_serialize.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import multiprocessing as mp
import multiprocessing.reduction
import os

import pytest
from cuda.core import Buffer, Device, DeviceMemoryResource
from helpers.buffers import PatternGen

CHILD_TIMEOUT_SEC = 20
CHILD_TIMEOUT_SEC = 30
NBYTES = 64
POOL_SIZE = 2097152

Expand All @@ -21,6 +22,7 @@ class TestObjectSerializationDirect:
it on the other end and demonstrate buffer sharing.
"""

@pytest.mark.flaky(reruns=2)
def test_main(self, ipc_device, ipc_memory_resource):
device = ipc_device
mr = ipc_memory_resource
Expand Down Expand Up @@ -76,6 +78,7 @@ def child_main(self, conn):


class TestObjectSerializationWithMR:
@pytest.mark.flaky(reruns=2)
def test_main(self, ipc_device, ipc_memory_resource):
"""Test sending IPC memory objects to a child through a queue."""
device = ipc_device
Expand Down Expand Up @@ -131,6 +134,7 @@ class TestObjectPassing:
in multiprocessing (e.g., Queue) work.
"""

@pytest.mark.flaky(reruns=2)
def test_main(self, ipc_device, ipc_memory_resource):
# Define the objects.
device = ipc_device
Expand Down
7 changes: 5 additions & 2 deletions cuda_core/tests/memory_ipc/test_workerpool.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import multiprocessing as mp
Expand All @@ -9,7 +9,7 @@
from cuda.core import Buffer, Device, DeviceMemoryResource, DeviceMemoryResourceOptions
from helpers.buffers import PatternGen

CHILD_TIMEOUT_SEC = 20
CHILD_TIMEOUT_SEC = 30
NBYTES = 64
NWORKERS = 2
NMRS = 3
Expand All @@ -26,6 +26,7 @@ class TestIpcWorkerPool:
resource (duplicates are ignored on the receiving end).
"""

@pytest.mark.flaky(reruns=2)
@pytest.mark.parametrize("nmrs", (1, NMRS))
def test_main(self, ipc_device, nmrs):
device = ipc_device
Expand Down Expand Up @@ -62,6 +63,7 @@ def init_worker(mrs):
"""Called during child process initialization to store received memory resources."""
TestIpcWorkerPoolUsingIPCDescriptors.mrs = mrs

@pytest.mark.flaky(reruns=2)
@pytest.mark.parametrize("nmrs", (1, NMRS))
def test_main(self, ipc_device, nmrs):
device = ipc_device
Expand Down Expand Up @@ -106,6 +108,7 @@ def init_worker(mrs):
# Passing mrs implicitly registers them.
pass

@pytest.mark.flaky(reruns=2)
@pytest.mark.parametrize("nmrs", (1, NMRS))
def test_main(self, ipc_device, nmrs):
device = ipc_device
Expand Down
Loading