diff --git a/cuda_core/pyproject.toml b/cuda_core/pyproject.toml index 3b8a171e70..b08f435031 100644 --- a/cuda_core/pyproject.toml +++ b/cuda_core/pyproject.toml @@ -55,7 +55,7 @@ cu12 = ["cuda-bindings[all]==12.*"] cu13 = ["cuda-bindings[all]==13.*"] [dependency-groups] -test = ["cython>=3.2,<3.3", "setuptools", "pytest>=6.2.4", "pytest-randomly", "pytest-repeat"] +test = ["cython>=3.2,<3.3", "setuptools", "pytest>=6.2.4", "pytest-randomly", "pytest-repeat", "pytest-rerunfailures"] ml-dtypes = ["ml-dtypes>=0.5.4,<0.6.0"] test-cu12 = [ {include-group = "ml-dtypes" }, "cuda-core[test]", "cupy-cuda12x; python_version < '3.14'", "cuda-toolkit[cudart]==12.*"] # runtime headers needed by CuPy test-cu13 = [ {include-group = "ml-dtypes" }, "cuda-core[test]", "cupy-cuda13x; python_version < '3.14'", "cuda-toolkit[cudart]==13.*"] # runtime headers needed by CuPy diff --git a/cuda_core/tests/memory_ipc/test_errors.py b/cuda_core/tests/memory_ipc/test_errors.py index ccb3d3b7cc..fbf45f5fcc 100644 --- a/cuda_core/tests/memory_ipc/test_errors.py +++ b/cuda_core/tests/memory_ipc/test_errors.py @@ -1,14 +1,15 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import multiprocessing import pickle import re +import pytest from cuda.core import Buffer, Device, DeviceMemoryResource, DeviceMemoryResourceOptions from cuda.core._utils.cuda_utils import CUDAError -CHILD_TIMEOUT_SEC = 20 +CHILD_TIMEOUT_SEC = 30 NBYTES = 64 POOL_SIZE = 2097152 @@ -17,6 +18,7 @@ class ChildErrorHarness: """Test harness for checking errors in child processes. Subclasses override PARENT_ACTION, CHILD_ACTION, and ASSERT (see below for examples).""" + @pytest.mark.flaky(reruns=2) def test_main(self, ipc_device, ipc_memory_resource): """Parent process that checks child errors.""" # Attach fixtures to this object for convenience. These can be accessed diff --git a/cuda_core/tests/memory_ipc/test_event_ipc.py b/cuda_core/tests/memory_ipc/test_event_ipc.py index 1fabaeddda..a2636ddff5 100644 --- a/cuda_core/tests/memory_ipc/test_event_ipc.py +++ b/cuda_core/tests/memory_ipc/test_event_ipc.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import multiprocessing as mp @@ -10,13 +10,14 @@ from helpers.logging import TimestampedLogger ENABLE_LOGGING = False # Set True for test debugging and development -CHILD_TIMEOUT_SEC = 20 +CHILD_TIMEOUT_SEC = 30 NBYTES = 64 class TestEventIpc: """Check the basic usage of IPC-enabled events with a latch kernel.""" + @pytest.mark.flaky(reruns=2) def test_main(self, ipc_device, ipc_memory_resource): log = TimestampedLogger(prefix="parent: ", enabled=ENABLE_LOGGING) device = ipc_device @@ -93,6 +94,7 @@ def child_main(self, log, q_in, q_out): log("done") +@pytest.mark.flaky(reruns=2) def test_event_is_monadic(ipc_device): """Check that IPC-enabled events are always bound and cannot be reset.""" device = ipc_device @@ -108,6 +110,7 @@ def test_event_is_monadic(ipc_device): stream.record(e) +@pytest.mark.flaky(reruns=2) @pytest.mark.parametrize( "options", [{"ipc_enabled": True, "enable_timing": True}, EventOptions(ipc_enabled=True, enable_timing=True)] ) @@ -125,6 +128,7 @@ class TestIpcEventProperties: process. """ + @pytest.mark.flaky(reruns=2) @pytest.mark.parametrize("busy_waited_sync", [True, False]) @pytest.mark.parametrize("use_options_cls", [True, False]) @pytest.mark.parametrize("use_option_kw", [True, False]) diff --git a/cuda_core/tests/memory_ipc/test_ipc_duplicate_import.py b/cuda_core/tests/memory_ipc/test_ipc_duplicate_import.py index ca4ecc0749..148339c7ae 100644 --- a/cuda_core/tests/memory_ipc/test_ipc_duplicate_import.py +++ b/cuda_core/tests/memory_ipc/test_ipc_duplicate_import.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Test for duplicate IPC buffer imports. @@ -16,7 +16,7 @@ from cuda.core import Buffer, Device from helpers.logging import TimestampedLogger -CHILD_TIMEOUT_SEC = 20 +CHILD_TIMEOUT_SEC = 30 NBYTES = 64 POOL_SIZE = 2097152 @@ -60,6 +60,7 @@ def _set_start_method(self): with contextlib.suppress(RuntimeError): mp.set_start_method("spawn", force=True) + @pytest.mark.flaky(reruns=2) def test_main(self, ipc_device, ipc_memory_resource): log = TimestampedLogger(prefix="parent: ", enabled=ENABLE_LOGGING) ipc_device.set_current() diff --git a/cuda_core/tests/memory_ipc/test_leaks.py b/cuda_core/tests/memory_ipc/test_leaks.py index 02b3bcbbf4..0122dd4904 100644 --- a/cuda_core/tests/memory_ipc/test_leaks.py +++ b/cuda_core/tests/memory_ipc/test_leaks.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import contextlib @@ -14,7 +14,7 @@ HAVE_PSUTIL = True import pytest -CHILD_TIMEOUT_SEC = 20 +CHILD_TIMEOUT_SEC = 30 NBYTES = 64 USING_FDS = platform.system() == "Linux" @@ -23,6 +23,7 @@ ) +@pytest.mark.flaky(reruns=2) @skip_if_unrunnable def test_alloc_handle(ipc_memory_resource): """Check for fd leaks in get_allocation_handle.""" @@ -79,6 +80,7 @@ def __reduce__(self): raise RuntimeError("Irreducible") +@pytest.mark.flaky(reruns=2) @skip_if_unrunnable @pytest.mark.parametrize( "getobject", diff --git a/cuda_core/tests/memory_ipc/test_memory_ipc.py b/cuda_core/tests/memory_ipc/test_memory_ipc.py index d92a28ab5a..0d1c50bfbd 100644 --- a/cuda_core/tests/memory_ipc/test_memory_ipc.py +++ b/cuda_core/tests/memory_ipc/test_memory_ipc.py @@ -1,18 +1,20 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import multiprocessing as mp +import pytest from cuda.core import Buffer, DeviceMemoryResource from helpers.buffers import PatternGen -CHILD_TIMEOUT_SEC = 20 +CHILD_TIMEOUT_SEC = 30 NBYTES = 64 NWORKERS = 2 NTASKS = 2 class TestIpcMempool: + @pytest.mark.flaky(reruns=2) def test_main(self, ipc_device, ipc_memory_resource): """Test IPC with memory pools.""" # Set up the IPC-enabled memory pool and share it. @@ -54,6 +56,7 @@ def child_main(self, device, mr, queue): class TestIPCMempoolMultiple: + @pytest.mark.flaky(reruns=2) def test_main(self, ipc_device, ipc_memory_resource): """Test IPC with memory pools using multiple processes.""" # Construct an IPC-enabled memory resource and share it with two children. @@ -104,6 +107,7 @@ def child_main(self, device, mr, seed, queue): class TestIPCSharedAllocationHandleAndBufferDescriptors: + @pytest.mark.flaky(reruns=2) def test_main(self, ipc_device, ipc_memory_resource): """ Demonstrate that a memory pool allocation handle can be reused for IPC @@ -154,6 +158,7 @@ def child_main(self, device, alloc_handle, seed, queue): class TestIPCSharedAllocationHandleAndBufferObjects: + @pytest.mark.flaky(reruns=2) def test_main(self, ipc_device, ipc_memory_resource): """ Demonstrate that a memory pool allocation handle can be reused for IPC diff --git a/cuda_core/tests/memory_ipc/test_peer_access.py b/cuda_core/tests/memory_ipc/test_peer_access.py index 5a06133c9b..e2a4e6c5b2 100644 --- a/cuda_core/tests/memory_ipc/test_peer_access.py +++ b/cuda_core/tests/memory_ipc/test_peer_access.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import multiprocessing as mp @@ -8,7 +8,7 @@ from cuda.core._utils.cuda_utils import CUDAError from helpers.buffers import PatternGen -CHILD_TIMEOUT_SEC = 20 +CHILD_TIMEOUT_SEC = 30 NBYTES = 64 POOL_SIZE = 2097152 @@ -19,6 +19,7 @@ class TestPeerAccessNotPreservedOnImport: is sent to another process via IPC, and that peer access can be set after import. """ + @pytest.mark.flaky(reruns=2) def test_main(self, mempool_device_x2): dev0, dev1 = mempool_device_x2 @@ -57,6 +58,7 @@ class TestBufferPeerAccessAfterImport: setting peer access on the imported memory resource, and that access can be revoked. """ + @pytest.mark.flaky(reruns=2) @pytest.mark.parametrize("grant_access_in_parent", [True, False]) def test_main(self, mempool_device_x2, grant_access_in_parent): dev0, dev1 = mempool_device_x2 diff --git a/cuda_core/tests/memory_ipc/test_send_buffers.py b/cuda_core/tests/memory_ipc/test_send_buffers.py index 2df3fe1bbc..a645ec93d9 100644 --- a/cuda_core/tests/memory_ipc/test_send_buffers.py +++ b/cuda_core/tests/memory_ipc/test_send_buffers.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import multiprocessing as mp @@ -8,7 +8,7 @@ from cuda.core import Device, DeviceMemoryResource, DeviceMemoryResourceOptions from helpers.buffers import PatternGen -CHILD_TIMEOUT_SEC = 20 +CHILD_TIMEOUT_SEC = 30 NBYTES = 64 NMRS = 3 NTASKS = 7 @@ -16,6 +16,7 @@ class TestIpcSendBuffers: + @pytest.mark.flaky(reruns=2) @pytest.mark.parametrize("nmrs", (1, NMRS)) def test_main(self, ipc_device, nmrs): """Test passing buffers sourced from multiple memory resources.""" @@ -67,6 +68,7 @@ class TestIpcReexport: re-exported from B to C. """ + @pytest.mark.flaky(reruns=2) def test_main(self, ipc_device, ipc_memory_resource): # Set up the device. device = ipc_device diff --git a/cuda_core/tests/memory_ipc/test_serialize.py b/cuda_core/tests/memory_ipc/test_serialize.py index 546c8a91aa..3e31db19e4 100644 --- a/cuda_core/tests/memory_ipc/test_serialize.py +++ b/cuda_core/tests/memory_ipc/test_serialize.py @@ -1,14 +1,15 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import multiprocessing as mp import multiprocessing.reduction import os +import pytest from cuda.core import Buffer, Device, DeviceMemoryResource from helpers.buffers import PatternGen -CHILD_TIMEOUT_SEC = 20 +CHILD_TIMEOUT_SEC = 30 NBYTES = 64 POOL_SIZE = 2097152 @@ -21,6 +22,7 @@ class TestObjectSerializationDirect: it on the other end and demonstrate buffer sharing. """ + @pytest.mark.flaky(reruns=2) def test_main(self, ipc_device, ipc_memory_resource): device = ipc_device mr = ipc_memory_resource @@ -76,6 +78,7 @@ def child_main(self, conn): class TestObjectSerializationWithMR: + @pytest.mark.flaky(reruns=2) def test_main(self, ipc_device, ipc_memory_resource): """Test sending IPC memory objects to a child through a queue.""" device = ipc_device @@ -131,6 +134,7 @@ class TestObjectPassing: in multiprocessing (e.g., Queue) work. """ + @pytest.mark.flaky(reruns=2) def test_main(self, ipc_device, ipc_memory_resource): # Define the objects. device = ipc_device diff --git a/cuda_core/tests/memory_ipc/test_workerpool.py b/cuda_core/tests/memory_ipc/test_workerpool.py index b13b9896a1..25e63dddc5 100644 --- a/cuda_core/tests/memory_ipc/test_workerpool.py +++ b/cuda_core/tests/memory_ipc/test_workerpool.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import multiprocessing as mp @@ -9,7 +9,7 @@ from cuda.core import Buffer, Device, DeviceMemoryResource, DeviceMemoryResourceOptions from helpers.buffers import PatternGen -CHILD_TIMEOUT_SEC = 20 +CHILD_TIMEOUT_SEC = 30 NBYTES = 64 NWORKERS = 2 NMRS = 3 @@ -26,6 +26,7 @@ class TestIpcWorkerPool: resource (duplicates are ignored on the receiving end). """ + @pytest.mark.flaky(reruns=2) @pytest.mark.parametrize("nmrs", (1, NMRS)) def test_main(self, ipc_device, nmrs): device = ipc_device @@ -62,6 +63,7 @@ def init_worker(mrs): """Called during child process initialization to store received memory resources.""" TestIpcWorkerPoolUsingIPCDescriptors.mrs = mrs + @pytest.mark.flaky(reruns=2) @pytest.mark.parametrize("nmrs", (1, NMRS)) def test_main(self, ipc_device, nmrs): device = ipc_device @@ -106,6 +108,7 @@ def init_worker(mrs): # Passing mrs implicitly registers them. pass + @pytest.mark.flaky(reruns=2) @pytest.mark.parametrize("nmrs", (1, NMRS)) def test_main(self, ipc_device, nmrs): device = ipc_device