Skip to content

Commit 2bdd1f0

Browse files
authored
Merge branch 'dev' into patch-2
2 parents 9c6722c + 997ff31 commit 2bdd1f0

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

63 files changed

+800
-288
lines changed

.devcontainer/cuda11.8-gcc11/devcontainer.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"shutdownAction": "stopContainer",
3-
"image": "rapidsai/devcontainers:24.12-cpp-gcc11-cuda11.8-ubuntu22.04",
3+
"image": "rapidsai/devcontainers:25.06-cpp-gcc11-cuda11.8-ubuntu22.04",
44
"hostRequirements": {
55
"gpu": true
66
},

.devcontainer/cuda12.6-gcc12/devcontainer.json renamed to .devcontainer/cuda12.8-gcc12/devcontainer.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"shutdownAction": "stopContainer",
3-
"image": "rapidsai/devcontainers:24.12-cpp-gcc12-cuda12.6-ubuntu22.04",
3+
"image": "rapidsai/devcontainers:25.06-cpp-gcc12-cuda12.8-ubuntu22.04",
44
"hostRequirements": {
55
"gpu": true
66
},
@@ -14,8 +14,8 @@
1414
"SCCACHE_BUCKET": "rapids-sccache-devs",
1515
"AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
1616
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
17-
"DEVCONTAINER_NAME": "cuda12.6-gcc12",
18-
"CUCO_CUDA_VERSION": "12.6",
17+
"DEVCONTAINER_NAME": "cuda12.8-gcc12",
18+
"CUCO_CUDA_VERSION": "12.8",
1919
"CUCO_HOST_COMPILER": "gcc",
2020
"CUCO_HOST_COMPILER_VERSION": "12"
2121
},
@@ -38,5 +38,5 @@
3838
}
3939
}
4040
},
41-
"name": "cuda12.6-gcc12"
41+
"name": "cuda12.8-gcc12"
4242
}

.devcontainer/cuda12.6-gcc13/devcontainer.json renamed to .devcontainer/cuda12.8-gcc13/devcontainer.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"shutdownAction": "stopContainer",
3-
"image": "rapidsai/devcontainers:24.12-cpp-gcc13-cuda12.6-ubuntu22.04",
3+
"image": "rapidsai/devcontainers:25.06-cpp-gcc13-cuda12.8-ubuntu22.04",
44
"hostRequirements": {
55
"gpu": true
66
},
@@ -14,8 +14,8 @@
1414
"SCCACHE_BUCKET": "rapids-sccache-devs",
1515
"AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
1616
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
17-
"DEVCONTAINER_NAME": "cuda12.6-gcc13",
18-
"CUCO_CUDA_VERSION": "12.6",
17+
"DEVCONTAINER_NAME": "cuda12.8-gcc13",
18+
"CUCO_CUDA_VERSION": "12.8",
1919
"CUCO_HOST_COMPILER": "gcc",
2020
"CUCO_HOST_COMPILER_VERSION": "13"
2121
},
@@ -38,5 +38,5 @@
3838
}
3939
}
4040
},
41-
"name": "cuda12.6-gcc13"
41+
"name": "cuda12.8-gcc13"
4242
}

.devcontainer/devcontainer.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"shutdownAction": "stopContainer",
3-
"image": "rapidsai/devcontainers:24.12-cpp-gcc13-cuda12.6-ubuntu22.04",
3+
"image": "rapidsai/devcontainers:25.06-cpp-gcc13-cuda12.8-ubuntu22.04",
44
"hostRequirements": {
55
"gpu": true
66
},
@@ -14,8 +14,8 @@
1414
"SCCACHE_BUCKET": "rapids-sccache-devs",
1515
"AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
1616
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
17-
"DEVCONTAINER_NAME": "cuda12.6-gcc13",
18-
"CUCO_CUDA_VERSION": "12.6",
17+
"DEVCONTAINER_NAME": "cuda12.8-gcc13",
18+
"CUCO_CUDA_VERSION": "12.8",
1919
"CUCO_HOST_COMPILER": "gcc",
2020
"CUCO_HOST_COMPILER_VERSION": "13"
2121
},
@@ -38,5 +38,5 @@
3838
}
3939
}
4040
},
41-
"name": "cuda12.6-gcc13"
41+
"name": "cuda12.8-gcc13"
4242
}

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ ci:
1010

1111
repos:
1212
- repo: https://github.com/pre-commit/mirrors-clang-format
13-
rev: v18.1.8
13+
rev: v20.1.4
1414
hooks:
1515
- id: clang-format
1616
types_or: [c, c++, cuda]

CMakeLists.txt

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#=============================================================================
2-
# Copyright (c) 2018-2024, NVIDIA CORPORATION.
2+
# Copyright (c) 2018-2025, NVIDIA CORPORATION.
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
55
# you may not use this file except in compliance with the License.
@@ -15,8 +15,10 @@
1515
#=============================================================================
1616
cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)
1717

18+
set(rapids-cmake-version 25.06)
1819
if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/CUCO_RAPIDS.cmake)
19-
file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-25.02/RAPIDS.cmake
20+
file(DOWNLOAD
21+
https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${rapids-cmake-version}/RAPIDS.cmake
2022
${CMAKE_CURRENT_BINARY_DIR}/CUCO_RAPIDS.cmake)
2123
endif()
2224
include(${CMAKE_CURRENT_BINARY_DIR}/CUCO_RAPIDS.cmake)
@@ -94,9 +96,13 @@ if(BUILD_TESTS)
9496
endif(BUILD_TESTS)
9597

9698
###################################################################################################
97-
# - Optionally build google benchmarks ------------------------------------------------------------
99+
# - Optionally build nvbench benchmarks -----------------------------------------------------------
98100

99101
if(BUILD_BENCHMARKS)
102+
include(${rapids-cmake-dir}/cpm/nvbench.cmake)
103+
include(${rapids-cmake-dir}/cpm/package_override.cmake)
104+
rapids_cpm_nvbench(BUILD_STATIC)
105+
100106
add_subdirectory(benchmarks)
101107
endif(BUILD_BENCHMARKS)
102108

README.md

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

benchmarks/CMakeLists.txt

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#=============================================================================
2-
# Copyright (c) 2018-2024, NVIDIA CORPORATION.
2+
# Copyright (c) 2018-2025, NVIDIA CORPORATION.
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
55
# you may not use this file except in compliance with the License.
@@ -15,14 +15,6 @@
1515
#=============================================================================
1616
cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)
1717

18-
CPMAddPackage(
19-
NAME nvbench
20-
GITHUB_REPOSITORY NVIDIA/nvbench
21-
GIT_TAG main
22-
GIT_SHALLOW TRUE
23-
EXCLUDE_FROM_ALL YES
24-
)
25-
2618
###################################################################################################
2719
### compiler function #############################################################################
2820
###################################################################################################
@@ -89,7 +81,8 @@ ConfigureBench(DYNAMIC_MAP_BENCH
8981
dynamic_map/insert_bench.cu
9082
dynamic_map/find_bench.cu
9183
dynamic_map/contains_bench.cu
92-
dynamic_map/erase_bench.cu)
84+
dynamic_map/erase_bench.cu
85+
dynamic_map/retrieve_all_bench.cu)
9386

9487
###################################################################################################
9588
# - hash function benchmarks ----------------------------------------------------------------------
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/*
2+
* Copyright (c) 2025, NVIDIA CORPORATION.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include <benchmark_defaults.hpp>
18+
#include <benchmark_utils.hpp>
19+
20+
#include <cuco/dynamic_map.cuh>
21+
#include <cuco/utility/key_generator.cuh>
22+
23+
#include <nvbench/nvbench.cuh>
24+
25+
#include <thrust/device_vector.h>
26+
#include <thrust/transform.h>
27+
28+
using namespace cuco::benchmark; // defaults, dist_from_state
29+
using namespace cuco::utility; // key_generator, distribution
30+
31+
/**
32+
* @brief A benchmark evaluating `cuco::dynamic_map::retrieve_all` performance
*
* This overload participates only when `sizeof(Key) == sizeof(Value)`. It generates
* `NumInputs` keys, pairs each key with a value-initialized `Value`, inserts the pairs
* into a `cuco::dynamic_map`, and then measures only the `retrieve_all` call; key
* generation and insertion happen before `state.exec` and are not part of the timed lambda.
*
* @tparam Key Key type of the map under test
* @tparam Value Mapped type of the map under test
* @tparam Dist Key distribution tag forwarded to `dist_from_state`
*
* @param state nvbench state supplying the `NumInputs` and `InitSize` axis values
33+
*/
34+
template <typename Key, typename Value, typename Dist>
35+
std::enable_if_t<(sizeof(Key) == sizeof(Value)), void> dynamic_map_retrieve_all(
36+
nvbench::state& state, nvbench::type_list<Key, Value, Dist>)
37+
{
38+
using pair_type = cuco::pair<Key, Value>;
39+
40+
// Axis values: number of keys to generate and the size passed to the map constructor
auto const num_keys = state.get_int64("NumInputs");
41+
auto const initial_size = state.get_int64("InitSize");
42+
43+
thrust::device_vector<Key> keys(num_keys);
44+
45+
// Fill `keys` according to the distribution selected by `Dist` and the benchmark state
key_generator gen;
46+
gen.generate(dist_from_state<Dist>(state), keys.begin(), keys.end());
47+
48+
// Build the (key, value) inputs; every value is value-initialized via `{}`
thrust::device_vector<pair_type> pairs(num_keys);
49+
thrust::transform(keys.begin(), keys.end(), pairs.begin(), [] __device__(Key const& key) {
50+
return pair_type(key, {});
51+
});
52+
53+
// -1 is used as the sentinel for both empty keys and empty values
// NOTE(review): assumes the generated keys never equal -1 — confirm against key_generator
cuco::dynamic_map<Key, Value> map{
54+
static_cast<size_t>(initial_size), cuco::empty_key<Key>{-1}, cuco::empty_value<Value>{-1}};
55+
map.insert(pairs.begin(), pairs.end());
56+
// Prepare output buffers
// Sized from the map's reported size after insertion rather than from `num_keys`
57+
thrust::device_vector<Key> retrieved_keys(map.get_size());
58+
thrust::device_vector<Value> retrieved_values(map.get_size());
59+
60+
// Report the number of stored elements as the element count for this measurement
state.add_element_count(map.get_size());
61+
62+
// Only `retrieve_all` is executed inside the measured lambda, on the stream nvbench provides
// NOTE(review): `exec_tag::sync` presumably marks the call as synchronizing — confirm in nvbench docs
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
63+
map.retrieve_all(retrieved_keys.begin(), retrieved_values.begin(), launch.get_stream());
64+
});
65+
}
66+
67+
// Fallback overload selected when `sizeof(Key) != sizeof(Value)`: it runs no benchmark
// and marks the configuration as skipped.
// NOTE(review): the guard compares sizes, while the skip message talks about types —
// confirm whether the wording is intentional.
template <typename Key, typename Value, typename Dist>
68+
std::enable_if_t<(sizeof(Key) != sizeof(Value)), void> dynamic_map_retrieve_all(
69+
nvbench::state& state, nvbench::type_list<Key, Value, Dist>)
70+
{
71+
state.skip("Key should be the same type as Value.");
72+
}
73+
74+
// Registers `dynamic_map_retrieve_all` over the default key/value type ranges with the
// `unique` key distribution. `NumInputs` sweeps `defaults::N_RANGE`, while `InitSize`
// is a single-value axis holding `defaults::INITIAL_SIZE`.
NVBENCH_BENCH_TYPES(dynamic_map_retrieve_all,
75+
NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
76+
defaults::VALUE_TYPE_RANGE,
77+
nvbench::type_list<distribution::unique>))
78+
.set_name("dynamic_map_retrieve_all_unique_capacity")
79+
.set_type_axes_names({"Key", "Value", "Distribution"})
80+
.set_max_noise(defaults::MAX_NOISE)
81+
.add_int64_axis("NumInputs", defaults::N_RANGE)
82+
.add_int64_axis("InitSize", {defaults::INITIAL_SIZE});
83+
84+
// Registers `dynamic_map_retrieve_all` over the same type axes and `unique` distribution,
// but with both integer axes fixed to a single value: `NumInputs` is `defaults::N` and
// `InitSize` is `defaults::INITIAL_SIZE`.
NVBENCH_BENCH_TYPES(dynamic_map_retrieve_all,
85+
NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
86+
defaults::VALUE_TYPE_RANGE,
87+
nvbench::type_list<distribution::unique>))
88+
.set_name("dynamic_map_retrieve_all_fixed_capacity")
89+
.set_type_axes_names({"Key", "Value", "Distribution"})
90+
.set_max_noise(defaults::MAX_NOISE)
91+
.add_int64_axis("NumInputs", {defaults::N})
92+
.add_int64_axis("InitSize", {defaults::INITIAL_SIZE});

ci/build.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -134,17 +134,17 @@ while [ "${#args[@]}" -ne 0 ]; do
134134
esac
135135
done
136136

137+
if [ $VERBOSE ]; then
138+
set -x
139+
fi
140+
137141
# Convert to full paths:
138142
HOST_COMPILER=$(which ${HOST_COMPILER})
139143
CUDA_COMPILER=$(which ${CUDA_COMPILER})
140144
CMAKE_BINARY=$(which ${CMAKE_BINARY})
141145
# Make CUDA arch list compatible with cmake
142146
CUDA_ARCHS=$(echo "$CUDA_ARCHS" | tr ' ,' ';;')
143147

144-
if [ $VERBOSE ]; then
145-
set -x
146-
fi
147-
148148
# Begin processing unsets after option parsing
149149
set -u
150150

0 commit comments

Comments
 (0)