// Source code captured from Compiler Explorer.

/*
 * Copyright (c) 2020-2024, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <cuco/static_multimap.cuh>

#include <thrust/device_vector.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/logical.h>
#include <thrust/sequence.h>
#include <thrust/tuple.h>

#include <cmath>
#include <cstddef>
#include <iostream>
#include <limits>

using Key   = int;
using Value = int;

namespace cg = cooperative_groups;

/**
 * @brief Inserts `num_keys` key/value pairs into a multimap through a device view.
 *
 * The thread block is partitioned into tiles of `CGSize` threads. Every tile handles one
 * pair at a time: all threads of a tile compute the same pair index and call
 * `map_view.insert` together with the tile handle. After each insert the tile advances by
 * the total number of tiles in the grid until the index reaches `num_keys`.
 *
 * Launch expectations: 1D grid and 1D block (only the `.x` components are read).
 * NOTE(review): assumes `blockDim.x` is a multiple of `CGSize` and that the launch provides at
 * least `CGSize` threads, otherwise the stride below is zero and the loop never advances —
 * confirm against callers.
 *
 * @tparam CGSize Number of threads in each cooperative-group tile
 * @tparam Map Type of the mutable device view; must provide `insert(tile, pair)`
 * @tparam KeyIter Iterator type indexable on the device that yields keys
 * @tparam ValueIter Iterator type indexable on the device that yields values
 *
 * @param map_view Mutable view of the map, passed by value
 * @param key_begin Beginning of the sequence of keys
 * @param value_begin Beginning of the sequence of values
 * @param num_keys Number of key/value pairs to insert
 */
template <int CGSize, typename Map, typename KeyIter, typename ValueIter>
__global__ void insert(Map map_view,
                       KeyIter key_begin,
                       ValueIter value_begin,
                       std::size_t num_keys)
{
  auto tile = cg::tiled_partition<CGSize>(cg::this_thread_block());

  // Widen to std::size_t *before* multiplying: the built-in index variables are 32-bit
  // unsigned, so the products would otherwise wrap for launches with more than 2^32 threads.
  auto const loop_stride = (static_cast<std::size_t>(gridDim.x) * blockDim.x) / CGSize;
  auto idx = (static_cast<std::size_t>(blockDim.x) * blockIdx.x + threadIdx.x) / CGSize;

  while (idx < num_keys) {
    map_view.insert(tile, cuco::pair<Key, Value>{key_begin[idx], value_begin[idx]});
    idx += loop_stride;
  }
}

/**
 * @brief Inserts 50'000 key/value pairs {i, i} into a `cuco::static_multimap` using the
 * custom `insert` kernel and a mutable device view.
 *
 * @return 0 when the kernel was launched and completed without a CUDA error, 1 otherwise
 */
int main(void)
{
  // Empty slots are represented by reserved "sentinel" values. These values should be selected
  // such that they never occur in your input data.
  Key constexpr empty_key_sentinel     = -1;
  Value constexpr empty_value_sentinel = -1;

  // Number of key/value pairs to be inserted
  std::size_t constexpr num_keys = 50'000;

  // Create a sequence of keys and values {{0,0}, {1,1}, ... {i,i}}
  thrust::device_vector<Key> insert_keys(num_keys);
  thrust::sequence(insert_keys.begin(), insert_keys.end(), 0);
  thrust::device_vector<Value> insert_values(num_keys);
  thrust::sequence(insert_values.begin(), insert_values.end(), 0);

  // Compute capacity based on a 50% load factor
  auto constexpr load_factor = 0.5;
  std::size_t const capacity = std::ceil(num_keys / load_factor);

  // Constructs a map with "capacity" slots using -1 and -1 as the empty key/value sentinels.
  cuco::static_multimap<Key, Value> map{
    capacity, cuco::empty_key{empty_key_sentinel}, cuco::empty_value{empty_value_sentinel}};

  // Get a non-owning, mutable view of the map that allows inserts to pass by value into the
  // kernel
  auto device_insert_view = map.get_device_mutable_view();

  // Each key is handled by a tile of `cg_size` threads, so the launch needs
  // `cg_size * num_keys` threads in total; the grid size is the ceiling division of that
  // count by the block size.
  auto constexpr cg_size    = cuco::static_multimap<Key, Value>::cg_size();
  auto constexpr block_size = 256;
  auto const grid_size      = (cg_size * num_keys + block_size - 1) / block_size;
  insert<cg_size><<<grid_size, block_size>>>(
    device_insert_view, insert_keys.begin(), insert_values.begin(), num_keys);

  // A kernel launch returns no status itself; an invalid launch configuration is only
  // reported through cudaGetLastError().
  cudaError_t const launch_status = cudaGetLastError();
  if (launch_status != cudaSuccess) {
    std::cerr << "insert kernel launch failed: " << cudaGetErrorString(launch_status)
              << std::endl;
    return 1;
  }

  // The launch is asynchronous: wait for the kernel to finish so that faults raised during
  // execution are observed before reporting success.
  cudaError_t const sync_status = cudaDeviceSynchronize();
  if (sync_status != cudaSuccess) {
    std::cerr << "insert kernel execution failed: " << cudaGetErrorString(sync_status)
              << std::endl;
    return 1;
  }

  return 0;
}