// Compiler Explorer
//
// Source code

/*
 * Copyright (c) 2024, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <cuco/static_map.cuh>

#include <cuda/functional>

#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/functional.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/tuple.h>

#include <cmath>
#include <cstddef>
#include <cstdio>
#include <iostream>
#include <vector>

using Key   = int;
using Value = int;

/**
 * @brief Device callback that copies one key/value pair into a fixed slot of an output array
 * and prints it.
 *
 * The slot index is chosen at construction time; every invocation of `operator()` writes to
 * that same slot, so a later invocation overwrites an earlier one.
 */
struct write_ftor {
  int tid;                         // index of the output slot this functor writes to
  cuco::pair<Key, Value>* output;  // base pointer of the destination array

  /**
   * @brief Binds the functor to slot `i` of the array starting at `pairs`.
   *
   * @param pairs Destination array
   * @param i Index of the slot inside `pairs` that `operator()` will fill
   */
  __device__ write_ftor(cuco::pair<Key, Value>* pairs, int i) : tid{i}, output{pairs} {}

  /**
   * @brief Stores `p` into `output[tid]` and prints the slot index together with the stored
   * key and value.
   *
   * @param p Pair handed to the callback
   */
  __device__ void operator()(cuco::pair<Key, Value> const& p) const
  {
    auto& slot  = output[tid];
    slot.first  = p.first;
    slot.second = p.second;
    // Device-side printf is for debugging only; the values are read back from the slot.
    printf("i: %d, k: %d v: %d\n", tid, slot.first, slot.second);
  }
};

/**
 * @brief For each key in `[keys_begin, keys_begin + num_keys)`, calls `map_ref.for_each` with a
 * `write_ftor` bound to that key's position, so the callback stores its pair in `pairs[i]`.
 *
 * Uses a grid-stride loop, so any 1D launch configuration covers all `num_keys` inputs.
 * The index and stride are computed in `std::size_t`: the previous 32-bit arithmetic could wrap
 * when compared against the `std::size_t` bound, leaving the loop unable to terminate.
 *
 * @note `write_ftor` stores its slot index as `int`, so positions above `INT_MAX` cannot be
 * addressed by the callback; the narrowing is made explicit below.
 *
 * @tparam Map Type of the map ref; must provide `for_each(key, callback)`
 * @tparam KeyIter Iterator type of the keys; must support `keys_begin + i` and dereference
 * @tparam Pair Element type of the output array
 *
 * @param map_ref Map ref passed by value to the kernel
 * @param keys_begin Beginning of the key sequence
 * @param num_keys Number of keys to process
 * @param pairs Output array; the callback writes to `pairs[i]` for key position `i`
 */
template <typename Map, typename KeyIter, typename Pair>
__global__ void write(Map map_ref,
                      KeyIter keys_begin,
                      std::size_t num_keys,
                      Pair* pairs)
{
  // Widen before multiplying so the products are computed in 64 bits.
  auto const stride = static_cast<std::size_t>(gridDim.x) * blockDim.x;
  auto const first  = static_cast<std::size_t>(blockIdx.x) * blockDim.x + threadIdx.x;

  for (auto idx = first; idx < num_keys; idx += stride) {
    map_ref.for_each(*(keys_begin + idx), write_ftor(pairs, static_cast<int>(idx)));
  }
}

/**
 * @brief Example driver: inserts `num_keys` pairs {i, i} into a `cuco::static_map`, runs the
 * `write` kernel over keys 0..num_keys-1 through a `for_each` ref, then copies the collected
 * pairs to the host and prints them.
 *
 * @return 0 on success, 1 if the kernel launch or its execution reports a CUDA error
 */
int main()
{
  Key constexpr empty_key_sentinel     = -1;
  Value constexpr empty_value_sentinel = -1;

  // Number of key/value pairs to be inserted
  std::size_t constexpr num_keys = 10;

  // Compute capacity based on a 50% load factor
  auto constexpr load_factor = 0.5;
  std::size_t const capacity = static_cast<std::size_t>(std::ceil(num_keys / load_factor));

  auto map = cuco::static_map{capacity,
                              cuco::empty_key{empty_key_sentinel},
                              cuco::empty_value{empty_value_sentinel},
                              thrust::equal_to<Key>{},
                              cuco::linear_probing<1, cuco::default_hash_function<Key>>{}};

  auto const pairs_begin = thrust::make_transform_iterator(
    thrust::make_counting_iterator(0),
    cuda::proclaim_return_type<cuco::pair<Key, Value>>(
      [] __device__(auto i) { return cuco::pair<Key, Value>(i, i); }));

  // Insert `num_keys` pairs: {0, 0}, {1, 1}, ...
  map.insert(pairs_begin, pairs_begin + num_keys);

  // Get a non-owning `for_each` ref
  auto device_ref = map.ref(cuco::for_each);

  // Reports a failed CUDA call on stderr; returns true when `status` is cudaSuccess.
  auto const cuda_ok = [](cudaError_t status, char const* what) {
    if (status == cudaSuccess) { return true; }
    std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
    return false;
  };

  // Device-side destination, one slot per key (sized from `num_keys`, not a literal).
  thrust::device_vector<cuco::pair<Key, Value>> stored_pairs(num_keys);
  write<<<1, 32>>>(device_ref,
                   thrust::counting_iterator<Key>{0},
                   num_keys,
                   thrust::raw_pointer_cast(stored_pairs.data()));
  // A launch does not return an error itself: check the launch, then the execution.
  if (!cuda_ok(cudaGetLastError(), "write kernel launch")) { return 1; }
  if (!cuda_ok(cudaDeviceSynchronize(), "write kernel execution")) { return 1; }

  // Copy the collected pairs to the host; every element is overwritten by the copy,
  // so no placeholder initialization is needed.
  std::vector<cuco::pair<Key, Value>> host_pairs(num_keys);
  thrust::copy(stored_pairs.begin(), stored_pairs.end(), host_pairs.begin());

  // Print each pair
  for (auto const& p : host_pairs) {
    std::cout << "k: " << p.first << " v: " << p.second << std::endl;
  }
  return 0;
}