26 #include <kvikio/error.hpp>
27 #include <kvikio/shim/cuda.hpp>
// Page size constant used by this header: 4096.
// NOTE(review): presumably a size in bytes (4 KiB) — confirm against the callers that use it.
32 inline constexpr std::size_t page_size = 4096;
34 [[nodiscard]]
inline off_t convert_size2off(std::size_t x)
36 if (x >=
static_cast<std::size_t
>(std::numeric_limits<off_t>::max())) {
37 throw CUfileException(
"size_t argument too large to fit off_t");
39 return static_cast<off_t
>(x);
42 [[nodiscard]]
inline ssize_t convert_size2ssize(std::size_t x)
44 if (x >=
static_cast<std::size_t
>(std::numeric_limits<ssize_t>::max())) {
45 throw CUfileException(
"size_t argument too large to fit ssize_t");
47 return static_cast<ssize_t
>(x);
50 [[nodiscard]]
inline CUdeviceptr convert_void2deviceptr(
const void* devPtr)
53 return reinterpret_cast<CUdeviceptr
>(devPtr);
64 inline bool is_host_memory(
const void* ptr)
66 CUpointer_attribute attrs[1] = {
67 CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
69 CUmemorytype memtype{};
70 void* data[1] = {&memtype};
72 cudaAPI::instance().PointerGetAttributes(1, attrs, data, convert_void2deviceptr(ptr));
75 if (result == CUDA_ERROR_NOT_INITIALIZED) {
return true; }
76 CUDA_DRIVER_TRY(result);
81 return memtype == 0 || memtype == CU_MEMORYTYPE_HOST;
90 [[nodiscard]]
inline int get_device_ordinal_from_pointer(CUdeviceptr dev_ptr)
94 cudaAPI::instance().PointerGetAttribute(&ret, CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL, dev_ptr));
// NOTE(review): the lines below are fragments; the enclosing class and its constructor /
// destructor headers are not visible here. They presumably belong to `CudaPrimaryContext`
// (the type stored by get_primary_cuda_context) — confirm.
//
// Resolve `device_ordinal` to a device handle in `dev`, then retain that device's primary
// context into `ctx`.
108 CUDA_DRIVER_TRY(cudaAPI::instance().DeviceGet(&dev, device_ordinal));
109 CUDA_DRIVER_TRY(cudaAPI::instance().DevicePrimaryCtxRetain(&ctx, dev));
// Release the primary context of `dev`. The second macro argument presumably selects the
// exception type raised on failure — confirm in kvikio/error.hpp.
118 CUDA_DRIVER_TRY(cudaAPI::instance().DevicePrimaryCtxRelease(dev),
CUfileException);
// Print the error text to stderr; presumably inside a catch handler so that the release
// failure is reported instead of propagated — confirm.
120 std::cerr << e.what() << std::endl;
133 [[nodiscard]]
inline CUcontext get_primary_cuda_context(
int ordinal)
135 static std::map<int, CudaPrimaryContext> _primary_contexts;
136 _primary_contexts.try_emplace(ordinal, ordinal);
137 return _primary_contexts.at(ordinal).ctx;
146 [[nodiscard]]
inline std::optional<CUcontext> get_context_associated_pointer(CUdeviceptr dev_ptr)
148 CUcontext ctx =
nullptr;
150 cudaAPI::instance().PointerGetAttribute(&ctx, CU_POINTER_ATTRIBUTE_CONTEXT, dev_ptr);
151 if (err == CUDA_SUCCESS && ctx !=
nullptr) {
return ctx; }
152 if (err != CUDA_ERROR_INVALID_VALUE) { CUDA_DRIVER_TRY(err); }
162 [[nodiscard]]
inline bool current_context_can_access_pointer(CUdeviceptr dev_ptr)
164 CUdeviceptr current_ctx_dev_ptr{};
165 const CUresult err = cudaAPI::instance().PointerGetAttribute(
166 ¤t_ctx_dev_ptr, CU_POINTER_ATTRIBUTE_DEVICE_POINTER, dev_ptr);
167 if (err == CUDA_SUCCESS && current_ctx_dev_ptr == dev_ptr) {
return true; }
168 if (err != CUDA_ERROR_INVALID_VALUE) { CUDA_DRIVER_TRY(err); }
188 [[nodiscard]]
inline CUcontext get_context_from_pointer(
const void* devPtr)
190 CUdeviceptr dev_ptr = convert_void2deviceptr(devPtr);
194 auto ctx = get_context_associated_pointer(dev_ptr);
195 if (ctx.has_value()) {
return ctx.value(); }
201 CUcontext ctx =
nullptr;
202 CUDA_DRIVER_TRY(cudaAPI::instance().CtxGetCurrent(&ctx));
203 if (ctx !=
nullptr && current_context_can_access_pointer(dev_ptr)) {
return ctx; }
208 return get_primary_cuda_context(get_device_ordinal_from_pointer(dev_ptr));
// NOTE(review): the lines below are fragments; the enclosing class and its constructor /
// destructor headers are not visible here. They presumably belong to `PushAndPopContext`
// (the type instantiated in get_alloc_info) — confirm.
//
// Push `_ctx` as the current CUDA context.
221 CUDA_DRIVER_TRY(cudaAPI::instance().CtxPushCurrent(_ctx));
// Pop the current CUDA context, writing the popped context back into `_ctx`. The second
// macro argument presumably selects the exception type raised on failure — confirm in
// kvikio/error.hpp.
230 CUDA_DRIVER_TRY(cudaAPI::instance().CtxPopCurrent(&_ctx),
CUfileException);
// Print the error text to stderr; presumably inside a catch handler so that the pop
// failure is reported instead of propagated — confirm.
232 std::cerr << e.what() << std::endl;
238 inline std::tuple<void*, std::size_t, std::size_t> get_alloc_info(
const void* devPtr,
239 CUcontext* ctx =
nullptr)
241 auto dev = convert_void2deviceptr(devPtr);
242 CUdeviceptr base_ptr{};
243 std::size_t base_size{};
245 if (ctx !=
nullptr) {
248 _ctx = get_context_from_pointer(devPtr);
250 PushAndPopContext context(_ctx);
251 CUDA_DRIVER_TRY(cudaAPI::instance().MemGetAddressRange(&base_ptr, &base_size, dev));
252 std::size_t offset = dev - base_ptr;
254 return std::make_tuple(
reinterpret_cast<void*
>(base_ptr), base_size, offset);
/**
 * @brief Non-blocking check of a future's status.
 *
 * @tparam T Future-like type providing `wait_for()`.
 * @param future Future to poll.
 * @return `true` unless a zero-length wait reports `std::future_status::timeout`.
 */
template <typename T>
inline bool is_future_done(const T& future)
{
  // A zero-second wait returns immediately with the current status.
  const auto status = future.wait_for(std::chrono::seconds(0));
  const bool timed_out = (status == std::future_status::timeout);
  return !timed_out;
}
// CudaPrimaryContext: RAII wrapper for a CUDA primary context.
// PushAndPopContext: pushes a CUDA context on creation and pops it on destruction.