int64_t get_device_attribute(int64_t attribute, int64_t device_id) { | |
int device, value; | |
if (device_id < 0) { | |
cudaGetDevice(&device); | |
} else { | |
device = device_id; | |
} | |
cudaDeviceGetAttribute(&value, static_cast<cudaDeviceAttr>(attribute), | |
device); | |
return value; | |
} | |
int64_t get_max_shared_memory_per_block_device_attribute(int64_t device_id) { | |
int64_t attribute; | |
// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html | |
// cudaDevAttrMaxSharedMemoryPerBlockOptin = 97 if not is_hip() else 74 | |
attribute = hipDeviceAttributeMaxSharedMemoryPerBlock; | |
attribute = cudaDevAttrMaxSharedMemoryPerBlockOptin; | |
return get_device_attribute(attribute, device_id); | |
} | |