diff --git a/sycl/include/CL/sycl/aspects.hpp b/sycl/include/CL/sycl/aspects.hpp index ba4ab93b347cf..5b5d16175977a 100644 --- a/sycl/include/CL/sycl/aspects.hpp +++ b/sycl/include/CL/sycl/aspects.hpp @@ -48,6 +48,7 @@ enum class aspect { ext_oneapi_srgb = 30, ext_oneapi_native_assert = 31, host_debuggable = 32, + ext_intel_gpu_hw_threads_per_eu = 33, }; } // namespace sycl diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index c27be2edb35e8..4a0d719894c0e 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -301,6 +301,7 @@ typedef enum { PI_DEVICE_INFO_IMAGE_SRGB = 0x10027, PI_DEVICE_INFO_ATOMIC_64 = 0x10110, PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10111, + PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU = 0x10112, PI_EXT_ONEAPI_DEVICE_INFO_MAX_GLOBAL_WORK_GROUPS = 0x20000, PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_1D = 0x20001, PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D = 0x20002, diff --git a/sycl/include/CL/sycl/feature_test.hpp.in b/sycl/include/CL/sycl/feature_test.hpp.in index 381057e6cc684..25de3eedfa57b 100644 --- a/sycl/include/CL/sycl/feature_test.hpp.in +++ b/sycl/include/CL/sycl/feature_test.hpp.in @@ -23,7 +23,7 @@ namespace sycl { // Feature test macro definitions // TODO: Move these feature-test macros to compiler driver. -#define SYCL_EXT_INTEL_DEVICE_INFO 2 +#define SYCL_EXT_INTEL_DEVICE_INFO 3 #define SYCL_EXT_ONEAPI_SUB_GROUP_MASK 1 #define SYCL_EXT_ONEAPI_LOCAL_MEMORY 1 // As for SYCL_EXT_ONEAPI_MATRIX: diff --git a/sycl/include/CL/sycl/info/device_traits.def b/sycl/include/CL/sycl/info/device_traits.def index 7393a898636a9..b63857e22e1f9 100644 --- a/sycl/include/CL/sycl/info/device_traits.def +++ b/sycl/include/CL/sycl/info/device_traits.def @@ -95,6 +95,7 @@ __SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_eu_simd_width, pi_uint32) __SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_slices, pi_uint32) __SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_subslices_per_slice, pi_uint32) __SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_eu_count_per_subslice, pi_uint32) +__SYCL_PARAM_TRAITS_SPEC(device, ext_intel_gpu_hw_threads_per_eu, pi_uint32) __SYCL_PARAM_TRAITS_SPEC(device, ext_intel_max_mem_bandwidth, pi_uint64) __SYCL_PARAM_TRAITS_SPEC(device, ext_intel_mem_channel, bool) __SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_srgb, bool) diff --git a/sycl/include/CL/sycl/info/info_desc.hpp b/sycl/include/CL/sycl/info/info_desc.hpp index f47ff2b53399a..58cd6a4958c9b 100644 --- a/sycl/include/CL/sycl/info/info_desc.hpp +++ b/sycl/include/CL/sycl/info/info_desc.hpp @@ -156,6 +156,7 @@ enum class device : cl_device_info { ext_intel_gpu_subslices_per_slice = PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE, ext_intel_gpu_eu_count_per_subslice = PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE, + ext_intel_gpu_hw_threads_per_eu = PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU, ext_intel_max_mem_bandwidth = PI_DEVICE_INFO_MAX_MEM_BANDWIDTH, ext_intel_mem_channel = PI_MEM_PROPERTIES_CHANNEL, ext_oneapi_srgb = PI_DEVICE_INFO_IMAGE_SRGB, diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 77a7f9c50c008..2b642b79a46e7 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -1675,6 +1675,7 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_GPU_SLICES: case PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE: case PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE: + case PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU: case PI_DEVICE_INFO_MAX_MEM_BANDWIDTH: // TODO: Check if Intel device UUID extension is utilized for CUDA. // For details about this extension, see diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index d5ce87e8b410d..2ef095d7c2cc8 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -1607,6 +1607,7 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_GPU_SLICES: case PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE: case PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE: + case PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU: case PI_DEVICE_INFO_MAX_MEM_BANDWIDTH: return PI_INVALID_VALUE; diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 7eb43482e8a8c..bfe5016f64e20 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -2463,6 +2463,8 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, case PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE: return ReturnValue( pi_uint32{Device->ZeDeviceProperties->numEUsPerSubslice}); + case PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU: + return ReturnValue(pi_uint32{Device->ZeDeviceProperties->numThreadsPerEU}); case PI_DEVICE_INFO_MAX_MEM_BANDWIDTH: // currently not supported in level zero runtime return PI_INVALID_VALUE; diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index e259ac5e942cf..fc0bd8f04a024 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -178,12 +178,15 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, switch (paramName) { // TODO: Check regularly to see if support in enabled in OpenCL. // Intel GPU EU device-specific information extensions. + // Some of the queries are enabled by cl_intel_device_attribute_query + // extension, but it's not yet in the Registry. case PI_DEVICE_INFO_PCI_ADDRESS: case PI_DEVICE_INFO_GPU_EU_COUNT: case PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH: case PI_DEVICE_INFO_GPU_SLICES: case PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE: case PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE: + case PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU: case PI_DEVICE_INFO_MAX_MEM_BANDWIDTH: // TODO: Check if device UUID extension is enabled in OpenCL. // For details about Intel UUID extension, see diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index cd2be7559e72f..69dec907448dc 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -303,6 +303,11 @@ bool device_impl::has(aspect Aspect) const { MDevice, PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE, sizeof(pi_device_type), &device_type, &return_size) == PI_SUCCESS; + case aspect::ext_intel_gpu_hw_threads_per_eu: + return getPlugin().call_nocheck( + MDevice, PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU, + sizeof(pi_device_type), &device_type, + &return_size) == PI_SUCCESS; case aspect::ext_intel_device_info_uuid: { auto Result = getPlugin().call_nocheck( MDevice, PI_DEVICE_INFO_UUID, 0, nullptr, &return_size); diff --git a/sycl/source/detail/device_info.hpp b/sycl/source/detail/device_info.hpp index 7e953ad72eabd..681269659e3ed 100644 --- a/sycl/source/detail/device_info.hpp +++ b/sycl/source/detail/device_info.hpp @@ -1283,6 +1283,13 @@ get_device_info_host() { PI_INVALID_DEVICE); } template <> +inline cl_uint +get_device_info_host() { + throw runtime_error( + "Obtaining the HW threads count per EU is not supported on HOST device", + PI_INVALID_DEVICE); +} +template <> inline cl_ulong get_device_info_host() { throw runtime_error( diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 95e0d29106179..6abc4b4b35bba 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -4382,3 +4382,8 @@ _ZNK2cl4sycl9exception8categoryEv _ZNK2cl4sycl9kernel_id8get_nameEv __sycl_register_lib __sycl_unregister_lib +_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE131072EEENS3_12param_traitsIS4_XT_EE11return_typeEv +_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE131075EEENS3_12param_traitsIS4_XT_EE11return_typeEv +_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE131074EEENS3_12param_traitsIS4_XT_EE11return_typeEv +_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE131073EEENS3_12param_traitsIS4_XT_EE11return_typeEv +_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65810EEENS3_12param_traitsIS4_XT_EE11return_typeEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 0df42b5fa4e8d..996ef41a313aa 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -148,6 +148,7 @@ ??$get_info@$0EBJD@@device@sycl@cl@@QEBA_NXZ ??$get_info@$0EBJE@@device@sycl@cl@@QEBA_NXZ ??$get_info@$0ECBD@@device@sycl@cl@@QEBA_NXZ +??$get_info@$0BABBC@@device@sycl@cl@@QEBAIXZ ??$get_info@$0JAA@@platform@sycl@cl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ ??$get_info@$0JAB@@platform@sycl@cl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ ??$get_info@$0JAC@@platform@sycl@cl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ