More granular CPU features detection by Vika-F · Pull Request #3170 · uxlfoundation/oneDAL · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

More granular CPU features detection #3170

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 20 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions cpp/daal/include/services/cpu_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,25 @@ enum CpuType
lastCpuType = rv64
#endif
};

/**
 * Supported CPU features.
 * Each enumerator is a single-bit mask, so features can be combined with bitwise OR:
 * (avx512_bf16 | avx512_vnni) is a mask that has both the avx512_bf16 and the
 * avx512_vnni bits set.
 * A feature is tested with bitwise AND: (cpuFeatures & avx512_bf16) is non-zero
 * when the avx512_bf16 bit is set in cpuFeatures.
 */
enum CpuFeature
{
    unknown = 0ULL, /*!< Unknown features */
#if defined(TARGET_X86_64)
    sstep       = 1ULL << 0, /*!< Intel(R) SpeedStep */
    tb          = 1ULL << 1, /*!< Intel(R) Turbo Boost */
    avx512_bf16 = 1ULL << 2, /*!< AVX-512 bfloat16 */
    avx512_vnni = 1ULL << 3, /*!< AVX-512 Vector Neural Network Instructions (VNNI) */
    tb3         = 1ULL << 4, /*!< Intel(R) Turbo Boost Max 3.0 */
#endif
};
} // namespace daal
#endif
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
//--
*/

#include "services/env_detect.h"
#include "services/daal_defines.h"
#include "services/internal/daal_kernel_defines.h"

#if defined(TARGET_X86_64)
#include <immintrin.h>
Expand Down Expand Up @@ -103,6 +103,14 @@ DAAL_EXPORT bool daal_check_is_intel_cpu()
return result;
}

/// Check if the result of CPUID instruction contains the required mask.
///
/// \param eax Input EAX register value passed to CPUID.
/// \param ecx Input ECX register value passed to CPUID.
/// \param abcd_index The index of the output register to check:
/// 0 - EAX, 1 - EBX, 2 - ECX, 3 - EDX.
/// \param mask The bit mask to check in the output register.
/// \return 1 if the mask is present in the output register, 0 otherwise.
static int check_cpuid(uint32_t eax, uint32_t ecx, int abcd_index, uint32_t mask)
{
if (daal_get_max_extension_support() < eax)
Expand Down Expand Up @@ -219,6 +227,7 @@ DAAL_EXPORT int __daal_serv_cpu_detect(int enable)
#if defined(__APPLE__)
__daal_serv_CPUHasAVX512f_enable_it_mac();
#endif

if (check_avx512_features() && daal_check_is_intel_cpu())
{
return daal::avx512;
Expand All @@ -236,6 +245,84 @@ DAAL_EXPORT int __daal_serv_cpu_detect(int enable)

return daal::sse2;
}

/// Selects the CPU type to report, probing the tiers from the most to the least capable.
/// A tier is only probed when its DAAL_KERNEL_* macro is defined; the AVX-512 tier
/// additionally requires daal_check_is_intel_cpu() to succeed.
/// \return daal::avx512, daal::avx2 or daal::sse42 for the first tier whose check passes,
///         daal::sse2 otherwise.
int __daal_internal_enabled_cpu_detect()
{
#ifdef DAAL_KERNEL_AVX512
    const bool avx512_usable = check_avx512_features() && daal_check_is_intel_cpu();
    if (avx512_usable) return daal::avx512;
#endif

#ifdef DAAL_KERNEL_AVX2
    const bool avx2_usable = check_avx2_features();
    if (avx2_usable) return daal::avx2;
#endif

#ifdef DAAL_KERNEL_SSE42
    const bool sse42_usable = check_sse42_features();
    if (sse42_usable) return daal::sse42;
#endif

    // Nothing above qualified: report the baseline.
    return daal::sse2;
}

/// Returns the value computed by __daal_internal_enabled_cpu_detect().
/// The detection is run when the function-local static is initialized and the stored
/// value is returned on every call.
DAAL_EXPORT int daal_enabled_cpu_detect()
{
    static const int cached_cpu = __daal_internal_enabled_cpu_detect();
    return cached_cpu;
}

/// Sets a feature flag in \p result when CPUID reports the corresponding bit.
///
/// Expands to a single statement (do { ... } while (0)), so it can be used safely as the
/// body of an unbraced if/else. Every argument is parenthesized in the expansion, so
/// arbitrary expressions may be passed.
///
/// \param result   Lvalue holding the accumulated feature mask (DAAL_UINT64);
///                 \p feature is OR-ed into it when the bit is set.
/// \param eax      Input EAX register value passed to CPUID.
/// \param ecx      Input ECX register value passed to CPUID.
/// \param abcd_id  The index of the output register to check:
///                 0 - EAX, 1 - EBX, 2 - ECX, 3 - EDX.
/// \param bit      The bit position (0..31) in the output register to check.
/// \param feature  The CPU feature flag to set, of type daal::CpuFeature.
#define DAAL_TEST_CPU_FEATURE(result, eax, ecx, abcd_id, bit, feature) \
    do                                                                  \
    {                                                                   \
        /* Unsigned shift: the mask is a uint32_t and bit may be 31. */ \
        if (check_cpuid((eax), (ecx), (abcd_id), (1U << (bit))))        \
        {                                                               \
            (result) |= (feature);                                      \
        }                                                               \
    } while (0)

/// Builds the bit mask of daal::CpuFeature flags by querying CPUID.
/// Nothing is probed unless daal_check_is_intel_cpu() succeeds; in that case the
/// mask stays daal::CpuFeature::unknown.
/// \return The OR-ed combination of the detected daal::CpuFeature flags.
DAAL_UINT64 __daal_internal_serv_cpu_feature_detect()
{
    DAAL_UINT64 features = daal::CpuFeature::unknown;

    if (daal_check_is_intel_cpu())
    {
        // The AVX-512 sub-features are only queried when the base AVX-512 check passes.
        if (check_avx512_features())
        {
            // CPUID(eax=7, ecx=1): EAX bit 5
            DAAL_TEST_CPU_FEATURE(features, 7, 1, 0, 5, daal::CpuFeature::avx512_bf16);
            // CPUID(eax=7, ecx=0): ECX bit 11
            DAAL_TEST_CPU_FEATURE(features, 7, 0, 2, 11, daal::CpuFeature::avx512_vnni);
        }

        // CPUID(eax=1, ecx=0): ECX bit 7
        DAAL_TEST_CPU_FEATURE(features, 1, 0, 2, 7, daal::CpuFeature::sstep);
        // CPUID(eax=6, ecx=0): EAX bits 1 and 14
        DAAL_TEST_CPU_FEATURE(features, 6, 0, 0, 1, daal::CpuFeature::tb);
        DAAL_TEST_CPU_FEATURE(features, 6, 0, 0, 14, daal::CpuFeature::tb3);
    }

    return features;
}

#undef DAAL_TEST_CPU_FEATURE

/// Returns the feature mask computed by __daal_internal_serv_cpu_feature_detect().
/// The mask is computed when the function-local static is initialized and the stored
/// value is returned on every call.
DAAL_EXPORT DAAL_UINT64 daal_serv_cpu_feature_detect()
{
    static const DAAL_UINT64 cached_features = __daal_internal_serv_cpu_feature_detect();
    return cached_features;
}

#elif defined(TARGET_ARM)
static bool check_sve_features()
{
Expand All @@ -253,6 +340,17 @@ DAAL_EXPORT int __daal_serv_cpu_detect(int enable)
return -1;
}

/// ARM variant: returns daal::sve when DAAL_KERNEL_SVE is defined and
/// check_sve_features() succeeds, and -1 in every other case.
DAAL_EXPORT int daal_enabled_cpu_detect()
{
#ifdef DAAL_KERNEL_SVE
    return check_sve_features() ? static_cast<int>(daal::sve) : -1;
#else
    return -1;
#endif
}

void run_cpuid(uint32_t eax, uint32_t ecx, uint32_t * abcd)
{
// TODO: ARM implementation for cpuid
Expand All @@ -262,12 +360,23 @@ bool daal_check_is_intel_cpu()
{
return false;
}

/// ARM variant: no feature probing is done here, the mask is always
/// daal::CpuFeature::unknown.
DAAL_EXPORT DAAL_UINT64 daal_serv_cpu_feature_detect()
{
    const DAAL_UINT64 no_features = daal::CpuFeature::unknown;
    return no_features;
}

#elif defined(TARGET_RISCV64)
/// RISC-V variant: always reports daal::rv64. The \p enable argument is not read.
DAAL_EXPORT int __daal_serv_cpu_detect(int enable)
{
    const int detected_cpu = daal::rv64;
    return detected_cpu;
}

/// RISC-V variant: always reports daal::rv64.
DAAL_EXPORT int daal_enabled_cpu_detect()
{
    const int enabled_cpu = daal::rv64;
    return enabled_cpu;
}

void run_cpuid(uint32_t eax, uint32_t ecx, uint32_t * abcd)
{
// TODO: riscv64 implementation for cpuid
Expand All @@ -277,4 +386,9 @@ bool daal_check_is_intel_cpu()
{
return false;
}

/// RISC-V variant: no feature probing is done here, the mask is always
/// daal::CpuFeature::unknown.
DAAL_EXPORT DAAL_UINT64 daal_serv_cpu_feature_detect()
{
    const DAAL_UINT64 no_features = daal::CpuFeature::unknown;
    return no_features;
}
#endif
2 changes: 2 additions & 0 deletions cpp/daal/src/services/service_defines.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
#include "services/cpu_type.h"

// CPU detection entry point; the implementations in this change return daal::CpuType
// values converted to int.
DAAL_EXPORT int __daal_serv_cpu_detect(int);
// CPU type selected among the tiers enabled through the DAAL_KERNEL_* macros.
DAAL_EXPORT int daal_enabled_cpu_detect();
// Bit mask of daal::CpuFeature flags.
DAAL_EXPORT DAAL_UINT64 daal_serv_cpu_feature_detect();

// Runs CPUID with the given EAX/ECX inputs; abcd receives the output registers.
// NOTE(review): the 4-element EAX/EBX/ECX/EDX layout of abcd is assumed from the
// abcd_index convention of check_cpuid - confirm against the definition.
void run_cpuid(uint32_t eax, uint32_t ecx, uint32_t * abcd);
// Vendor check used to gate the Intel-specific code paths.
DAAL_EXPORT bool daal_check_is_intel_cpu();
Expand Down
13 changes: 8 additions & 5 deletions cpp/oneapi/dal/detail/cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,18 @@ ONEDAL_EXPORT cpu_extension from_daal_cpu_type(int cpu_type) {
}

/// Detects the highest supported CPU extension.
/// As written, a build with TARGET_ARM or TARGET_RISCV64 defined returns a fixed
/// extension (sve / rv64) and never reaches the DAAL query below; other builds
/// convert the result of __daal_serv_cpu_detect(0) with from_daal_cpu_type().
/// NOTE(review): this listing comes from a diff view and may interleave removed and
/// added lines - confirm whether the preprocessor early returns are still intended.
ONEDAL_EXPORT cpu_extension detect_top_cpu_extension() {
#if defined(TARGET_ARM)
return detail::cpu_extension::sve;
#elif defined(TARGET_RISCV64)
return detail::cpu_extension::rv64;
#endif
const auto daal_cpu = __daal_serv_cpu_detect(0);
return from_daal_cpu_type(daal_cpu);
}

/// Converts the value reported by daal_enabled_cpu_detect() to the oneDAL
/// cpu_extension enumeration.
ONEDAL_EXPORT cpu_extension detect_onedal_cpu_extension() {
    return from_daal_cpu_type(daal_enabled_cpu_detect());
}

/// Forwards to daal_serv_cpu_feature_detect() and returns its bit mask as uint64_t.
uint64_t detect_cpu_features() {
    const uint64_t feature_mask = daal_serv_cpu_feature_detect();
    return feature_mask;
}

} // namespace v1
} // namespace oneapi::dal::detail
64 changes: 58 additions & 6 deletions cpp/oneapi/dal/detail/cpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
#include <cstdint>
#include "oneapi/dal/common.hpp"

#include <map>
#include <string>

// TODO: Clean up this redefinition and import the defines globally.
#if defined(__x86_64__) || defined(__x86_64) || defined(__amd64) || defined(_M_AMD64)
#define TARGET_X86_64
Expand All @@ -35,27 +38,76 @@
namespace oneapi::dal::detail {
namespace v1 {

/// Identifies the CPU vendor / architecture family.
enum class cpu_vendor {
    unknown = 0,
    intel = 1,
    amd = 2,
    arm = 3,
    riscv64 = 4
};

/// CPU extension enumeration.
/// This enum is used to represent the highest supported CPU extension.
/// The available enumerators depend on which TARGET_* macro is defined; `none`
/// is always present.
enum class cpu_extension : uint64_t {
    none = 0U,
#if defined(TARGET_X86_64)
    sse2 = 1U << 0, ///< Intel(R) Streaming SIMD Extensions 2 (Intel(R) SSE2)
    sse42 = 1U << 2, ///< Intel(R) Streaming SIMD Extensions 4.2 (Intel(R) SSE4.2)
    avx2 = 1U << 4, ///< Intel(R) Advanced Vector Extensions 2 (Intel(R) AVX2)
    avx512 = 1U << 5 ///< Intel(R) Xeon(R) processors based on Intel(R) Advanced Vector Extensions 512 (Intel(R) AVX-512)
#elif defined(TARGET_ARM)
    sve = 1U << 0 ///< Arm(R) processors based on Arm's Scalable Vector Extension (SVE)
#elif defined(TARGET_RISCV64)
    rv64 = 1U << 0 ///< RISC-V 64-bit architecture
#endif
};

/// CPU feature bit flags.
/// Each enumerator is a single bit; the bit positions match the enumerators of the
/// same name in daal::CpuFeature.
enum class cpu_feature : uint64_t {
unknown = 0ULL,
#if defined(TARGET_X86_64)
sstep = 1ULL << 0, ///< Intel(R) SpeedStep
tb = 1ULL << 1, ///< Intel(R) Turbo Boost
avx512_bf16 = 1ULL << 2, ///< AVX512 bfloat16
avx512_vnni = 1ULL << 3, ///< AVX512 VNNI
tb3 = 1ULL << 4 ///< Intel(R) Turbo Boost Max 3.0
#endif
};

/// A map of CPU features to their string representations.
/// This map is used to convert CPU feature bitmasks to human-readable strings.
/// Keys are the cpu_feature enumerator values converted to uint64_t, one entry per flag.
/// Declared `inline` so that every translation unit including this header refers to
/// the same map object instead of constructing a private copy.
inline const std::map<uint64_t, const std::string> cpu_feature_map = {
    { uint64_t(cpu_feature::unknown), "Unknown" },
#if defined(TARGET_X86_64)
    { uint64_t(cpu_feature::sstep), "Intel(R) SpeedStep" },
    { uint64_t(cpu_feature::tb), "Intel(R) Turbo Boost" },
    { uint64_t(cpu_feature::avx512_bf16), "AVX-512 bfloat16" },
    { uint64_t(cpu_feature::avx512_vnni), "AVX-512 VNNI" },
    { uint64_t(cpu_feature::tb3), "Intel(R) Turbo Boost Max 3.0" }
#endif
};

/// Converts a DAAL CPU type value to the oneDAL enumeration.
/// The single int argument is the DAAL CPU type value, e.g. the result of
/// __daal_serv_cpu_detect() or daal_enabled_cpu_detect().
/// @return The corresponding oneDAL CPU extension value.
ONEDAL_EXPORT cpu_extension from_daal_cpu_type(int);

/// Detects the highest supported CPU extension.
/// @return The corresponding oneDAL CPU extension value.
ONEDAL_EXPORT cpu_extension detect_top_cpu_extension();

/// Detects the highest CPU extension used by oneDAL.
/// If REQCPU was used, it might be different from the one returned by detect_top_cpu_extension.
/// @return The corresponding oneDAL CPU extension value.
ONEDAL_EXPORT cpu_extension detect_onedal_cpu_extension();

/// Detects the CPU features.
/// @return Bitmask representing the supported CPU features.
/// @note The bitmask is a combination of the CPU feature bitflags defined in daal::CpuFeature enumeration.
/// NOTE(review): unlike the neighbouring declarations this one carries no ONEDAL_EXPORT -
/// confirm that this is intentional.
uint64_t detect_cpu_features();

} // namespace v1
using v1::cpu_vendor;
using v1::cpu_extension;
using v1::cpu_feature_map;
using v1::detect_top_cpu_extension;
using v1::detect_onedal_cpu_extension;
using v1::detect_cpu_features;
} // namespace oneapi::dal::detail
18 changes: 8 additions & 10 deletions cpp/oneapi/dal/detail/cpu_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,6 @@ cpu_info::cpu_info() {
#endif
}

/// Creates the platform-specific implementation object chosen by the TARGET_* macro
/// and forwards cpu_extension_ to its constructor.
/// When none of TARGET_X86_64, TARGET_ARM or TARGET_RISCV64 is defined, impl_ is not
/// assigned by this constructor.
cpu_info::cpu_info(const cpu_extension cpu_extension_) {
#if defined(TARGET_X86_64)
    using platform_info = cpu_info_x86;
#elif defined(TARGET_ARM)
    using platform_info = cpu_info_arm;
#elif defined(TARGET_RISCV64)
    using platform_info = cpu_info_riscv64;
#endif

#if defined(TARGET_X86_64) || defined(TARGET_ARM) || defined(TARGET_RISCV64)
    impl_ = detail::pimpl<cpu_info_iface>(std::make_unique<platform_info>(cpu_extension_));
#endif
}

/// Returns the CPU vendor reported by the underlying implementation object.
detail::cpu_vendor cpu_info::get_cpu_vendor() const {
    const detail::cpu_vendor vendor = impl_->get_cpu_vendor();
    return vendor;
}
Expand All @@ -57,6 +47,14 @@ detail::cpu_extension cpu_info::get_top_cpu_extension() const {
return impl_->get_top_cpu_extension();
}

/// Returns the oneDAL CPU extension reported by the underlying implementation object.
detail::cpu_extension cpu_info::get_onedal_cpu_extension() const {
    const detail::cpu_extension extension = impl_->get_onedal_cpu_extension();
    return extension;
}

/// Returns the CPU feature bit mask reported by the underlying implementation object.
uint64_t cpu_info::get_cpu_features() const {
    const uint64_t feature_mask = impl_->get_cpu_features();
    return feature_mask;
}

/// Returns the textual dump produced by the underlying implementation object.
std::string cpu_info::dump() const {
    std::string text = impl_->dump();
    return text;
}
Expand Down
3 changes: 2 additions & 1 deletion cpp/oneapi/dal/detail/cpu_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,11 @@ namespace v1 {
class cpu_info : public cpu_info_iface {
public:
cpu_info();
explicit cpu_info(const cpu_extension cpu_extension_);

cpu_vendor get_cpu_vendor() const override;
cpu_extension get_top_cpu_extension() const override;
cpu_extension get_onedal_cpu_extension() const override;
uint64_t get_cpu_features() const override;

std::string dump() const override;

Expand Down
9 changes: 3 additions & 6 deletions cpp/oneapi/dal/detail/cpu_info_arm_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,10 @@ namespace v1 {
/// Arm flavour of cpu_info_impl.
/// The constructors store their values in info_ under string keys; info_ is not
/// declared in this class (presumably a member of cpu_info_impl - confirm).
/// NOTE(review): this listing comes from a diff view and appears to interleave removed
/// and added lines; verify the constructor bodies against the actual file.
class cpu_info_arm : public cpu_info_impl {
public:
// Records SVE as the top extension and Arm as the vendor.
cpu_info_arm() {
info_["top_cpu_extension"] = cpu_extension::sve;
info_["vendor"] = cpu_vendor::arm;
}

// NOTE(review): the parameter name shadows the cpu_extension type inside this body.
explicit cpu_info_arm(const cpu_extension cpu_extension) {
// NOTE(review): as listed, this value is overwritten by the next statement.
info_["top_cpu_extension"] = cpu_extension;
info_["top_cpu_extension"] = detect_top_cpu_extension();
info_["onedal_cpu_extension"] = detect_onedal_cpu_extension();
info_["vendor"] = cpu_vendor::arm;
info_["cpu_features"] = detect_cpu_features();
}
};

Expand Down
8 changes: 8 additions & 0 deletions cpp/oneapi/dal/detail/cpu_info_iface.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,14 @@ class cpu_info_iface {
/// The highest supported CPU extension
virtual cpu_extension get_top_cpu_extension() const = 0;

/// The highest CPU extension used by oneDAL.
/// If the REQCPU make flag was used, it might be different from the one returned by get_top_cpu_extension.
virtual cpu_extension get_onedal_cpu_extension() const = 0;

/// The CPU features available on the system
/// @return The CPU features bitmask
virtual uint64_t get_cpu_features() const = 0;

/// The dump of all supported CPU features in the format:
/// feature 1: value1; feature2: value2; ...
virtual std::string dump() const = 0;
Expand Down
Loading
Loading
0