Skip to content

ARM: Fixes and additions to CPU feature detection #14049

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ggml/include/ggml-cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ extern "C" {
GGML_BACKEND_API int ggml_cpu_has_dotprod (void);
GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
GGML_BACKEND_API int ggml_cpu_has_sve (void);
GGML_BACKEND_API int ggml_cpu_has_sve2 (void);
GGML_BACKEND_API int ggml_cpu_get_sve_cnt (void); // sve vector length in bytes
GGML_BACKEND_API int ggml_cpu_has_sme (void);
// other
Expand Down
58 changes: 47 additions & 11 deletions ggml/src/ggml-cpu/ggml-cpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,13 @@
struct ggml_arm_arch_features_type {
int has_neon;
int has_dotprod;
int has_fp16_va;
int has_i8mm;
int has_sve;
int has_sve2;
int sve_cnt;
int has_sme;
} ggml_arm_arch_features = {-1, -1, -1, -1, 0, -1};
} ggml_arm_arch_features = {-1, -1, -1, -1, -1, -1, 0, -1};
#endif


Expand Down Expand Up @@ -689,8 +691,10 @@ static void ggml_init_arm_arch_features(void) {

ggml_arm_arch_features.has_neon = !!(hwcap & HWCAP_ASIMD);
ggml_arm_arch_features.has_dotprod = !!(hwcap & HWCAP_ASIMDDP);
ggml_arm_arch_features.has_fp16_va = !!(hwcap & HWCAP_FPHP);
Copy link
Preview

Copilot AI Jun 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The macro HWCAP_FPHP looks like a typo; the standard HWCAP for FP16 support is usually HWCAP_FP16. Verify and correct this macro to ensure proper runtime detection.

Suggested change
ggml_arm_arch_features.has_fp16_va = !!(hwcap & HWCAP_FPHP);
ggml_arm_arch_features.has_fp16_va = !!(hwcap & HWCAP_FP16);

Copilot uses AI. Check for mistakes.

ggml_arm_arch_features.has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
ggml_arm_arch_features.has_sve = !!(hwcap & HWCAP_SVE);
ggml_arm_arch_features.has_sve2 = !!(hwcap2 & HWCAP2_SVE2);
ggml_arm_arch_features.has_sme = !!(hwcap2 & HWCAP2_SME);

#if defined(__ARM_FEATURE_SVE)
Expand All @@ -709,6 +713,11 @@ static void ggml_init_arm_arch_features(void) {
}
ggml_arm_arch_features.has_dotprod = oldp;

if (sysctlbyname("hw.optional.arm.FEAT_FP16", &oldp, &size, NULL, 0) != 0) {
oldp = 0;
}
ggml_arm_arch_features.has_fp16_va = oldp;

if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &oldp, &size, NULL, 0) != 0) {
oldp = 0;
}
Expand All @@ -719,8 +728,9 @@ static void ggml_init_arm_arch_features(void) {
}
ggml_arm_arch_features.has_sme = oldp;

ggml_arm_arch_features.has_sve = 0;
ggml_arm_arch_features.sve_cnt = 0;
ggml_arm_arch_features.has_sve = 0;
ggml_arm_arch_features.has_sve2 = 0;
ggml_arm_arch_features.sve_cnt = 0;
#else
// Run-time CPU feature detection is not implemented for this platform; fall back to compile-time detection
#if defined(__ARM_NEON)
Expand All @@ -729,6 +739,18 @@ static void ggml_init_arm_arch_features(void) {
ggml_arm_arch_features.has_neon = 0;
#endif

#if defined(__ARM_FEATURE_DOTPROD)
ggml_arm_arch_features.has_dotprod = 1;
#else
ggml_arm_arch_features.has_dotprod = 0;
#endif

#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
Copy link
Preview

Copilot AI Jun 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The compile-time block for has_fp16_va may override the sysctl result unconditionally (even on Apple). Wrap these macros in an #else of the Apple-specific sysctl branch to avoid conflicting assignments.

Copilot uses AI. Check for mistakes.

ggml_arm_arch_features.has_fp16_va = 1;
#else
ggml_arm_arch_features.has_fp16_va = 0;
#endif

#if defined(__ARM_FEATURE_MATMUL_INT8)
ggml_arm_arch_features.has_i8mm = 1;
#else
Expand All @@ -743,6 +765,12 @@ static void ggml_init_arm_arch_features(void) {
ggml_arm_arch_features.sve_cnt = 0;
#endif

#if defined(__ARM_FEATURE_SVE2)
ggml_arm_arch_features.has_sve2 = 1;
#else
ggml_arm_arch_features.has_sve2 = 0;
#endif

#if defined(__ARM_FEATURE_SME) || defined(__ARM_FEATURE_SME2)
ggml_arm_arch_features.has_sme = 1;
#else
Expand Down Expand Up @@ -3377,14 +3405,6 @@ int ggml_cpu_has_f16c(void) {
#endif
}

// Compile-time probe for FP16 vector arithmetic: yields 1 when the build
// defines __ARM_FEATURE_FP16_VECTOR_ARITHMETIC and 0 otherwise.
int ggml_cpu_has_fp16_va(void) {
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
    const int fp16_va_enabled = 1;
#else
    const int fp16_va_enabled = 0;
#endif
    return fp16_va_enabled;
}

int ggml_cpu_has_wasm_simd(void) {
#if defined(__wasm_simd128__)
return 1;
Expand Down Expand Up @@ -3449,6 +3469,14 @@ int ggml_cpu_has_dotprod(void) {
#endif
}

int ggml_cpu_has_fp16_va(void) {
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder why we do a second ifdef here when this variable is set to 0 or 1 elsewhere.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See my comment below, case 1. The way I see it, during runtime detection the host may report that the CPU supports this feature, but if we disabled it at compilation, we want the function to always return 0.

return ggml_arm_arch_features.has_fp16_va;
#else
return 0;
#endif
}

int ggml_cpu_has_sve(void) {
#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SVE)
return ggml_arm_arch_features.has_sve;
Expand All @@ -3457,6 +3485,14 @@ int ggml_cpu_has_sve(void) {
#endif
}

// Reports SVE2 availability. Builds that are not targeting ARM, or that were
// compiled without __ARM_FEATURE_SVE2, always get 0; otherwise the has_sve2
// field of ggml_arm_arch_features is returned.
int ggml_cpu_has_sve2(void) {
#if !defined(__ARM_ARCH) || !defined(__ARM_FEATURE_SVE2)
    return 0;
#else
    return ggml_arm_arch_features.has_sve2;
#endif
}

int ggml_cpu_has_matmul_int8(void) {
#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_MATMUL_INT8)
return ggml_arm_arch_features.has_i8mm;
Expand Down
3 changes: 3 additions & 0 deletions ggml/src/ggml-cpu/ggml-cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,9 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
if (ggml_cpu_has_sve()) {
features.push_back({ "SVE", "1" });
}
if (ggml_cpu_has_sve2()) {
features.push_back({ "SVE2", "1" });
}
if (ggml_cpu_has_dotprod()) {
features.push_back({ "DOTPROD", "1" });
}
Expand Down
Loading