Fix CPU features initialization. Use HWCAP rather than explicit accesses to CPUID registers. Perform the initialization atomically to avoid multi-threading issues.
Passes regress, OK for commit and backport? libgcc: PR target/115342 * config/aarch64/cpuinfo.c (__init_cpu_features_constructor): Use HWCAP where possible. Use atomic write for initialization. (__init_cpu_features_resolver): Use atomic load for correct initialization. (__init_cpu_features): Likewise. --- diff --git a/libgcc/config/aarch64/cpuinfo.c b/libgcc/config/aarch64/cpuinfo.c index 4b94fca869507145ec690c825f637abbc82a3493..544c5516133ec3a554d1222de2ea9d5e6d4c27a9 100644 --- a/libgcc/config/aarch64/cpuinfo.c +++ b/libgcc/config/aarch64/cpuinfo.c @@ -227,14 +227,22 @@ struct { #ifndef HWCAP2_SVE_EBF16 #define HWCAP2_SVE_EBF16 (1UL << 33) #endif +#ifndef HWCAP2_SME2 +#define HWCAP2_SME2 (1UL << 37) +#endif +#ifndef HWCAP2_LRCPC3 +#define HWCAP2_LRCPC3 (1UL << 46) +#endif static void -__init_cpu_features_constructor(unsigned long hwcap, - const __ifunc_arg_t *arg) { -#define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F +__init_cpu_features_constructor (unsigned long hwcap, + const __ifunc_arg_t *arg) +{ + unsigned long feat = 0; +#define setCPUFeature(F) feat |= 1UL << F #define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr)) #define extractBits(val, start, number) \ - (val & ((1ULL << number) - 1ULL) << start) >> start + (val & ((1UL << number) - 1UL) << start) >> start unsigned long hwcap2 = 0; if (hwcap & _IFUNC_ARG_HWCAP) hwcap2 = arg->_hwcap2; @@ -244,26 +252,20 @@ __init_cpu_features_constructor(unsigned long hwcap, setCPUFeature(FEAT_PMULL); if (hwcap & HWCAP_FLAGM) setCPUFeature(FEAT_FLAGM); - if (hwcap2 & HWCAP2_FLAGM2) { - setCPUFeature(FEAT_FLAGM); + if (hwcap2 & HWCAP2_FLAGM2) setCPUFeature(FEAT_FLAGM2); - } - if (hwcap & HWCAP_SM3 && hwcap & HWCAP_SM4) + if (hwcap & HWCAP_SM4) setCPUFeature(FEAT_SM4); if (hwcap & HWCAP_ASIMDDP) setCPUFeature(FEAT_DOTPROD); if (hwcap & HWCAP_ASIMDFHM) setCPUFeature(FEAT_FP16FML); - if (hwcap & HWCAP_FPHP) { + if (hwcap & HWCAP_FPHP) setCPUFeature(FEAT_FP16); - setCPUFeature(FEAT_FP); - } 
if (hwcap & HWCAP_DIT) setCPUFeature(FEAT_DIT); if (hwcap & HWCAP_ASIMDRDM) setCPUFeature(FEAT_RDM); - if (hwcap & HWCAP_ILRCPC) - setCPUFeature(FEAT_RCPC2); if (hwcap & HWCAP_AES) setCPUFeature(FEAT_AES); if (hwcap & HWCAP_SHA1) @@ -277,22 +279,21 @@ __init_cpu_features_constructor(unsigned long hwcap, if (hwcap & HWCAP_SB) setCPUFeature(FEAT_SB); if (hwcap & HWCAP_SSBS) - setCPUFeature(FEAT_SSBS2); - if (hwcap2 & HWCAP2_MTE) { - setCPUFeature(FEAT_MEMTAG); - setCPUFeature(FEAT_MEMTAG2); - } - if (hwcap2 & HWCAP2_MTE3) { - setCPUFeature(FEAT_MEMTAG); - setCPUFeature(FEAT_MEMTAG2); + { + setCPUFeature(FEAT_SSBS); + setCPUFeature(FEAT_SSBS2); + } + if (hwcap2 & HWCAP2_MTE) + { + setCPUFeature(FEAT_MEMTAG); + setCPUFeature(FEAT_MEMTAG2); + } + if (hwcap2 & HWCAP2_MTE3) setCPUFeature(FEAT_MEMTAG3); - } if (hwcap2 & HWCAP2_SVEAES) setCPUFeature(FEAT_SVE_AES); - if (hwcap2 & HWCAP2_SVEPMULL) { - setCPUFeature(FEAT_SVE_AES); + if (hwcap2 & HWCAP2_SVEPMULL) setCPUFeature(FEAT_SVE_PMULL128); - } if (hwcap2 & HWCAP2_SVEBITPERM) setCPUFeature(FEAT_SVE_BITPERM); if (hwcap2 & HWCAP2_SVESHA3) @@ -329,108 +330,76 @@ __init_cpu_features_constructor(unsigned long hwcap, setCPUFeature(FEAT_WFXT); if (hwcap2 & HWCAP2_SME) setCPUFeature(FEAT_SME); + if (hwcap2 & HWCAP2_SME2) + setCPUFeature(FEAT_SME2); if (hwcap2 & HWCAP2_SME_I16I64) setCPUFeature(FEAT_SME_I64); if (hwcap2 & HWCAP2_SME_F64F64) setCPUFeature(FEAT_SME_F64); - if (hwcap & HWCAP_CPUID) { - unsigned long ftr; - getCPUFeature(ID_AA64PFR1_EL1, ftr); - /* ID_AA64PFR1_EL1.MTE >= 0b0001 */ - if (extractBits(ftr, 8, 4) >= 0x1) - setCPUFeature(FEAT_MEMTAG); - /* ID_AA64PFR1_EL1.SSBS == 0b0001 */ - if (extractBits(ftr, 4, 4) == 0x1) - setCPUFeature(FEAT_SSBS); - /* ID_AA64PFR1_EL1.SME == 0b0010 */ - if (extractBits(ftr, 24, 4) == 0x2) - setCPUFeature(FEAT_SME2); - getCPUFeature(ID_AA64PFR0_EL1, ftr); - /* ID_AA64PFR0_EL1.FP != 0b1111 */ - if (extractBits(ftr, 16, 4) != 0xF) { - setCPUFeature(FEAT_FP); - /* ID_AA64PFR0_EL1.AdvSIMD 
has the same value as ID_AA64PFR0_EL1.FP */ - setCPUFeature(FEAT_SIMD); - } - /* ID_AA64PFR0_EL1.SVE != 0b0000 */ - if (extractBits(ftr, 32, 4) != 0x0) { - /* get ID_AA64ZFR0_EL1, that name supported if sve enabled only */ - getCPUFeature(S3_0_C0_C4_4, ftr); - /* ID_AA64ZFR0_EL1.SVEver == 0b0000 */ - if (extractBits(ftr, 0, 4) == 0x0) - setCPUFeature(FEAT_SVE); - /* ID_AA64ZFR0_EL1.SVEver == 0b0001 */ - if (extractBits(ftr, 0, 4) == 0x1) - setCPUFeature(FEAT_SVE2); - /* ID_AA64ZFR0_EL1.BF16 != 0b0000 */ - if (extractBits(ftr, 20, 4) != 0x0) - setCPUFeature(FEAT_SVE_BF16); + if (hwcap & HWCAP_CPUID) + { + unsigned long ftr; + + getCPUFeature(ID_AA64ISAR1_EL1, ftr); + /* ID_AA64ISAR1_EL1.SPECRES >= 0b0001 */ + if (extractBits(ftr, 40, 4) >= 0x1) + setCPUFeature(FEAT_PREDRES); + /* ID_AA64ISAR1_EL1.LS64 >= 0b0001 */ + if (extractBits(ftr, 60, 4) >= 0x1) + setCPUFeature(FEAT_LS64); + /* ID_AA64ISAR1_EL1.LS64 >= 0b0010 */ + if (extractBits(ftr, 60, 4) >= 0x2) + setCPUFeature(FEAT_LS64_V); + /* ID_AA64ISAR1_EL1.LS64 >= 0b0011 */ + if (extractBits(ftr, 60, 4) >= 0x3) + setCPUFeature(FEAT_LS64_ACCDATA); } - getCPUFeature(ID_AA64ISAR0_EL1, ftr); - /* ID_AA64ISAR0_EL1.SHA3 != 0b0000 */ - if (extractBits(ftr, 32, 4) != 0x0) - setCPUFeature(FEAT_SHA3); - getCPUFeature(ID_AA64ISAR1_EL1, ftr); - /* ID_AA64ISAR1_EL1.DPB >= 0b0001 */ - if (extractBits(ftr, 0, 4) >= 0x1) - setCPUFeature(FEAT_DPB); - /* ID_AA64ISAR1_EL1.LRCPC != 0b0000 */ - if (extractBits(ftr, 20, 4) != 0x0) - setCPUFeature(FEAT_RCPC); - /* ID_AA64ISAR1_EL1.LRCPC == 0b0011 */ - if (extractBits(ftr, 20, 4) == 0x3) - setCPUFeature(FEAT_RCPC3); - /* ID_AA64ISAR1_EL1.SPECRES == 0b0001 */ - if (extractBits(ftr, 40, 4) == 0x2) - setCPUFeature(FEAT_PREDRES); - /* ID_AA64ISAR1_EL1.BF16 != 0b0000 */ - if (extractBits(ftr, 44, 4) != 0x0) - setCPUFeature(FEAT_BF16); - /* ID_AA64ISAR1_EL1.LS64 >= 0b0001 */ - if (extractBits(ftr, 60, 4) >= 0x1) - setCPUFeature(FEAT_LS64); - /* ID_AA64ISAR1_EL1.LS64 >= 0b0010 */ - if 
(extractBits(ftr, 60, 4) >= 0x2) - setCPUFeature(FEAT_LS64_V); - /* ID_AA64ISAR1_EL1.LS64 >= 0b0011 */ - if (extractBits(ftr, 60, 4) >= 0x3) - setCPUFeature(FEAT_LS64_ACCDATA); - } else { - /* Set some features in case of no CPUID support. */ - if (hwcap & (HWCAP_FP | HWCAP_FPHP)) { + + if (hwcap & HWCAP_FP) + { setCPUFeature(FEAT_FP); /* FP and AdvSIMD fields have the same value. */ setCPUFeature(FEAT_SIMD); } - if (hwcap & HWCAP_DCPOP || hwcap2 & HWCAP2_DCPODP) - setCPUFeature(FEAT_DPB); - if (hwcap & HWCAP_LRCPC || hwcap & HWCAP_ILRCPC) - setCPUFeature(FEAT_RCPC); - if (hwcap2 & HWCAP2_BF16 || hwcap2 & HWCAP2_EBF16) - setCPUFeature(FEAT_BF16); - if (hwcap2 & HWCAP2_SVEBF16) - setCPUFeature(FEAT_SVE_BF16); - if (hwcap2 & HWCAP2_SVE2 && hwcap & HWCAP_SVE) - setCPUFeature(FEAT_SVE2); - if (hwcap & HWCAP_SHA3) - setCPUFeature(FEAT_SHA3); - } + if (hwcap & HWCAP_DCPOP) + setCPUFeature(FEAT_DPB); + if (hwcap & HWCAP_LRCPC) + setCPUFeature(FEAT_RCPC); + if (hwcap & HWCAP_ILRCPC) + setCPUFeature(FEAT_RCPC2); + if (hwcap2 & HWCAP2_LRCPC3) + setCPUFeature(FEAT_RCPC3); + if (hwcap2 & HWCAP2_BF16) + setCPUFeature(FEAT_BF16); + if (hwcap2 & HWCAP2_SVEBF16) + setCPUFeature(FEAT_SVE_BF16); + if (hwcap & HWCAP_SVE) + setCPUFeature(FEAT_SVE); + if (hwcap2 & HWCAP2_SVE2) + setCPUFeature(FEAT_SVE2); + if (hwcap & HWCAP_SHA3) + setCPUFeature(FEAT_SHA3); setCPUFeature(FEAT_INIT); + + __atomic_store_n (&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED); } void -__init_cpu_features_resolver(unsigned long hwcap, const __ifunc_arg_t *arg) { - if (__aarch64_cpu_features.features) +__init_cpu_features_resolver(unsigned long hwcap, const __ifunc_arg_t *arg) +{ + if (__atomic_load_n (&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) return; __init_cpu_features_constructor(hwcap, arg); } void __attribute__ ((constructor)) -__init_cpu_features(void) { +__init_cpu_features(void) +{ unsigned long hwcap; unsigned long hwcap2; + /* CPU features already initialized. 
*/ - if (__aarch64_cpu_features.features) + if (__atomic_load_n (&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) return; hwcap = getauxval(AT_HWCAP); hwcap2 = getauxval(AT_HWCAP2);