Fix CPU features initialization.  Use HWCAP rather than explicit accesses
to CPUID registers.  Perform the initialization atomically to avoid multi-
threading issues.

Passes regress, OK for commit and backport?

libgcc:
        PR target/115342
        * config/aarch64/cpuinfo.c (__init_cpu_features_constructor):
        Use HWCAP where possible.  Use atomic write for initialization.
        (__init_cpu_features_resolver): Use atomic load for correct
        initialization.
        (__init_cpu_features): Likewise.

---

diff --git a/libgcc/config/aarch64/cpuinfo.c b/libgcc/config/aarch64/cpuinfo.c
index 
4b94fca869507145ec690c825f637abbc82a3493..544c5516133ec3a554d1222de2ea9d5e6d4c27a9
 100644
--- a/libgcc/config/aarch64/cpuinfo.c
+++ b/libgcc/config/aarch64/cpuinfo.c
@@ -227,14 +227,22 @@ struct {
 #ifndef HWCAP2_SVE_EBF16
 #define HWCAP2_SVE_EBF16 (1UL << 33)
 #endif
+#ifndef HWCAP2_SME2
+#define HWCAP2_SME2 (1UL << 37)
+#endif
+#ifndef HWCAP2_LRCPC3
+#define HWCAP2_LRCPC3  (1UL << 46)
+#endif
 
 static void
-__init_cpu_features_constructor(unsigned long hwcap,
-                               const __ifunc_arg_t *arg) {
-#define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F
+__init_cpu_features_constructor (unsigned long hwcap,
+                                const __ifunc_arg_t *arg)
+{
+  unsigned long feat = 0;
+#define setCPUFeature(F) feat |= 1UL << F
 #define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr))
 #define extractBits(val, start, number) \
-  (val & ((1ULL << number) - 1ULL) << start) >> start
+  (val & ((1UL << number) - 1UL) << start) >> start
   unsigned long hwcap2 = 0;
   if (hwcap & _IFUNC_ARG_HWCAP)
     hwcap2 = arg->_hwcap2;
@@ -244,26 +252,20 @@ __init_cpu_features_constructor(unsigned long hwcap,
     setCPUFeature(FEAT_PMULL);
   if (hwcap & HWCAP_FLAGM)
     setCPUFeature(FEAT_FLAGM);
-  if (hwcap2 & HWCAP2_FLAGM2) {
-    setCPUFeature(FEAT_FLAGM);
+  if (hwcap2 & HWCAP2_FLAGM2)
     setCPUFeature(FEAT_FLAGM2);
-  }
-  if (hwcap & HWCAP_SM3 && hwcap & HWCAP_SM4)
+  if (hwcap & HWCAP_SM4)
     setCPUFeature(FEAT_SM4);
   if (hwcap & HWCAP_ASIMDDP)
     setCPUFeature(FEAT_DOTPROD);
   if (hwcap & HWCAP_ASIMDFHM)
     setCPUFeature(FEAT_FP16FML);
-  if (hwcap & HWCAP_FPHP) {
+  if (hwcap & HWCAP_FPHP)
     setCPUFeature(FEAT_FP16);
-    setCPUFeature(FEAT_FP);
-  }
   if (hwcap & HWCAP_DIT)
     setCPUFeature(FEAT_DIT);
   if (hwcap & HWCAP_ASIMDRDM)
     setCPUFeature(FEAT_RDM);
-  if (hwcap & HWCAP_ILRCPC)
-    setCPUFeature(FEAT_RCPC2);
   if (hwcap & HWCAP_AES)
     setCPUFeature(FEAT_AES);
   if (hwcap & HWCAP_SHA1)
@@ -277,22 +279,21 @@ __init_cpu_features_constructor(unsigned long hwcap,
   if (hwcap & HWCAP_SB)
     setCPUFeature(FEAT_SB);
   if (hwcap & HWCAP_SSBS)
-    setCPUFeature(FEAT_SSBS2);
-  if (hwcap2 & HWCAP2_MTE) {
-    setCPUFeature(FEAT_MEMTAG);
-    setCPUFeature(FEAT_MEMTAG2);
-  }
-  if (hwcap2 & HWCAP2_MTE3) {
-    setCPUFeature(FEAT_MEMTAG);
-    setCPUFeature(FEAT_MEMTAG2);
+    {
+      setCPUFeature(FEAT_SSBS);
+      setCPUFeature(FEAT_SSBS2);
+    }
+  if (hwcap2 & HWCAP2_MTE)
+    {
+      setCPUFeature(FEAT_MEMTAG);
+      setCPUFeature(FEAT_MEMTAG2);
+    }
+  if (hwcap2 & HWCAP2_MTE3)
     setCPUFeature(FEAT_MEMTAG3);
-  }
   if (hwcap2 & HWCAP2_SVEAES)
     setCPUFeature(FEAT_SVE_AES);
-  if (hwcap2 & HWCAP2_SVEPMULL) {
-    setCPUFeature(FEAT_SVE_AES);
+  if (hwcap2 & HWCAP2_SVEPMULL)
     setCPUFeature(FEAT_SVE_PMULL128);
-  }
   if (hwcap2 & HWCAP2_SVEBITPERM)
     setCPUFeature(FEAT_SVE_BITPERM);
   if (hwcap2 & HWCAP2_SVESHA3)
@@ -329,108 +330,76 @@ __init_cpu_features_constructor(unsigned long hwcap,
     setCPUFeature(FEAT_WFXT);
   if (hwcap2 & HWCAP2_SME)
     setCPUFeature(FEAT_SME);
+  if (hwcap2 & HWCAP2_SME2)
+    setCPUFeature(FEAT_SME2);
   if (hwcap2 & HWCAP2_SME_I16I64)
     setCPUFeature(FEAT_SME_I64);
   if (hwcap2 & HWCAP2_SME_F64F64)
     setCPUFeature(FEAT_SME_F64);
-  if (hwcap & HWCAP_CPUID) {
-    unsigned long ftr;
-    getCPUFeature(ID_AA64PFR1_EL1, ftr);
-    /* ID_AA64PFR1_EL1.MTE >= 0b0001  */
-    if (extractBits(ftr, 8, 4) >= 0x1)
-      setCPUFeature(FEAT_MEMTAG);
-    /* ID_AA64PFR1_EL1.SSBS == 0b0001  */
-    if (extractBits(ftr, 4, 4) == 0x1)
-      setCPUFeature(FEAT_SSBS);
-    /* ID_AA64PFR1_EL1.SME == 0b0010  */
-    if (extractBits(ftr, 24, 4) == 0x2)
-      setCPUFeature(FEAT_SME2);
-    getCPUFeature(ID_AA64PFR0_EL1, ftr);
-    /* ID_AA64PFR0_EL1.FP != 0b1111  */
-    if (extractBits(ftr, 16, 4) != 0xF) {
-      setCPUFeature(FEAT_FP);
-      /* ID_AA64PFR0_EL1.AdvSIMD has the same value as ID_AA64PFR0_EL1.FP  */
-      setCPUFeature(FEAT_SIMD);
-    }
-    /* ID_AA64PFR0_EL1.SVE != 0b0000  */
-    if (extractBits(ftr, 32, 4) != 0x0) {
-      /* get ID_AA64ZFR0_EL1, that name supported if sve enabled only  */
-      getCPUFeature(S3_0_C0_C4_4, ftr);
-      /* ID_AA64ZFR0_EL1.SVEver == 0b0000  */
-      if (extractBits(ftr, 0, 4) == 0x0)
-       setCPUFeature(FEAT_SVE);
-      /* ID_AA64ZFR0_EL1.SVEver == 0b0001  */
-      if (extractBits(ftr, 0, 4) == 0x1)
-       setCPUFeature(FEAT_SVE2);
-      /* ID_AA64ZFR0_EL1.BF16 != 0b0000  */
-      if (extractBits(ftr, 20, 4) != 0x0)
-       setCPUFeature(FEAT_SVE_BF16);
+  if (hwcap & HWCAP_CPUID)
+    {
+      unsigned long ftr;
+
+      getCPUFeature(ID_AA64ISAR1_EL1, ftr);
+      /* ID_AA64ISAR1_EL1.SPECRES >= 0b0001  */
+      if (extractBits(ftr, 40, 4) >= 0x1)
+       setCPUFeature(FEAT_PREDRES);
+      /* ID_AA64ISAR1_EL1.LS64 >= 0b0001  */
+      if (extractBits(ftr, 60, 4) >= 0x1)
+       setCPUFeature(FEAT_LS64);
+      /* ID_AA64ISAR1_EL1.LS64 >= 0b0010  */
+      if (extractBits(ftr, 60, 4) >= 0x2)
+       setCPUFeature(FEAT_LS64_V);
+      /* ID_AA64ISAR1_EL1.LS64 >= 0b0011  */
+      if (extractBits(ftr, 60, 4) >= 0x3)
+       setCPUFeature(FEAT_LS64_ACCDATA);
     }
-    getCPUFeature(ID_AA64ISAR0_EL1, ftr);
-    /* ID_AA64ISAR0_EL1.SHA3 != 0b0000  */
-    if (extractBits(ftr, 32, 4) != 0x0)
-      setCPUFeature(FEAT_SHA3);
-    getCPUFeature(ID_AA64ISAR1_EL1, ftr);
-    /* ID_AA64ISAR1_EL1.DPB >= 0b0001  */
-    if (extractBits(ftr, 0, 4) >= 0x1)
-      setCPUFeature(FEAT_DPB);
-    /* ID_AA64ISAR1_EL1.LRCPC != 0b0000  */
-    if (extractBits(ftr, 20, 4) != 0x0)
-      setCPUFeature(FEAT_RCPC);
-    /* ID_AA64ISAR1_EL1.LRCPC == 0b0011  */
-    if (extractBits(ftr, 20, 4) == 0x3)
-      setCPUFeature(FEAT_RCPC3);
-    /* ID_AA64ISAR1_EL1.SPECRES == 0b0001  */
-    if (extractBits(ftr, 40, 4) == 0x2)
-      setCPUFeature(FEAT_PREDRES);
-    /* ID_AA64ISAR1_EL1.BF16 != 0b0000  */
-    if (extractBits(ftr, 44, 4) != 0x0)
-      setCPUFeature(FEAT_BF16);
-    /* ID_AA64ISAR1_EL1.LS64 >= 0b0001  */
-    if (extractBits(ftr, 60, 4) >= 0x1)
-      setCPUFeature(FEAT_LS64);
-    /* ID_AA64ISAR1_EL1.LS64 >= 0b0010  */
-    if (extractBits(ftr, 60, 4) >= 0x2)
-      setCPUFeature(FEAT_LS64_V);
-    /* ID_AA64ISAR1_EL1.LS64 >= 0b0011  */
-    if (extractBits(ftr, 60, 4) >= 0x3)
-      setCPUFeature(FEAT_LS64_ACCDATA);
-  } else {
-    /* Set some features in case of no CPUID support.  */
-    if (hwcap & (HWCAP_FP | HWCAP_FPHP)) {
+
+  if (hwcap & HWCAP_FP)
+    {
       setCPUFeature(FEAT_FP);
       /* FP and AdvSIMD fields have the same value.  */
       setCPUFeature(FEAT_SIMD);
     }
-    if (hwcap & HWCAP_DCPOP || hwcap2 & HWCAP2_DCPODP)
-      setCPUFeature(FEAT_DPB);
-    if (hwcap & HWCAP_LRCPC || hwcap & HWCAP_ILRCPC)
-      setCPUFeature(FEAT_RCPC);
-    if (hwcap2 & HWCAP2_BF16 || hwcap2 & HWCAP2_EBF16)
-      setCPUFeature(FEAT_BF16);
-    if (hwcap2 & HWCAP2_SVEBF16)
-      setCPUFeature(FEAT_SVE_BF16);
-    if (hwcap2 & HWCAP2_SVE2 && hwcap & HWCAP_SVE)
-      setCPUFeature(FEAT_SVE2);
-    if (hwcap & HWCAP_SHA3)
-      setCPUFeature(FEAT_SHA3);
-  }
+  if (hwcap & HWCAP_DCPOP)
+    setCPUFeature(FEAT_DPB);
+  if (hwcap & HWCAP_LRCPC)
+    setCPUFeature(FEAT_RCPC);
+  if (hwcap & HWCAP_ILRCPC)
+    setCPUFeature(FEAT_RCPC2);
+  if (hwcap2 & HWCAP2_LRCPC3)
+    setCPUFeature(FEAT_RCPC3);
+  if (hwcap2 & HWCAP2_BF16)
+    setCPUFeature(FEAT_BF16);
+  if (hwcap2 & HWCAP2_SVEBF16)
+    setCPUFeature(FEAT_SVE_BF16);
+  if (hwcap & HWCAP_SVE)
+    setCPUFeature(FEAT_SVE);
+  if (hwcap2 & HWCAP2_SVE2)
+    setCPUFeature(FEAT_SVE2);
+  if (hwcap & HWCAP_SHA3)
+    setCPUFeature(FEAT_SHA3);
   setCPUFeature(FEAT_INIT);
+
+  __atomic_store_n (&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED);
 }
 
 void
-__init_cpu_features_resolver(unsigned long hwcap, const __ifunc_arg_t *arg) {
-  if (__aarch64_cpu_features.features)
+__init_cpu_features_resolver(unsigned long hwcap, const __ifunc_arg_t *arg)
+{
+  if (__atomic_load_n (&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
     return;
   __init_cpu_features_constructor(hwcap, arg);
 }
 
 void __attribute__ ((constructor))
-__init_cpu_features(void) {
+__init_cpu_features(void)
+{
   unsigned long hwcap;
   unsigned long hwcap2;
+
   /* CPU features already initialized.  */
-  if (__aarch64_cpu_features.features)
+  if (__atomic_load_n (&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
     return;
   hwcap = getauxval(AT_HWCAP);
   hwcap2 = getauxval(AT_HWCAP2);

Reply via email to