On 03/10/2023 16:18, Victor Do Nascimento wrote:
Given the implementation of a mechanism of encoding system registers
into GCC, this patch provides the mechanism of validating their use by
the compiler.  In particular, this involves:

   1. Ensuring a supplied string corresponds to a known system
      register name.  System registers can be accessed either via their
      name (e.g. `SPSR_EL1') or their encoding (e.g. `S3_0_C4_C0_0').
      Register names are validated using a binary search of the
      `sysreg_names' structure populated from the
      `aarch64_system_regs.def' file via `match_reg'.
      The encoding naming convention is validated via a parser
      implemented in this patch - `is_implem_def_reg'.
   2. Once a given register name is deemed to be valid, it is checked
      against a further 2 criteria:
        a. Is the referenced register implemented in the target
           architecture?  This is achieved by comparing the ARCH field
          in the relevant SYSREG entry from `aarch64_system_regs.def'
          against `aarch64_feature_flags' flags set at compile-time.
        b. Is the register being used correctly?  Check the requested
                  operation against the FLAGS specified in SYSREG.
          This prevents operations like writing to a read-only system
          register.
        NOTE: For registers specified via their encoding
        (e.g. `S3_0_C4_C0_0'), once the encoding value is deemed valid
        (as per step 1) no further checks such as read/write support or
        architectural feature requirements are done and this second step
        is skipped, as is done in gas.

gcc/ChangeLog:

        * gcc/config/aarch64/aarch64-protos.h (aarch64_valid_sysreg_name_p): 
New.
        (aarch64_retrieve_sysreg): Likewise.
        * gcc/config/aarch64/aarch64.cc (match_reg): Likewise.
        (is_implem_def_reg): Likewise.
        (aarch64_valid_sysreg_name_p): Likewise.
        (aarch64_retrieve_sysreg): Likewise.
        (aarch64_sysreg_valid_for_rw_p): Likewise.
        * gcc/config/aarch64/predicates.md (aarch64_sysreg_string): New.
---
  gcc/config/aarch64/aarch64-protos.h |   2 +
  gcc/config/aarch64/aarch64.cc       | 121 ++++++++++++++++++++++++++++
  gcc/config/aarch64/predicates.md    |   4 +
  3 files changed, 127 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 60a55f4bc19..a134e2fcf8e 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -830,6 +830,8 @@ bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
  bool aarch64_sve_ptrue_svpattern_p (rtx, struct simd_immediate_info *);
  bool aarch64_simd_valid_immediate (rtx, struct simd_immediate_info *,
                        enum simd_immediate_check w = AARCH64_CHECK_MOV);
+bool aarch64_valid_sysreg_name_p (const char *);
+const char *aarch64_retrieve_sysreg (char *, bool);
  rtx aarch64_check_zero_based_sve_index_immediate (rtx);
  bool aarch64_sve_index_immediate_p (rtx);
  bool aarch64_sve_arith_immediate_p (machine_mode, rtx, bool);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 030b39ded1a..dd5ac1cbc8d 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -28070,6 +28070,127 @@ aarch64_pars_overlap_p (rtx par1, rtx par2)
    return false;
  }
+/* Binary search of a user-supplied system register name against
+   a database of known register names.  Upon match the index of
+   hit in database is returned, else return -1.  */

Given that we expect the number of explicit sysregs in a single compilation unit to be small, this is probably OK. An alternative would be to build a hashmap of the register names the first time this routine is called and then do a lookup in that. That would also avoid the need for the list to be maintained alphabetically.

+int
+match_reg (const char *ref, const char *database[], int db_len)
+{
+  /* Check for named system registers.  */
+  int imin = 0, imax = db_len - 1, mid, cmp_res;
+  while (imin <= imax)
+    {
+      mid = (imin + imax) / 2;
+
+      cmp_res = strcmp (ref, database[mid]);
+      if (cmp_res == 0)
+       return mid;
+      else if (cmp_res > 0)
+       imin = mid+1;
+      else
+       imax = mid-1;
+    }
+  return -1;
+}
+
+/* Parse an implementation-defined system register name of
+   the form S[0-3]_[0-7]_C[0-15]_C[0-15]_[1-7].
+   Return true if name matched against above pattern, false
+   otherwise.  */

Another advantage of using a hash map above would be that we could then add registers matched by this routine to the map and therefore optimize rescanning for them (on the basis that if they are used once, there's a good chance of them being used again).

+bool
+is_implem_def_reg (const char *regname)
+{
+    /* Check for implementation-defined system registers.  */
+  int name_len = strlen (regname);
+  if (name_len < 12 || name_len > 14)
+    return false;
+
+  int pos = 0, i = 0, j = 0;
+  char n[3] = {0}, m[3] = {0};
+  if (regname[pos] != 's' && regname[pos] != 'S')
+    return false;
+  pos++;
+  if (regname[pos] < '0' || regname[pos] > '3')
+    return false;
+  pos++;
+  if (regname[pos++] != '_')
+    return false;
+  if (regname[pos] < '0' || regname[pos] > '7')
+    return false;
+  pos++;
+  if (regname[pos++] != '_')
+    return false;
+  if (regname[pos] != 'c' && regname[pos] != 'C')
+    return false;
+  pos++;
+  while (regname[pos] != '_')
+    {
+      if (i > 2)
+       return false;
+      if (!ISDIGIT (regname[pos]))
+       return false;
+      n[i++] = regname[pos++];
+    }
+  if (atoi (n) > 15)
+    return false;
+  if (regname[pos++] != '_')
+    return false;
+  if (regname[pos] != 'c' && regname[pos] != 'C')
+    return false;
+  pos++;
+  while (regname[pos] != '_')
+    {
+      if (j > 2)
+       return false;
+      if (!ISDIGIT (regname[pos]))
+       return false;
+      m[j++] = regname[pos++];
+    }
+  if (atoi (m) > 15)
+    return false;
+  if (regname[pos++] != '_')
+    return false;
+  if (regname[pos] < '0' || regname[pos] > '7')
+    return false;
+  return true;
+}
+
+/* Ensure a supplied system register name is implemented in the target
+   architecture.  For use in back-end predicate `aarch64_sysreg_string'.  */
+bool
+aarch64_valid_sysreg_name_p (const char *regname)
+{
+  int reg_id = match_reg (regname, sysreg_names, nsysreg);
+  if (reg_id == -1)
+    return is_implem_def_reg (regname);
+  return (aarch64_isa_flags & sysreg_reqs[reg_id]);
+}
+
+/* Ensure a supplied system register name is suitable for the desired use.
+   This involves checking its suitability for the requested read/write
+   operation and that it is implemented in the target architecture.
+
+   NOTE: The read/write flags refer to whether a given register is
+   read/write-only.  */

The more usual comment style here would be something like:

Validate REGNAME against the permitted system register names, or a generic sysreg specification. WRITE_P is true iff the register is being written.


+const char *
+aarch64_retrieve_sysreg (char *regname, bool write_p)
+{
+  int reg_id = match_reg (regname, sysreg_names, nsysreg);
+  if (reg_id == -1)
+    {
+      if (is_implem_def_reg (regname))
+       return (const char *) regname;
+      else
+       return NULL;
+    }
+  if ((write_p && (sysreg_properties[reg_id] & F_REG_READ))
+      || (!write_p && (sysreg_properties[reg_id] & F_REG_WRITE)))
+    return NULL;
+  if (aarch64_isa_flags & sysreg_reqs[reg_id])
+    return sysreg_names_generic[reg_id];
+  return NULL;
+}
+
  /* Target-specific selftests.  */
                         ^^^^^^
I would be good to add some of the static tests that I mentioned in the binutils review to the selftests pass that GCC has. Eg, that the list is in alphabetical order (but see above about hash maps) and that each register has exactly one main definition (plus any number of aliases). This would then get run during the self-test run during the build process.

#if CHECKING_P
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 01de4743974..5f0d242e4a8 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -20,6 +20,10 @@
(include "../arm/common.md") +(define_predicate "aarch64_sysreg_string"
+  (and (match_code "const_string")
+       (match_test "aarch64_valid_sysreg_name_p (XSTR (op, 0))")))
+
  (define_special_predicate "cc_register"
    (and (match_code "reg")
         (and (match_test "REGNO (op) == CC_REGNUM")

R.

Reply via email to