This patch adds the First Fault Register to the AArch64 port, as well
as a fake register known as the FFR Token or FFRT.  The main ACLE
patch explains what the FFRT does and how it works.

Tested on aarch64-linux-gnu (with and without SVE) and aarch64_be-elf.
Applied as r277561.

Richard


2019-10-29  Richard Sandiford  <richard.sandif...@arm.com>

gcc/
        * config/aarch64/aarch64.md (FFR_REGNUM, FFRT_REGNUM): New constants.
        * config/aarch64/aarch64.h (FIRST_PSEUDO_REGISTER): Bump to
        FFRT_REGNUM + 1.
        (FFR_REGS, PR_AND_FFR_REGS): New register classes.
        (REG_CLASS_NAMES, REG_CLASS_CONTENTS): Add entries for them.
        * config/aarch64/aarch64.c (pr_or_ffr_regnum_p): New function.
        (aarch64_hard_regno_nregs): Handle the new register classes.
        (aarch64_hard_regno_mode_ok): Likewise.
        (aarch64_regno_regclass): Likewise.
        (aarch64_class_max_nregs): Likewise.
        (aarch64_register_move_cost): Likewise.
        (aarch64_conditional_register_usage): Don't treat FFR and FFRT
        as general register_operands.

Index: gcc/config/aarch64/aarch64.md
===================================================================
--- gcc/config/aarch64/aarch64.md       2019-10-22 08:46:57.359355939 +0100
+++ gcc/config/aarch64/aarch64.md       2019-10-29 08:43:45.930130470 +0000
@@ -107,6 +107,10 @@ (define_constants
     (P13_REGNUM                81)
     (P14_REGNUM                82)
     (P15_REGNUM                83)
+    (FFR_REGNUM                84)
+    ;; "FFR token": a fake register used for representing the scheduling
+    ;; restrictions on FFR-related operations.
+    (FFRT_REGNUM       85)
     ;; Scratch register used by stack clash protection to calculate
     ;; SVE CFA offsets during probing.
     (STACK_CLASH_SVE_CFA_REGNUM 11)
Index: gcc/config/aarch64/aarch64.h
===================================================================
--- gcc/config/aarch64/aarch64.h        2019-10-22 08:46:57.363355908 +0100
+++ gcc/config/aarch64/aarch64.h        2019-10-29 08:43:45.926130497 +0000
@@ -366,6 +366,9 @@ #define TARGET_SIMD_RDMA (TARGET_SIMD &&
    P0-P7        Predicate low registers: valid in all predicate contexts
    P8-P15       Predicate high registers: used as scratch space
 
+   FFR         First Fault Register, a fixed-use SVE predicate register
+   FFRT                FFR token: a fake register used for modelling dependencies
+
    VG           Pseudo "vector granules" register
 
    VG is the number of 64-bit elements in an SVE vector.  We define
@@ -386,6 +389,7 @@ #define FIXED_REGISTERS                                     \
     1, 1, 1, 1,                        /* SFP, AP, CC, VG */   \
     0, 0, 0, 0,   0, 0, 0, 0,   /* P0 - P7 */           \
     0, 0, 0, 0,   0, 0, 0, 0,   /* P8 - P15 */          \
+    1, 1                       /* FFR and FFRT */      \
   }
 
 /* X30 is marked as caller-saved which is in line with regular function call
@@ -408,6 +412,7 @@ #define CALL_USED_REGISTERS                         \
     1, 1, 1, 1,                        /* SFP, AP, CC, VG */   \
     1, 1, 1, 1,   1, 1, 1, 1,  /* P0 - P7 */           \
     1, 1, 1, 1,   1, 1, 1, 1,  /* P8 - P15 */          \
+    1, 1                       /* FFR and FFRT */      \
   }
 
 #define REGISTER_NAMES                                         \
@@ -423,6 +428,7 @@ #define REGISTER_NAMES                                      \
     "sfp", "ap",  "cc",  "vg",                                 \
     "p0",  "p1",  "p2",  "p3",  "p4",  "p5",  "p6",  "p7",     \
     "p8",  "p9",  "p10", "p11", "p12", "p13", "p14", "p15",    \
+    "ffr", "ffrt"                                              \
   }
 
 /* Generate the register aliases for core register N */
@@ -471,7 +477,7 @@ #define HARD_FRAME_POINTER_REGNUM   R29_RE
 #define FRAME_POINTER_REGNUM           SFP_REGNUM
 #define STACK_POINTER_REGNUM           SP_REGNUM
 #define ARG_POINTER_REGNUM             AP_REGNUM
-#define FIRST_PSEUDO_REGISTER          (P15_REGNUM + 1)
+#define FIRST_PSEUDO_REGISTER          (FFRT_REGNUM + 1)
 
 /* The number of (integer) argument register available.  */
 #define NUM_ARG_REGS                   8
@@ -601,6 +607,8 @@ enum reg_class
   PR_LO_REGS,
   PR_HI_REGS,
   PR_REGS,
+  FFR_REGS,
+  PR_AND_FFR_REGS,
   ALL_REGS,
   LIM_REG_CLASSES              /* Last */
 };
@@ -621,6 +629,8 @@ #define REG_CLASS_NAMES                             \
   "PR_LO_REGS",                                        \
   "PR_HI_REGS",                                        \
   "PR_REGS",                                   \
+  "FFR_REGS",                                  \
+  "PR_AND_FFR_REGS",                           \
   "ALL_REGS"                                   \
 }
 
@@ -638,6 +648,8 @@ #define REG_CLASS_CONTENTS                                  \
   { 0x00000000, 0x00000000, 0x00000ff0 },      /* PR_LO_REGS */        \
   { 0x00000000, 0x00000000, 0x000ff000 },      /* PR_HI_REGS */        \
   { 0x00000000, 0x00000000, 0x000ffff0 },      /* PR_REGS */           \
+  { 0x00000000, 0x00000000, 0x00300000 },      /* FFR_REGS */          \
+  { 0x00000000, 0x00000000, 0x003ffff0 },      /* PR_AND_FFR_REGS */   \
   { 0xffffffff, 0xffffffff, 0x000fffff }       /* ALL_REGS */          \
 }
 
Index: gcc/config/aarch64/aarch64.c
===================================================================
--- gcc/config/aarch64/aarch64.c        2019-10-29 08:41:39.187037003 +0000
+++ gcc/config/aarch64/aarch64.c        2019-10-29 08:43:45.926130497 +0000
@@ -1426,6 +1426,14 @@ aarch64_err_no_fpadvsimd (machine_mode m
             " vector types", "+nofp");
 }
 
+/* Return true if REGNO is P0-P15 or one of the special FFR-related
+   registers.  */
+inline bool
+pr_or_ffr_regnum_p (unsigned int regno)
+{
+  return PR_REGNUM_P (regno) || regno == FFR_REGNUM || regno == FFRT_REGNUM;
+}
+
 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
    The register allocator chooses POINTER_AND_FP_REGS if FP_REGS and
    GENERAL_REGS have the same cost - even if POINTER_AND_FP_REGS has a much
@@ -1810,6 +1818,8 @@ aarch64_hard_regno_nregs (unsigned regno
     case PR_REGS:
     case PR_LO_REGS:
     case PR_HI_REGS:
+    case FFR_REGS:
+    case PR_AND_FFR_REGS:
       return 1;
     default:
       return CEIL (lowest_size, UNITS_PER_WORD);
@@ -1836,10 +1846,10 @@ aarch64_hard_regno_mode_ok (unsigned reg
     return false;
 
   if (vec_flags & VEC_SVE_PRED)
-    return PR_REGNUM_P (regno);
+    return pr_or_ffr_regnum_p (regno);
 
-  if (PR_REGNUM_P (regno))
-    return 0;
+  if (pr_or_ffr_regnum_p (regno))
+    return false;
 
   if (regno == SP_REGNUM)
     /* The purpose of comparing with ptr_mode is to support the
@@ -9163,6 +9173,9 @@ aarch64_regno_regclass (unsigned regno)
   if (PR_REGNUM_P (regno))
     return PR_LO_REGNUM_P (regno) ? PR_LO_REGS : PR_HI_REGS;
 
+  if (regno == FFR_REGNUM || regno == FFRT_REGNUM)
+    return FFR_REGS;
+
   return NO_REGS;
 }
 
@@ -9461,6 +9474,8 @@ aarch64_class_max_nregs (reg_class_t reg
     case PR_REGS:
     case PR_LO_REGS:
     case PR_HI_REGS:
+    case FFR_REGS:
+    case PR_AND_FFR_REGS:
       return 1;
 
     case NO_REGS:
@@ -11641,6 +11656,14 @@ aarch64_register_move_cost (machine_mode
   if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS)
     from = GENERAL_REGS;
 
+  /* Make RDFFR very expensive.  In particular, if we know that the FFR
+     contains a PTRUE (e.g. after a SETFFR), we must never use RDFFR
+     as a way of obtaining a PTRUE.  */
+  if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
+      && hard_reg_set_subset_p (reg_class_contents[from_i],
+                               reg_class_contents[FFR_REGS]))
+    return 80;
+
   /* Moving between GPR and stack cost is the same as GP2GP.  */
   if ((from == GENERAL_REGS && to == STACK_REG)
       || (to == GENERAL_REGS && from == STACK_REG))
@@ -14802,6 +14825,10 @@ aarch64_conditional_register_usage (void
        call_used_regs[i] = 1;
       }
 
+  /* Only allow the FFR and FFRT to be accessed via special patterns.  */
+  CLEAR_HARD_REG_BIT (operand_reg_set, FFR_REGNUM);
+  CLEAR_HARD_REG_BIT (operand_reg_set, FFRT_REGNUM);
+
   /* When tracking speculation, we need a couple of call-clobbered registers
      to track the speculation state.  It would be nice to just use
      IP0 and IP1, but currently there are numerous places that just

Reply via email to