[PATCH][AARCH64]: Invent new regclass - FP low regs.

Tejas Belagod Tue, 19 Jun 2012 07:03:39 -0700

Hi,


The attached patch invents a new register class, FP_LO_REGS (V0 - V15), that is
needed for some lane variants of AdvSIMD instructions that can only take
V0 - V15 as their indexed register when working on half-word types.


Regression tests are happy. OK?

Thanks,
Tejas Belagod.
ARM.

Changelog:

2012-06-19  Tejas Belagod  <[email protected]>

gcc/
        * config/aarch64/aarch64-simd.md (aarch64_sq<r>dmulh_lane<mode>,
        aarch64_sqdml<SBINQOPS:as>l_lane<mode>_internal,
        aarch64_sqdmlal_lane<mode>, aarch64_sqdmlal_laneq<mode>,
        aarch64_sqdmlsl_lane<mode>, aarch64_sqdmlsl_laneq<mode>,
        aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal,
        aarch64_sqdmlal2_lane<mode>, aarch64_sqdmlal2_laneq<mode>,
        aarch64_sqdmlsl2_lane<mode>, aarch64_sqdmlsl2_laneq<mode>,
        aarch64_sqdmull_lane<mode>_internal, aarch64_sqdmull_lane<mode>,
        aarch64_sqdmull_laneq<mode>, aarch64_sqdmull2_lane<mode>_internal,
        aarch64_sqdmull2_lane<mode>, aarch64_sqdmull2_laneq<mode>): Change the
        constraint of the indexed operand to use <vwx> instead of w.
        * config/aarch64/aarch64.c (aarch64_hard_regno_nregs): Add case for
        FP_LO_REGS class.
        (aarch64_regno_regclass): Return FP_LO_REGS if register in V0 - V15.
        (aarch64_secondary_reload): Change condition to check for both FP reg
        classes.
        (aarch64_class_max_nregs): Add case for FP_LO_REGS.
        * config/aarch64/aarch64.h (reg_class): New register class FP_LO_REGS.
        (REG_CLASS_NAMES): Likewise.
        (REG_CLASS_CONTENTS): Likewise.
        (FP_LO_REGNUM_P): New.
        * config/aarch64/aarch64.md (V15_REGNUM): New.
        * config/aarch64/constraints.md (x): New register constraint.
        * config/aarch64/iterators.md (vwx): New.

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 9ceefee..43017df 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1897,7 +1897,7 @@
         (unspec:VSDQ_HSI
          [(match_operand:VSDQ_HSI 1 "register_operand" "w")
            (vec_select:<VEL>
-             (match_operand:<VCON> 2 "register_operand" "w")
+             (match_operand:<VCON> 2 "register_operand" "<vwx>")
              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
         VQDMULH))]
   "TARGET_SIMD"
@@ -1940,7 +1940,7 @@
              (sign_extend:<VWIDE>
                (vec_duplicate:VD_HSI
                  (vec_select:<VEL>
-                   (match_operand:<VCON> 3 "register_operand" "w")
+                   (match_operand:<VCON> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
               ))
            (const_int 1))))]
@@ -1960,7 +1960,7 @@
                (match_operand:SD_HSI 2 "register_operand" "w"))
              (sign_extend:<VWIDE>
                (vec_select:<VEL>
-                 (match_operand:<VCON> 3 "register_operand" "w")
+                 (match_operand:<VCON> 3 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
               )
            (const_int 1))))]
@@ -1974,7 +1974,7 @@
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:<VWIDE> 1 "register_operand" "0")
    (match_operand:VSD_HSI 2 "register_operand" "w")
-   (match_operand:<VCON> 3 "register_operand" "w")
+   (match_operand:<VCON> 3 "register_operand" "<vwx>")
    (match_operand:SI 4 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
@@ -1989,7 +1989,7 @@
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:<VWIDE> 1 "register_operand" "0")
    (match_operand:VSD_HSI 2 "register_operand" "w")
-   (match_operand:<VCON> 3 "register_operand" "w")
+   (match_operand:<VCON> 3 "register_operand" "<vwx>")
    (match_operand:SI 4 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
@@ -2004,7 +2004,7 @@
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:<VWIDE> 1 "register_operand" "0")
    (match_operand:VSD_HSI 2 "register_operand" "w")
-   (match_operand:<VCON> 3 "register_operand" "w")
+   (match_operand:<VCON> 3 "register_operand" "<vwx>")
    (match_operand:SI 4 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
@@ -2019,7 +2019,7 @@
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:<VWIDE> 1 "register_operand" "0")
    (match_operand:VSD_HSI 2 "register_operand" "w")
-   (match_operand:<VCON> 3 "register_operand" "w")
+   (match_operand:<VCON> 3 "register_operand" "<vwx>")
    (match_operand:SI 4 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
@@ -2114,7 +2114,7 @@
                (sign_extend:<VWIDE>
                   (vec_duplicate:<VHALF>
                    (vec_select:<VEL>
-                     (match_operand:<VCON> 3 "register_operand" "w")
+                     (match_operand:<VCON> 3 "register_operand" "<vwx>")
                      (parallel [(match_operand:SI 4 "immediate_operand" "i")])
                    ))))
              (const_int 1))))]
@@ -2128,7 +2128,7 @@
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:<VWIDE> 1 "register_operand" "w")
    (match_operand:VQ_HSI 2 "register_operand" "w")
-   (match_operand:<VCON> 3 "register_operand" "w")
+   (match_operand:<VCON> 3 "register_operand" "<vwx>")
    (match_operand:SI 4 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
@@ -2144,7 +2144,7 @@
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:<VWIDE> 1 "register_operand" "w")
    (match_operand:VQ_HSI 2 "register_operand" "w")
-   (match_operand:<VCON> 3 "register_operand" "w")
+   (match_operand:<VCON> 3 "register_operand" "<vwx>")
    (match_operand:SI 4 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
@@ -2160,7 +2160,7 @@
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:<VWIDE> 1 "register_operand" "w")
    (match_operand:VQ_HSI 2 "register_operand" "w")
-   (match_operand:<VCON> 3 "register_operand" "w")
+   (match_operand:<VCON> 3 "register_operand" "<vwx>")
    (match_operand:SI 4 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
@@ -2176,7 +2176,7 @@
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:<VWIDE> 1 "register_operand" "w")
    (match_operand:VQ_HSI 2 "register_operand" "w")
-   (match_operand:<VCON> 3 "register_operand" "w")
+   (match_operand:<VCON> 3 "register_operand" "<vwx>")
    (match_operand:SI 4 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
@@ -2264,7 +2264,7 @@
               (sign_extend:<VWIDE>
                  (vec_duplicate:VD_HSI
                    (vec_select:<VEL>
-                    (match_operand:<VCON> 2 "register_operand" "w")
+                    (match_operand:<VCON> 2 "register_operand" "<vwx>")
                     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
               ))
             (const_int 1)))]
@@ -2282,7 +2282,7 @@
                 (match_operand:SD_HSI 1 "register_operand" "w"))
               (sign_extend:<VWIDE>
                  (vec_select:<VEL>
-                  (match_operand:<VCON> 2 "register_operand" "w")
+                  (match_operand:<VCON> 2 "register_operand" "<vwx>")
                   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
               ))
             (const_int 1)))]
@@ -2295,7 +2295,7 @@
 (define_expand "aarch64_sqdmull_lane<mode>"
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:VSD_HSI 1 "register_operand" "w")
-   (match_operand:<VCON> 2 "register_operand" "w")
+   (match_operand:<VCON> 2 "register_operand" "<vwx>")
    (match_operand:SI 3 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
@@ -2308,7 +2308,7 @@
 (define_expand "aarch64_sqdmull_laneq<mode>"
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:VD_HSI 1 "register_operand" "w")
-   (match_operand:<VCON> 2 "register_operand" "w")
+   (match_operand:<VCON> 2 "register_operand" "<vwx>")
    (match_operand:SI 3 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
@@ -2386,7 +2386,7 @@
               (sign_extend:<VWIDE>
                  (vec_duplicate:<VHALF>
                    (vec_select:<VEL>
-                    (match_operand:<VCON> 2 "register_operand" "w")
+                    (match_operand:<VCON> 2 "register_operand" "<vwx>")
                     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
               ))
             (const_int 1)))]
@@ -2399,7 +2399,7 @@
 (define_expand "aarch64_sqdmull2_lane<mode>"
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:VQ_HSI 1 "register_operand" "w")
-   (match_operand:<VCON> 2 "register_operand" "w")
+   (match_operand:<VCON> 2 "register_operand" "<vwx>")
    (match_operand:SI 3 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
@@ -2414,7 +2414,7 @@
 (define_expand "aarch64_sqdmull2_laneq<mode>"
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:VQ_HSI 1 "register_operand" "w")
-   (match_operand:<VCON> 2 "register_operand" "w")
+   (match_operand:<VCON> 2 "register_operand" "<vwx>")
    (match_operand:SI 3 "immediate_operand" "i")]
   "TARGET_SIMD"
 {
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 3e4b48e..b877df3 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -241,6 +241,7 @@ aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
   switch (aarch64_regno_regclass (regno))
     {
     case FP_REGS:
+    case FP_LO_REGS:
       return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
     default:
       return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
@@ -3457,7 +3458,7 @@ aarch64_regno_regclass (unsigned regno)
     return CORE_REGS;
 
   if (FP_REGNUM_P (regno))
-    return FP_REGS;
+    return FP_LO_REGNUM_P (regno) ?  FP_LO_REGS : FP_REGS;
 
   return NO_REGS;
 }
@@ -3590,10 +3591,9 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
 
   /* Without the TARGET_SIMD instructions we cannot move a Q register
      to a Q register directly.  We need a scratch.  */
-  if (rclass == FP_REGS && REG_P (x)
-      && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
-      && FP_REGNUM_P (REGNO (x))
-      && !TARGET_SIMD)
+  if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
+      && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
+      && reg_class_subset_p (rclass, FP_REGS))
     {
       if (mode == TFmode)
         sri->icode = CODE_FOR_aarch64_reload_movtf;
@@ -3609,7 +3609,8 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
       && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
     return FP_REGS;
 
-  if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
+  if ((mode == TImode || mode == TFmode) && CONSTANT_P(x)
+      && reg_class_subset_p (rclass, FP_REGS))
       return CORE_REGS;
 
   return NO_REGS;
@@ -3748,6 +3749,7 @@ aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
     case GENERAL_REGS:
     case ALL_REGS:
     case FP_REGS:
+    case FP_LO_REGS:
       return (GET_MODE_SIZE (mode) + 7) / 8;
 
     case STACK_REG:
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 2faded7..56e2df5 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -361,6 +361,10 @@ extern unsigned long aarch64_tune_flags;
 
 #define FP_REGNUM_P(REGNO)                     \
   (((unsigned) (REGNO - V0_REGNUM)) <= (V31_REGNUM - V0_REGNUM))
+
+#define FP_LO_REGNUM_P(REGNO)            \
+  (((unsigned) (REGNO - V0_REGNUM)) <= (V15_REGNUM - V0_REGNUM))
+
 
 /* Register and constant classes.  */
 
@@ -371,6 +375,7 @@ enum reg_class
   GENERAL_REGS,
   STACK_REG,
   POINTER_REGS,
+  FP_LO_REGS,
   FP_REGS,
   ALL_REGS,
   LIM_REG_CLASSES              /* Last */
@@ -385,6 +390,7 @@ enum reg_class
   "GENERAL_REGS",                              \
   "STACK_REG",                                 \
   "POINTER_REGS",                              \
+  "FP_LO_REGS",                                        \
   "FP_REGS",                                   \
   "ALL_REGS"                                   \
 }
@@ -396,6 +402,7 @@ enum reg_class
   { 0x7fffffff, 0x00000000, 0x00000003 },      /* GENERAL_REGS */      \
   { 0x80000000, 0x00000000, 0x00000000 },      /* STACK_REG */         \
   { 0xffffffff, 0x00000000, 0x00000003 },      /* POINTER_REGS */      \
+  { 0x00000000, 0x0000ffff, 0x00000000 },       /* FP_LO_REGS  */      \
   { 0x00000000, 0xffffffff, 0x00000000 },       /* FP_REGS  */         \
   { 0xffffffff, 0xffffffff, 0x00000007 }       /* ALL_REGS */          \
 }
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index a666ed9..7b2a899 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -57,6 +57,7 @@
     (LR_REGNUM         30)
     (SP_REGNUM         31)
     (V0_REGNUM         32)
+    (V15_REGNUM                47)
     (V31_REGNUM                63)
     (SFP_REGNUM                64)
     (AP_REGNUM         65)
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 91eba09..da50a47 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -24,6 +24,9 @@
 (define_register_constraint "w" "FP_REGS"
   "Floating point and SIMD vector registers.")
 
+(define_register_constraint "x" "FP_LO_REGS"
+  "Floating point and SIMD vector registers V0 - V15.")
+
 (define_constraint "I"
  "A constant that can be used with an ADD operation."
  (and (match_code "const_int")
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 6dc3b2f..fc7fc50 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -326,6 +326,10 @@
                                (V2SF "V2SI") (V4SF  "V4SI")
                                (DI   "DI")   (V2DI  "V2DI")])
 
+;; Vm for lane instructions is restricted to FP_LO_REGS.
+(define_mode_attr vwx [(V4HI "x") (V8HI "x") (HI "x")
+                      (V2SI "w") (V4SI "w") (SI "w")])
+
 ;; -------------------------------------------------------------------
 ;; Code Iterators
 ;; -------------------------------------------------------------------

[PATCH][AARCH64]: Invent new regclass - FP low regs.

Reply via email to