Hello,

A while ago, Andrew Stubbs posted a patch to use NEON registers
and instructions to perform 64-bit integer shifts:
http://gcc.gnu.org/ml/gcc-patches/2012-05/msg01645.html

As Andrew no longer works on ARM, I've now picked this up and
reworked it a bit:

- Updated for current mainline changes.
- Fixed a typo in the "left shift by 1" special case.
- Reworked constraint lists so that the NEON alternatives are actually
  chosen reliably in the "left shift by register" case.
- Noticed that arm_emit_coreregs_64bit_shift actually does *not*
  need a scratch for shifting by constant in any case, which
  simplifies the implementation a bit.
- Further minor simplifications & cleanup.

Tested on arm-linux-gnueabi (--with-arch=armv7-a --with-float=softfp
--with-fpu=neon --with-mode=thumb) with no regressions.

OK for mainline?

Bye,
Ulrich

ChangeLog:

2012-09-17  Andrew Stubbs  <a...@codesourcery.com>
            Ulrich Weigand  <ulrich.weig...@linaro.org>

        * config/arm/arm.c (arm_print_operand): Add new 'E' format code.
        (arm_emit_coreregs_64bit_shift): Fix comment.
        * config/arm/arm.h (enum reg_class): Add VFP_LO_REGS_EVEN.
        (REG_CLASS_NAMES, REG_CLASS_CONTENTS, IS_VFP_CLASS): Likewise.
        * config/arm/arm.md (opt, opt_enabled): New attributes.
        (enabled): Use opt_enabled.
        (ashldi3, ashrdi3, lshrdi3): Add TARGET_NEON case.
        * config/arm/constraints.md (T): New register constraint.
        * config/arm/iterators.md (rshifts): New code iterator.
        (shift, shifttype): New code attributes.
        * config/arm/neon.md (signed_shift_di3_neon, unsigned_shift_di3_neon,
        ashldi3_neon_noclobber, ashldi3_neon, ashrdi3_neon_imm_noclobber,
        lshrdi3_neon_imm_noclobber, <shift>di3_neon): New patterns.


Index: gcc/config/arm/arm.c
===================================================================
*** gcc/config/arm/arm.c        (revision 191400)
--- gcc/config/arm/arm.c        (working copy)
*************** arm_print_operand (FILE *stream, rtx x, 
*** 17280,17285 ****
--- 17280,17303 ----
        }
        return;
  
+     /* Print the VFP/Neon double precision register name that overlaps the
+        given single-precision register.  */
+     case 'E':
+       {
+       int mode = GET_MODE (x);
+ 
+       if (GET_MODE_SIZE (mode) != 4
+           || GET_CODE (x) != REG
+           || !IS_VFP_REGNUM (REGNO (x)))
+         {
+           output_operand_lossage ("invalid operand for code '%c'", code);
+           return;
+         }
+ 
+       fprintf (stream, "d%d", (REGNO (x) - FIRST_VFP_REGNUM) >> 1);
+       }
+       return;
+ 
      /* These two codes print the low/high doubleword register of a Neon quad
         register, respectively.  For pair-structure types, can also print
         low/high quadword registers.  */
*************** arm_autoinc_modes_ok_p (enum machine_mod
*** 26293,26300 ****
     Input requirements:
      - It is safe for the input and output to be the same register, but
        early-clobber rules apply for the shift amount and scratch registers.
!     - Shift by register requires both scratch registers.  Shift by a constant
!       less than 32 in Thumb2 mode requires SCRATCH1 only.  In all other cases
        the scratch registers may be NULL.
      - Ashiftrt by a register also clobbers the CC register.  */
  void
--- 26311,26317 ----
     Input requirements:
      - It is safe for the input and output to be the same register, but
        early-clobber rules apply for the shift amount and scratch registers.
!     - Shift by register requires both scratch registers.  In all other cases
        the scratch registers may be NULL.
      - Ashiftrt by a register also clobbers the CC register.  */
  void
Index: gcc/config/arm/arm.h
===================================================================
*** gcc/config/arm/arm.h        (revision 191254)
--- gcc/config/arm/arm.h        (working copy)
*************** enum reg_class
*** 1120,1125 ****
--- 1120,1126 ----
    CORE_REGS,
    VFP_D0_D7_REGS,
    VFP_LO_REGS,
+   VFP_LO_REGS_EVEN,
    VFP_HI_REGS,
    VFP_REGS,
    IWMMXT_REGS,
*************** enum reg_class
*** 1146,1151 ****
--- 1147,1153 ----
    "CORE_REGS",                \
    "VFP_D0_D7_REGS",   \
    "VFP_LO_REGS",      \
+   "VFP_LO_REGS_EVEN", \
    "VFP_HI_REGS",      \
    "VFP_REGS",         \
    "IWMMXT_REGS",      \
*************** enum reg_class
*** 1169,1174 ****
--- 1171,1177 ----
    { 0x00007FFF, 0x00000000, 0x00000000, 0x00000000 }, /* CORE_REGS */ \
    { 0xFFFF0000, 0x00000000, 0x00000000, 0x00000000 }, /* VFP_D0_D7_REGS  */ \
    { 0xFFFF0000, 0x0000FFFF, 0x00000000, 0x00000000 }, /* VFP_LO_REGS  */ \
+   { 0x55550000, 0x00005555, 0x00000000, 0x00000000 }, /* VFP_LO_REGS_EVEN  */ \
    { 0x00000000, 0xFFFF0000, 0x0000FFFF, 0x00000000 }, /* VFP_HI_REGS  */ \
    { 0xFFFF0000, 0xFFFFFFFF, 0x0000FFFF, 0x00000000 }, /* VFP_REGS  */ \
    { 0x00000000, 0x00000000, 0xFFFF0000, 0x00000000 }, /* IWMMXT_REGS */       \
*************** enum reg_class
*** 1182,1188 ****
  
  /* Any of the VFP register classes.  */
  #define IS_VFP_CLASS(X) \
!   ((X) == VFP_D0_D7_REGS || (X) == VFP_LO_REGS \
     || (X) == VFP_HI_REGS || (X) == VFP_REGS)
  
  /* The same information, inverted:
--- 1185,1191 ----
  
  /* Any of the VFP register classes.  */
  #define IS_VFP_CLASS(X) \
!   ((X) == VFP_D0_D7_REGS || (X) == VFP_LO_REGS || (X) == VFP_LO_REGS_EVEN \
     || (X) == VFP_HI_REGS || (X) == VFP_REGS)
  
  /* The same information, inverted:
Index: gcc/config/arm/neon.md
===================================================================
*** gcc/config/arm/neon.md      (revision 191400)
--- gcc/config/arm/neon.md      (working copy)
***************
*** 1170,1175 ****
--- 1170,1335 ----
    DONE;
  })
  
+ ;; 64-bit shifts
+ 
+ (define_insn "ashldi3_neon_noclobber"
+   [(set (match_operand:DI 0 "s_register_operand"          "=w,w")
+       (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
+                  (match_operand:SI 2 "reg_or_int_operand" " i,T")))]
+   "TARGET_NEON && reload_completed
+    && (!CONST_INT_P (operands[2])
+        || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
+   "@
+    vshl.u64\t%P0, %P1, %2
+    vshl.u64\t%P0, %P1, %E2    @ ashl %P0, %P1, %2"
+   [(set_attr "neon_type" "neon_vshl_ddd,neon_vshl_ddd")]
+ )
+ 
+ (define_insn_and_split "ashldi3_neon"
+   [(set (match_operand:DI 0 "s_register_operand"          "=w,?&r,?r,?w")
+       (ashift:DI (match_operand:DI 1 "s_register_operand" " w, 0r, r, w")
+                  (match_operand:SI 2 "reg_or_int_operand" "Ti,  r, i,Ti")))
+    (clobber (match_scratch:SI 3                                   "=X,?&r, X, X"))
+    (clobber (match_scratch:SI 4                                   "=X,?&r, X, X"))
+    (clobber (reg:CC_C CC_REGNUM))]
+   "TARGET_NEON"
+   "#"
+   "TARGET_NEON && reload_completed"
+   [(const_int 0)]
+   "
+   {
+     if (IS_VFP_REGNUM (REGNO (operands[0])))
+       {
+         if (CONST_INT_P (operands[2]))
+         {
+           if (INTVAL (operands[2]) < 1)
+             {
+               emit_insn (gen_movdi (operands[0], operands[1]));
+               DONE;
+             }
+           else if (INTVAL (operands[2]) > 63)
+             operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
+         }
+ 
+       /* Ditch the unnecessary clobbers.  */
+       emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
+                                              operands[2]));
+       }
+     else
+       {
+       if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1)
+         /* This clobbers CC.  */
+         emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
+       else
+         arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
+                                        operands[2], operands[3], operands[4]);
+       }
+     DONE;
+   }"
+   [(set_attr "arch" "nota8,*,*,onlya8")
+    (set_attr "opt" "*,speed,speed,*")]
+ )
+ 
+ ; The shift amount needs to be negated for right-shifts
+ (define_insn "signed_shift_di3_neon"
+   [(set (match_operand:DI 0 "s_register_operand"           "=w")
+       (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
+                   (match_operand:SI 2 "s_register_operand" " T")]
+                  UNSPEC_ASHIFT_SIGNED))]
+   "TARGET_NEON && reload_completed"
+   "vshl.s64\t%P0, %P1, %E2    @ ashr %P0, %P1, %2"
+   [(set_attr "neon_type" "neon_vshl_ddd")]
+ )
+ 
+ ; The shift amount needs to be negated for right-shifts
+ (define_insn "unsigned_shift_di3_neon"
+   [(set (match_operand:DI 0 "s_register_operand"           "=w")
+       (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
+                   (match_operand:SI 2 "s_register_operand" " T")]
+                  UNSPEC_ASHIFT_UNSIGNED))]
+   "TARGET_NEON && reload_completed"
+   "vshl.u64\t%P0, %P1, %E2    @ lshr %P0, %P1, %2"
+   [(set_attr "neon_type" "neon_vshl_ddd")]
+ )
+ 
+ (define_insn "ashrdi3_neon_imm_noclobber"
+   [(set (match_operand:DI 0 "s_register_operand"            "=w")
+       (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
+                    (match_operand:SI 2 "const_int_operand"  " i")))]
+   "TARGET_NEON && reload_completed
+    && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
+   "vshr.s64\t%P0, %P1, %2"
+   [(set_attr "neon_type" "neon_vshl_ddd")]
+ )
+ 
+ (define_insn "lshrdi3_neon_imm_noclobber"
+   [(set (match_operand:DI 0 "s_register_operand"            "=w")
+       (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
+                    (match_operand:SI 2 "const_int_operand"  " i")))]
+   "TARGET_NEON && reload_completed
+    && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
+   "vshr.u64\t%P0, %P1, %2"
+   [(set_attr "neon_type" "neon_vshl_ddd")]
+ )
+ 
+ ;; ashrdi3_neon
+ ;; lshrdi3_neon
+ (define_insn_and_split "<shift>di3_neon"
+   [(set (match_operand:DI 0 "s_register_operand"           "= w, w,?&r,?r,?w,?w")
+       (rshifts:DI (match_operand:DI 1 "s_register_operand" "  w, w, 0r, r, w, w")
+                   (match_operand:SI 2 "reg_or_int_operand" "  r, i,  r, i, r, i")))
+    (clobber (match_scratch:SI 3                                    "= r, X, &r, X, r, X"))
+    (clobber (match_scratch:SI 4                                    "=&T, X, &r, X,&T, X"))
+    (clobber (reg:CC CC_REGNUM))]
+   "TARGET_NEON"
+   "#"
+   "TARGET_NEON && reload_completed"
+   [(const_int 0)]
+   "
+   {
+     if (IS_VFP_REGNUM (REGNO (operands[0])))
+       {
+       if (CONST_INT_P (operands[2]))
+         {
+           if (INTVAL (operands[2]) < 1)
+             {
+               emit_insn (gen_movdi (operands[0], operands[1]));
+               DONE;
+             }
+           else if (INTVAL (operands[2]) > 64)
+             operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
+ 
+           /* Ditch the unnecessary clobbers.  */
+           emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
+                                                         operands[1],
+                                                         operands[2]));
+         }
+       else 
+         {
+           /* We must use a negative left-shift.  */
+           emit_insn (gen_negsi2 (operands[3], operands[2]));
+           emit_insn (gen_rtx_SET (SImode, operands[4], operands[3]));
+           emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
+                                                      operands[4]));
+         }
+       }
+     else
+       {
+       if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1)
+         /* This clobbers CC.  */
+         emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
+       else
+         /* This clobbers CC (ASHIFTRT by register only).  */
+         arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
+                                        operands[2], operands[3], operands[4]);
+       }
+ 
+     DONE;
+   }"
+   [(set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")
+    (set_attr "opt" "*,*,speed,speed,*,*")]
+ )
+ 
  ;; Widening operations
  
  (define_insn "widen_ssum<mode>3"
Index: gcc/config/arm/constraints.md
===================================================================
*** gcc/config/arm/constraints.md       (revision 191254)
--- gcc/config/arm/constraints.md       (working copy)
***************
*** 19,25 ****
  ;; <http://www.gnu.org/licenses/>.
  
  ;; The following register constraints have been used:
! ;; - in ARM/Thumb-2 state: t, w, x, y, z
  ;; - in Thumb state: h, b
  ;; - in both states: l, c, k
  ;; In ARM state, 'l' is an alias for 'r'
--- 19,25 ----
  ;; <http://www.gnu.org/licenses/>.
  
  ;; The following register constraints have been used:
! ;; - in ARM/Thumb-2 state: t, T, w, x, y, z
  ;; - in Thumb state: h, b
  ;; - in both states: l, c, k
  ;; In ARM state, 'l' is an alias for 'r'
***************
*** 44,49 ****
--- 44,52 ----
  (define_register_constraint "t" "TARGET_32BIT ? VFP_LO_REGS : NO_REGS"
   "The VFP registers @code{s0}-@code{s31}.")
  
+ (define_register_constraint "T" "TARGET_32BIT ? VFP_LO_REGS_EVEN : NO_REGS"
+  "The even numbered VFP registers @code{s0}-@code{s31}.")
+ 
  (define_register_constraint "w"
    "TARGET_32BIT ? (TARGET_VFPD32 ? VFP_REGS : VFP_LO_REGS) : NO_REGS"
   "The VFP registers @code{d0}-@code{d15}, or @code{d0}-@code{d31} for VFPv3.")
Index: gcc/config/arm/iterators.md
===================================================================
*** gcc/config/arm/iterators.md (revision 191254)
--- gcc/config/arm/iterators.md (working copy)
***************
*** 188,193 ****
--- 188,196 ----
  ;; A list of widening operators
  (define_code_iterator SE [sign_extend zero_extend])
  
+ ;; Right shifts
+ (define_code_iterator rshifts [ashiftrt lshiftrt])
+ 
  ;;----------------------------------------------------------------------------
  ;; Mode attributes
  ;;----------------------------------------------------------------------------
***************
*** 449,451 ****
--- 452,459 ----
  
  ;; Assembler mnemonics for signedness of widening operations.
  (define_code_attr US [(sign_extend "s") (zero_extend "u")])
+ 
+ ;; Right shifts
+ (define_code_attr shift [(ashiftrt "ashr") (lshiftrt "lshr")])
+ (define_code_attr shifttype [(ashiftrt "signed") (lshiftrt "unsigned")])
+ 
Index: gcc/config/arm/arm.md
===================================================================
*** gcc/config/arm/arm.md       (revision 191254)
--- gcc/config/arm/arm.md       (working copy)
***************
*** 249,254 ****
--- 249,270 ----
  
        (const_string "no")))
  
+ (define_attr "opt" "any,speed,size"
+   (const_string "any"))
+ 
+ (define_attr "opt_enabled" "no,yes"
+   (cond [(eq_attr "opt" "any")
+          (const_string "yes")
+ 
+        (and (eq_attr "opt" "speed")
+             (match_test "optimize_function_for_speed_p (cfun)"))
+        (const_string "yes")
+ 
+        (and (eq_attr "opt" "size")
+             (match_test "optimize_function_for_size_p (cfun)"))
+        (const_string "yes")]
+       (const_string "no")))
+ 
  ; Allows an insn to disable certain alternatives for reasons other than
  ; arch support.
  (define_attr "insn_enabled" "no,yes"
***************
*** 256,266 ****
  
  ; Enable all alternatives that are both arch_enabled and insn_enabled.
   (define_attr "enabled" "no,yes"
!    (if_then_else (eq_attr "insn_enabled" "yes")
!                (if_then_else (eq_attr "arch_enabled" "yes")
!                              (const_string "yes")
!                              (const_string "no"))
!                 (const_string "no")))
  
  ; POOL_RANGE is how far away from a constant pool entry that this insn
  ; can be placed.  If the distance is zero, then this insn will never
--- 272,286 ----
  
  ; Enable all alternatives that are both arch_enabled and insn_enabled.
   (define_attr "enabled" "no,yes"
!    (cond [(eq_attr "insn_enabled" "no")
!         (const_string "no")
! 
!         (eq_attr "arch_enabled" "no")
!         (const_string "no")
! 
!         (eq_attr "opt_enabled" "no")
!         (const_string "no")]
!        (const_string "yes")))
  
  ; POOL_RANGE is how far away from a constant pool entry that this insn
  ; can be placed.  If the distance is zero, then this insn will never
***************
*** 3492,3498 ****
                     (match_operand:SI 2 "reg_or_int_operand" "")))]
    "TARGET_32BIT"
    "
!   if (!CONST_INT_P (operands[2]) && TARGET_REALLY_IWMMXT)
      ; /* No special preparation statements; expand pattern as above.  */
    else
      {
--- 3512,3525 ----
                     (match_operand:SI 2 "reg_or_int_operand" "")))]
    "TARGET_32BIT"
    "
!   if (TARGET_NEON)
!     {
!       /* Delay the decision whether to use NEON or core-regs until
!        register allocation.  */
!       emit_insn (gen_ashldi3_neon (operands[0], operands[1], operands[2]));
!       DONE;
!     }
!   else if (!CONST_INT_P (operands[2]) && TARGET_REALLY_IWMMXT)
      ; /* No special preparation statements; expand pattern as above.  */
    else
      {
***************
*** 3566,3572 ****
                       (match_operand:SI 2 "reg_or_int_operand" "")))]
    "TARGET_32BIT"
    "
!   if (!CONST_INT_P (operands[2]) && TARGET_REALLY_IWMMXT)
      ; /* No special preparation statements; expand pattern as above.  */
    else
      {
--- 3593,3606 ----
                       (match_operand:SI 2 "reg_or_int_operand" "")))]
    "TARGET_32BIT"
    "
!   if (TARGET_NEON)
!     {
!       /* Delay the decision whether to use NEON or core-regs until
!        register allocation.  */
!       emit_insn (gen_ashrdi3_neon (operands[0], operands[1], operands[2]));
!       DONE;
!     }
!   else if (!CONST_INT_P (operands[2]) && TARGET_REALLY_IWMMXT)
      ; /* No special preparation statements; expand pattern as above.  */
    else
      {
***************
*** 3638,3644 ****
                       (match_operand:SI 2 "reg_or_int_operand" "")))]
    "TARGET_32BIT"
    "
!   if (!CONST_INT_P (operands[2]) && TARGET_REALLY_IWMMXT)
      ; /* No special preparation statements; expand pattern as above.  */
    else
      {
--- 3672,3685 ----
                       (match_operand:SI 2 "reg_or_int_operand" "")))]
    "TARGET_32BIT"
    "
!   if (TARGET_NEON)
!     {
!       /* Delay the decision whether to use NEON or core-regs until
!        register allocation.  */
!       emit_insn (gen_lshrdi3_neon (operands[0], operands[1], operands[2]));
!       DONE;
!     }
!   else if (!CONST_INT_P (operands[2]) && TARGET_REALLY_IWMMXT)
      ; /* No special preparation statements; expand pattern as above.  */
    else
      {
-- 
  Dr. Ulrich Weigand
  GNU Toolchain for Linux on System z and Cell BE
  ulrich.weig...@de.ibm.com

Reply via email to