gcc.c-torture/execute/loop-2e.c was failing at -O3 for lack of an
ashlv2si3 operation, so I added that.
2013-07-29  Joern Rennecke  <joern.renne...@embecosm.com>

        * config/epiphany/epiphany.md (*isub_i+2): New peephole.
        (ashlv2si3): New expander.
        (*ashlv2si3_i): New define_insn_and_split.
        * predicates.md (float_operation): Allow patterns with three
        basic sub-patterns.

Index: config/epiphany/epiphany.md
===================================================================
--- config/epiphany/epiphany.md (revision 201313)
+++ config/epiphany/epiphany.md (working copy)
@@ -587,7 +587,7 @@ (define_insn "subsi3_i"
 ; After mode-switching, floating point operations, fp_sfuncs and calls
 ; must exhibit the use of the control register, lest the setting of the
 ; control register could be deleted or moved.  OTOH a use of a hard register
-; greatly coundounds optimizers like the rtl loop optimizers or combine.
+; greatly counfounds optimizers like the rtl loop optimizers or combine.
 ; Therefore, we put an extra pass immediately after the mode switching pass
 ; that inserts the USEs of the control registers, and sets a flag in struct
 ; machine_function that float_operation can henceforth only match with that
@@ -1058,6 +1058,28 @@ (define_peephole2
              (clobber (reg:CC CC_REGNUM))])]
 )
 
+(define_peephole2
+  [(match_parallel 3 "float_operation"
+     [(set (match_operand:SI 0 "gpr_operand" "")
+          (mult:SI
+             (match_operand:SI 1 "gpr_operand" "")
+             (match_operand:SI 2 "gpr_operand" "")))
+      (clobber (reg:CC_FP CCFP_REGNUM))])]
+  "prev_active_insn (peep2_next_insn (0))
+   && get_attr_sched_use_fpu (prev_active_insn (peep2_next_insn (0)))
+   && peep2_regno_dead_p (1, CC_REGNUM)
+   && get_attr_sched_use_fpu (next_active_insn (peep2_next_insn (0)))
+   && find_reg_note (insn, REG_EQUAL, NULL_RTX) != NULL_RTX
+   && GET_CODE (XEXP (find_reg_note (insn, REG_EQUAL, NULL_RTX), 0)) == MULT
+   && CONST_INT_P (XEXP (XEXP (find_reg_note (insn, REG_EQUAL, NULL_RTX), 0),
+                        1))"
+  [(parallel [(set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 4)))
+             (clobber (reg:CC CC_REGNUM))])]
+{
+  operands[4]
+    = XEXP (XEXP (find_reg_note (curr_insn, REG_EQUAL, NULL_RTX), 0), 1);
+})
+
 (define_expand "mulsi3"
   [(parallel
      [(set (match_operand:SI 0 "gpr_operand" "")
@@ -2530,6 +2552,106 @@ (define_insn_and_split "<insn_opname>v2s
   [(set_attr "length" "8")
    (set_attr "type" "v2fp")])
 
+(define_expand "ashlv2si3"
+  [(parallel
+     [(set (match_operand:V2SI 0 "gpr_operand" "")
+          (ashift:V2SI (match_operand:V2SI 1 "gpr_operand" "")
+                       (match_operand:SI 2 "general_operand")))
+      (use (match_dup 3))
+      (clobber (reg:CC_FP CCFP_REGNUM))])]
+  ""
+{
+  if (const_int_operand (operands[2], VOIDmode))
+    operands[3]
+      = copy_to_mode_reg (SImode, GEN_INT (1 << INTVAL (operands[2])));
+  else
+    {
+      int o, i;
+      rtx xop[2], last_out = pc_rtx;
+
+      for (o = 0; o <= UNITS_PER_WORD; o += UNITS_PER_WORD)
+       {
+         for (i = 0; i < 2; i++)
+           {
+             xop[i]
+               = (i == 2 ? operands[2]
+                  : simplify_gen_subreg (SImode, operands[i], V2SImode, o));
+             gcc_assert (!reg_overlap_mentioned_p (last_out, xop[i])
+                         /* ??? reg_overlap_mentioned_p doesn't understand
+                            about multi-word SUBREGs.  */
+                         || (GET_CODE (last_out) == SUBREG
+                             && GET_CODE (xop[i]) == SUBREG
+                             && SUBREG_REG (last_out) == SUBREG_REG (xop[i])
+                             && ((SUBREG_BYTE (last_out) & -UNITS_PER_WORD)
+                                 != (SUBREG_BYTE (xop[i]) & 
-UNITS_PER_WORD))));
+           }
+         emit_insn (gen_ashlsi3 (xop[0], xop[1], operands[2]));
+         last_out = xop[0];
+       }
+      DONE;
+    }
+})
+
+(define_insn_and_split "*ashlv2si3_i"
+  [(match_parallel 3 "float_operation"
+     [(set (match_operand:V2SI 0 "gpr_operand" "=&r,*1*2")
+          (ashift:V2SI (match_operand:V2SI 1 "gpr_operand" "r,r")
+                       (match_operand 2 "const_int_operand" "n,n")))
+      (use (match_operand:SI 4 "gpr_operand" "r,r"))
+      (clobber (reg:CC_FP CCFP_REGNUM))])]
+  ""
+  "#"
+  "reload_completed"
+  [(parallel
+     [(set (match_dup 5) (mult:SI (match_dup 6) (match_dup 4)))
+          (clobber (reg:CC_FP CCFP_REGNUM))
+          (match_dup 9)
+          (match_dup 10)])
+   (parallel
+     [(set (match_dup 7) (mult:SI (match_dup 8) (match_dup 4)))
+          (clobber (reg:CC_FP CCFP_REGNUM))
+          (match_dup 9)
+          (match_dup 10)])]
+{
+  operands[5] = simplify_gen_subreg (SImode, operands[0], V2SImode, 0);
+  operands[6] = simplify_gen_subreg (SImode, operands[1], V2SImode, 0);
+  operands[7] = simplify_gen_subreg (SImode, operands[0],
+                                    V2SImode, UNITS_PER_WORD);
+  operands[8] = simplify_gen_subreg (SImode, operands[1],
+                                    V2SImode, UNITS_PER_WORD);
+  gcc_assert (!reg_overlap_mentioned_p (operands[5], operands[8]));
+  gcc_assert (!reg_overlap_mentioned_p (operands[5], operands[4]));
+  operands[9] = XVECEXP (operands[3], 0, XVECLEN (operands[3], 0) - 2);
+  operands[10] = XVECEXP (operands[3], 0, XVECLEN (operands[3], 0) - 1);
+  rtx insn
+    = (gen_rtx_PARALLEL
+       (VOIDmode,
+        gen_rtvec
+         (4,
+          gen_rtx_SET (VOIDmode, operands[5],
+                       gen_rtx_MULT (SImode, operands[6], operands[4])),
+          gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CC_FPmode, CCFP_REGNUM)),
+          operands[9], operands[10])));
+  insn = emit_insn (insn);
+  add_reg_note (insn, REG_EQUAL,
+               gen_rtx_ASHIFT (SImode, operands[6], operands[2]));
+  insn
+    = (gen_rtx_PARALLEL
+       (VOIDmode,
+        gen_rtvec
+         (4,
+          gen_rtx_SET (VOIDmode, operands[7],
+                       gen_rtx_MULT (SImode, operands[8], operands[4])),
+          gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CC_FPmode, CCFP_REGNUM)),
+          operands[9], operands[10])));
+  insn = emit_insn (insn);
+  add_reg_note (insn, REG_EQUAL,
+               gen_rtx_ASHIFT (SImode, operands[7], operands[2]));
+  DONE;
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "fp_int")])
+
 (define_expand "mul<mode>3"
   [(parallel
      [(set (match_operand:DWV2MODE 0 "gpr_operand" "")
Index: config/epiphany/predicates.md
===================================================================
--- config/epiphany/predicates.md       (revision 201313)
+++ config/epiphany/predicates.md       (working copy)
@@ -292,7 +292,11 @@ (define_predicate "float_operation"
   bool inserted = MACHINE_FUNCTION (cfun)->control_use_inserted;
   int i;
 
-  if (count == 2)
+  if (count == 2
+      /* Vector ashift has an extra use for the constant factor required to
+        implement the shift as multiply.  */
+      || (count == 3 && GET_CODE (XVECEXP (op, 0, 0)) == SET
+         && GET_CODE (XEXP (XVECEXP (op, 0, 0), 1)) == ASHIFT))
     return !inserted;
 
   /* combine / recog will pass any old garbage here before checking the
@@ -302,7 +306,7 @@ (define_predicate "float_operation"
 
   i = 1;
   if (count > 4)
-    for (i = 4; i < count; i++)
+    for (i = 2; i < count; i++)
       {
        rtx x = XVECEXP (op, 0, i);
 

Reply via email to