On 03/02/2011 05:27 PM, Dmitry Plotnikov wrote:
On 02/28/2011 02:49 AM, Ramana Radhakrishnan wrote:
Hi Dmitry,
Sorry about the delayed review of this patch. Can you please
correct the formatting according to a few comments inline below ?
Can you add some tests to check for the generation of vshl and vlshr
please ? Thanks in advance.
cheers
Ramana
Thank you for review!
Fixed. New patch attached.
Ok now ?
There was a mistake in the patch. According to documentation vshl
should have .i suffix, while vshr should have .s or .u .
The fixed patch is attached.
gcc/config/arm/
2010-11-25 Dmitry Plotnikov <dplotni...@ispras.ru>
Dmitry Melnik <d...@ispras.ru>
* arm.c (neon_immediate_valid_for_shift): New function.
(neon_output_shift_immediate): New function.
* neon.md (vashl<mode>3): Modified constraint.
(vashr<mode>3_imm): New insn pattern.
(vlshr<mode>3_imm): New insn pattern.
(vashr<mode>3): Modified constraint.
(vlshr<mode>3): Modified constraint.
* predicates.md (imm_for_neon_lshift_operand): New predicate.
(imm_for_neon_rshift_operand): New predicate.
(imm_lshift_or_reg_neon): New predicate.
(imm_rshift_or_reg_neon): New predicate.
gcc/
2010-11-25 Dmitry Plotnikov <dplotni...@ispras.ru>
Dmitry Melnik <d...@ispras.ru>
* optabs.c (init_optabs): Init optab codes for vashl, vashr, vlshr.
gcc/testsuite/
2010-11-25 Dmitry Plotnikov <dplotni...@ispras.ru>
Dmitry Melnik <d...@ispras.ru>
* gcc.target/arm/neon-vshr-imm-1.c: New testcase.
* gcc.target/arm/neon-vshl-imm-1.c: New testcase.
* gcc.target/arm/neon-vlshr-imm-1.c: New testcase.
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 88c43e3..8b91c45 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -8508,6 +8508,66 @@ neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
return 1;
}
+/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
+ the immediate is valid, write a constant suitable for using as an operand
+ to VSHR/VSHL to *MODCONST and the corresponding element width to
+ *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
+ because they have different limitations. */
+
+bool
+neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
+ rtx *modconst, int *elementwidth,
+ bool isleftshift)
+{
+ unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
+ unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
+ unsigned HOST_WIDE_INT last_elt = 0;
+ unsigned HOST_WIDE_INT maxshift;
+
+ /* Split vector constant out into a byte vector. */
+ for (i = 0; i < n_elts; i++)
+ {
+ rtx el = CONST_VECTOR_ELT (op, i);
+ unsigned HOST_WIDE_INT elpart;
+
+ if (GET_CODE (el) == CONST_INT)
+ elpart = INTVAL (el);
+ else if (GET_CODE (el) == CONST_DOUBLE)
+ return 0;
+ else
+ gcc_unreachable ();
+
+ if (i != 0 && elpart != last_elt)
+ return 0;
+
+ last_elt = elpart;
+ }
+
+ /* Shift less than element size. */
+ maxshift = innersize * 8;
+
+ if (isleftshift)
+ {
+ /* Left shift immediate value can be from 0 to <size>-1. */
+ if ((last_elt < 0) || (last_elt >= maxshift))
+ return 0;
+ }
+ else
+ {
+ /* Right shift immediate value can be from 1 to <size>. */
+ if ((last_elt <= 0) || (last_elt > maxshift))
+ return 0;
+ }
+
+ if (elementwidth)
+ *elementwidth = innersize * 8;
+
+ if (modconst)
+ *modconst = CONST_VECTOR_ELT (op, 0);
+
+ return 1;
+}
+
/* Return a string suitable for output of Neon immediate logic operation
MNEM. */
@@ -8530,6 +8590,28 @@ neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
return templ;
}
+/* Return a string suitable for output of Neon immediate shift operation
+ (VSHR or VSHL) MNEM. */
+
+char *
+neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
+ enum machine_mode mode, int quad,
+ bool isleftshift)
+{
+ int width, is_valid;
+ static char templ[40];
+
+ is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
+ gcc_assert (is_valid != 0);
+
+ if (quad)
+ sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
+ else
+ sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
+
+ return templ;
+}
+
/* Output a sequence of pairwise operations to implement a reduction.
NOTE: We do "too much work" here, because pairwise operations work on two
registers-worth of operands in one go. Unfortunately we can't exploit those
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 06bbc52..b87d0a6 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -929,11 +929,53 @@
; SImode elements.
(define_insn "vashl<mode>3"
+ [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
+ (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
+ (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
+ "TARGET_NEON"
+ {
+ switch (which_alternative)
+ {
+ case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
+ case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
+ <MODE>mode,
+ VALID_NEON_QREG_MODE (<MODE>mode),
+ true);
+ default: gcc_unreachable ();
+ }
+ }
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_vshl_ddd")
+ (const_string "neon_shift_3")))]
+)
+
+(define_insn "vashr<mode>3_imm"
[(set (match_operand:VDQIW 0 "s_register_operand" "=w")
- (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
- (match_operand:VDQIW 2 "s_register_operand" "w")))]
+ (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
+ (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
"TARGET_NEON"
- "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ {
+ return neon_output_shift_immediate ("vshr", 's', &operands[2],
+ <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
+ false);
+ }
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_vshl_ddd")
+ (const_string "neon_shift_3")))]
+)
+
+(define_insn "vlshr<mode>3_imm"
+ [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
+ (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
+ (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
+ "TARGET_NEON"
+ {
+ return neon_output_shift_immediate ("vshr", 'u', &operands[2],
+ <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
+ false);
+ }
[(set (attr "neon_type")
(if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
(const_string "neon_vshl_ddd")
@@ -976,29 +1018,35 @@
(define_expand "vashr<mode>3"
[(set (match_operand:VDQIW 0 "s_register_operand" "")
- (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
- (match_operand:VDQIW 2 "s_register_operand" "")))]
+ (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
+ (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
"TARGET_NEON"
{
rtx neg = gen_reg_rtx (<MODE>mode);
-
- emit_insn (gen_neg<mode>2 (neg, operands[2]));
- emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
-
+ if (REG_P (operands[2]))
+ {
+ emit_insn (gen_neg<mode>2 (neg, operands[2]));
+ emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
+ }
+ else
+ emit_insn(gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
DONE;
})
(define_expand "vlshr<mode>3"
[(set (match_operand:VDQIW 0 "s_register_operand" "")
- (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
- (match_operand:VDQIW 2 "s_register_operand" "")))]
+ (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
+ (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
"TARGET_NEON"
{
rtx neg = gen_reg_rtx (<MODE>mode);
-
- emit_insn (gen_neg<mode>2 (neg, operands[2]));
- emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
-
+ if (REG_P (operands[2]))
+ {
+ emit_insn (gen_neg<mode>2 (neg, operands[2]));
+ emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
+ }
+ else
+ emit_insn(gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
DONE;
})
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index e4c6146..26ff1c5 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -585,6 +585,26 @@
return neon_immediate_valid_for_move (op, mode, NULL, NULL);
})
+(define_predicate "imm_for_neon_lshift_operand"
+ (match_code "const_vector")
+{
+ return neon_immediate_valid_for_shift (op, mode, NULL, NULL, true);
+})
+
+(define_predicate "imm_for_neon_rshift_operand"
+ (match_code "const_vector")
+{
+ return neon_immediate_valid_for_shift (op, mode, NULL, NULL, false);
+})
+
+(define_predicate "imm_lshift_or_reg_neon"
+ (ior (match_operand 0 "s_register_operand")
+ (match_operand 0 "imm_for_neon_lshift_operand")))
+
+(define_predicate "imm_rshift_or_reg_neon"
+ (ior (match_operand 0 "s_register_operand")
+ (match_operand 0 "imm_for_neon_rshift_operand")))
+
(define_predicate "imm_for_neon_logic_operand"
(match_code "const_vector")
{
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 26735dd..012dae7 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -6171,6 +6171,9 @@ init_optabs (void)
init_optab (usashl_optab, US_ASHIFT);
init_optab (ashr_optab, ASHIFTRT);
init_optab (lshr_optab, LSHIFTRT);
+ init_optabv (vashl_optab, ASHIFT);
+ init_optabv (vashr_optab, ASHIFTRT);
+ init_optabv (vlshr_optab, LSHIFTRT);
init_optab (rotl_optab, ROTATE);
init_optab (rotr_optab, ROTATERT);
init_optab (smin_optab, SMIN);
diff --git a/gcc/testsuite/gcc.target/arm/neon-vlshr-imm-1.c b/gcc/testsuite/gcc.target/arm/neon-vlshr-imm-1.c
new file mode 100644
index 0000000..e666371
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vlshr-imm-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */
+/* { dg-final { scan-assembler "vshr\.u32.*#3" } } */
+
+/* Verify that VSHR immediate is used. */
+void f1(int n, unsigned int x[], unsigned int y[]) {
+ int i;
+ for (i = 0; i < n; ++i)
+ y[i] = x[i] >> 3;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon-vshl-imm-1.c b/gcc/testsuite/gcc.target/arm/neon-vshl-imm-1.c
new file mode 100644
index 0000000..913d595
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vshl-imm-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */
+/* { dg-final { scan-assembler "vshl\.i32.*#3" } } */
+
+/* Verify that VSHR immediate is used. */
+void f1(int n, int x[], int y[]) {
+ int i;
+ for (i = 0; i < n; ++i)
+ y[i] = x[i] << 3;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon-vshr-imm-1.c b/gcc/testsuite/gcc.target/arm/neon-vshr-imm-1.c
new file mode 100644
index 0000000..8f22a52
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vshr-imm-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */
+/* { dg-final { scan-assembler "vshr\.s32.*#3" } } */
+
+/* Verify that VSHR immediate is used. */
+void f1(int n, int x[], int y[]) {
+ int i;
+ for (i = 0; i < n; ++i)
+ y[i] = x[i] >> 3;
+}