On 1 February 2011 11:47, Ira Rosen <ira.ro...@linaro.org> wrote:

> Thanks a lot! It seems to work. It fixed the problem and I am now
> testing the patch on the rest of the vectorizer testsuite.

So far I have tested the patch only with the vectorizer testsuite (which
contains at least 30 tests for strided accesses). I'd appreciate comments
on the patch before I start full testing (cross testing with qemu doesn't
work so well for NEON).

Thanks,
Ira

ChangeLog:

       * config/arm/neon.md (UNSPEC_EXTEVEN): New constant.
       (UNSPEC_EXTODD, UNSPEC_INTERHI, UNSPEC_INTERLO):
       Likewise.
       (neon_vuzp<mode>_extract): New instruction pattern.
       (vec_extract_even<mode>): New expander.
       (vec_extract_odd<mode>): Likewise.
       (neon_vzip<mode>_interleave): New instruction pattern.
       (vec_interleave_high<mode>): New expander.
       (vec_interleave_low<mode>): Likewise.

testsuite/ChangeLog:

       * lib/target-supports.exp
       (check_effective_target_vect_extract_even_odd): Add NEON to the
       list of supporting targets.
       (check_effective_target_vect_extract_even_odd_wide): Likewise.
       (check_effective_target_vect_interleave): Likewise.

Index: testsuite/lib/target-supports.exp
===================================================================
--- testsuite/lib/target-supports.exp   (revision 168987)
+++ testsuite/lib/target-supports.exp   (working copy)
@@ -3083,7 +3083,8 @@ proc check_effective_target_vect_extract_even_odd
              || [istarget i?86-*-*]
              || [istarget x86_64-*-*]
              || [istarget ia64-*-*]
-             || [istarget spu-*-*] } {
+             || [istarget spu-*-*]
+             || [check_effective_target_arm_neon_ok] } {
            set et_vect_extract_even_odd_saved 1
         }
     }
@@ -3106,7 +3107,8 @@ proc check_effective_target_vect_extract_even_odd_wide
              || [istarget i?86-*-*]
              || [istarget x86_64-*-*]
              || [istarget ia64-*-*]
-             || [istarget spu-*-*] } {
+             || [istarget spu-*-*]
+             || [check_effective_target_arm_neon_ok] } {
            set et_vect_extract_even_odd_wide_saved 1
         }
     }
@@ -3128,7 +3130,8 @@ proc check_effective_target_vect_interleave { } {
              || [istarget i?86-*-*]
              || [istarget x86_64-*-*]
              || [istarget ia64-*-*]
-             || [istarget spu-*-*] } {
+             || [istarget spu-*-*]
+             || [check_effective_target_arm_neon_ok] } {
            set et_vect_interleave_saved 1
         }
     }
Index: config/arm/neon.md
===================================================================
--- config/arm/neon.md  (revision 168987)
+++ config/arm/neon.md  (working copy)
@@ -143,7 +143,11 @@
    (UNSPEC_VZIP2               204)
    (UNSPEC_MISALIGNED_ACCESS   205)
    (UNSPEC_VCLE                        206)
-   (UNSPEC_VCLT                        207)])
+   (UNSPEC_VCLT                        207)
+   (UNSPEC_EXTEVEN              208)
+   (UNSPEC_EXTODD               209)
+   (UNSPEC_INTERHI              210)
+   (UNSPEC_INTERLO              211)])


 ;; Attribute used to permit string comparisons against <VQH_mnem> in
@@ -5469,3 +5473,108 @@
   emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
   DONE;
 })
+
+(define_insn "neon_vuzp<mode>_extract"
+  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
+        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
+                      (match_operand:VDQW 2 "s_register_operand" "w")]
+                     UNSPEC_VUZP1))
+   (set (match_operand:VDQW 3 "s_register_operand" "=2")
+        (unspec:VDQW [(match_dup 1) (match_dup 2)]
+                     UNSPEC_VUZP2))]
+  "TARGET_NEON"
+  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                    (const_string "neon_bp_simple")
+                    (const_string "neon_bp_3cycle")))]
+)
+
+(define_expand "vec_extract_even<mode>"
+ [(set (match_operand:VDQW 0 "register_operand" "")
+        (unspec:VDQW [(match_operand:VDQW 1 "register_operand" "")
+                      (match_operand:VDQW 2 "register_operand" "")]
+                      UNSPEC_EXTEVEN))]
+ "TARGET_NEON"
+{
+  rtx tmp = gen_reg_rtx (<MODE>mode);
+
+  if (BYTES_BIG_ENDIAN)
+    emit_insn (gen_neon_vuzp<mode>_extract (tmp, operands[1], operands[2],
+                                             operands[0]));
+  else
+    emit_insn (gen_neon_vuzp<mode>_extract (operands[0], operands[1],
+                                             operands[2], tmp));
+  DONE;
+})
+
+(define_expand "vec_extract_odd<mode>"
+ [(set (match_operand:VDQW 0 "register_operand" "")
+        (unspec:VDQW [(match_operand:VDQW 1 "register_operand" "")
+                      (match_operand:VDQW 2 "register_operand" "")]
+                      UNSPEC_EXTODD))]
+ "TARGET_NEON"
+{
+  rtx tmp = gen_reg_rtx (<MODE>mode);
+
+  if (BYTES_BIG_ENDIAN)
+    emit_insn (gen_neon_vuzp<mode>_extract (operands[0], operands[1],
+                                             operands[2], tmp));
+  else
+    emit_insn (gen_neon_vuzp<mode>_extract (tmp, operands[1], operands[2],
+                                             operands[0]));
+  DONE;
+})
+
+(define_insn "neon_vzip<mode>_interleave"
+  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
+        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
+                      (match_operand:VDQW 2 "s_register_operand" "w")]
+                     UNSPEC_VZIP1))
+   (set (match_operand:VDQW 3 "s_register_operand" "=2")
+        (unspec:VDQW [(match_dup 1) (match_dup 2)]
+                     UNSPEC_VZIP2))]
+  "TARGET_NEON"
+  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                    (const_string "neon_bp_simple")
+                    (const_string "neon_bp_3cycle")))]
+)
+
+(define_expand "vec_interleave_high<mode>"
+ [(set (match_operand:VDQW 0 "register_operand" "")
+        (unspec:VDQW [(match_operand:VDQW 1 "register_operand" "")
+                      (match_operand:VDQW 2 "register_operand" "")]
+                      UNSPEC_INTERHI))]
+ "TARGET_NEON"
+{
+  rtx tmp = gen_reg_rtx (<MODE>mode);
+
+  if (BYTES_BIG_ENDIAN)
+    emit_insn (gen_neon_vzip<mode>_interleave (operands[0], operands[1],
+                                             operands[2], tmp));
+  else
+    emit_insn (gen_neon_vzip<mode>_interleave (tmp, operands[1], operands[2],
+                                             operands[0]));
+  DONE;
+})
+
+(define_expand "vec_interleave_low<mode>"
+ [(set (match_operand:VDQW 0 "register_operand" "")
+        (unspec:VDQW [(match_operand:VDQW 1 "register_operand" "")
+                      (match_operand:VDQW 2 "register_operand" "")]
+                      UNSPEC_INTERLO))]
+ "TARGET_NEON"
+{
+  rtx tmp = gen_reg_rtx (<MODE>mode);
+
+  if (BYTES_BIG_ENDIAN)
+    emit_insn (gen_neon_vzip<mode>_interleave (tmp, operands[1], operands[2],
+                                             operands[0]));
+  else
+    emit_insn (gen_neon_vzip<mode>_interleave (operands[0], operands[1],
+                                             operands[2], tmp));
+  DONE;
+})
+
_______________________________________________
linaro-toolchain mailing list
linaro-toolchain@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-toolchain

Reply via email to