Hi,

I am trying to implement interleave_high/low and extract_even/odd
using the vzip and vuzp instructions. I am attaching a patch that
attempts to do that. It uses the already existing
neon_vzip<mode>_internal pattern. The problem is that this pattern
doesn't express the fact that each of the two outputs of vzip depends
on both inputs, which causes wrong code generation in CSE:
for
(a,b) <- vzip (c,d)
and
(e,f) <- vzip (g,d)
CSE decides that b == f, since at the RTL level b and f appear to
depend only on d.

Here is neon_vzip<mode>_internal:

;; Two-output insn that prints a single vzip instruction naming operands 0
;; and 2.  Input operand 1 is tied to output 0 and input operand 3 to
;; output 2 through the matching constraints "0" and "2".
;; NOTE(review): each unspec lists only one input operand, so at the RTL
;; level output 0 is a function of operand 1 alone and output 2 of
;; operand 3 alone.
;; The neon_type attribute is neon_bp_simple when <Is_d_reg> is nonzero
;; and neon_bp_3cycle otherwise.
(define_insn "neon_vzip<mode>_internal"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
                    UNSPEC_VZIP1))
   (set (match_operand:VDQW 2 "s_register_operand" "=w")
        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
                    UNSPEC_VZIP2))]
  "TARGET_NEON"
  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set (attr "neon_type")
      (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
                    (const_string "neon_bp_simple")
                    (const_string "neon_bp_3cycle")))]
)

Is there a way to properly mark the dependence?

Thanks,
Ira
Index: config/arm/neon.md
===================================================================
--- config/arm/neon.md  (revision 168987)
+++ config/arm/neon.md  (working copy)
@@ -143,7 +143,11 @@
    (UNSPEC_VZIP2               204)
    (UNSPEC_MISALIGNED_ACCESS   205)
    (UNSPEC_VCLE                        206)
-   (UNSPEC_VCLT                        207)])
+   (UNSPEC_VCLT                        207)
+   (UNSPEC_EXTEVEN              208)
+   (UNSPEC_EXTODD               209)
+   (UNSPEC_INTERHI              210)
+   (UNSPEC_INTERLO              211)])

 ;; Attribute used to permit string comparisons against <VQH_mnem> in
@@ -5469,3 +5473,76 @@
   emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
   DONE;
 })
+
+(define_expand "vec_extract_even<mode>"  ; Emits neon_vuzp<mode>_internal.
+ [(set (match_operand:VDQW 0 "register_operand" "")
+        (unspec:VDQW [(match_operand:VDQW 1 "register_operand" "")
+                      (match_operand:VDQW 2 "register_operand" "")]
+                      UNSPEC_EXTEVEN))]
+ "TARGET_NEON"
+{
+  rtx tmp = gen_reg_rtx (<MODE>mode);  /* Receives the result not used.  */
+
+  if (BYTES_BIG_ENDIAN)  /* operands[0] is passed as operand 2.  */
+    emit_insn (gen_neon_vuzp<mode>_internal (tmp, operands[1], operands[0],
+                                             operands[2]));
+  else  /* operands[0] is passed as operand 0.  */
+    emit_insn (gen_neon_vuzp<mode>_internal (operands[0], operands[1], tmp,
+                                             operands[2]));
+  DONE;  /* Insns were emitted above; the RTL template is not emitted.  */
+})
+
+(define_expand "vec_extract_odd<mode>"  ; Emits neon_vuzp<mode>_internal.
+ [(set (match_operand:VDQW 0 "register_operand" "")
+        (unspec:VDQW [(match_operand:VDQW 1 "register_operand" "")
+                      (match_operand:VDQW 2 "register_operand" "")]
+                      UNSPEC_EXTODD))]
+ "TARGET_NEON"
+{
+  rtx tmp = gen_reg_rtx (<MODE>mode);  /* Receives the result not used.  */
+
+  if (BYTES_BIG_ENDIAN)  /* operands[0] is passed as operand 0.  */
+    emit_insn (gen_neon_vuzp<mode>_internal (operands[0], operands[1], tmp,
+                                             operands[2]));
+  else  /* operands[0] is passed as operand 2.  */
+    emit_insn (gen_neon_vuzp<mode>_internal (tmp, operands[1], operands[0],
+                                             operands[2]));
+  DONE;  /* Insns were emitted above; the RTL template is not emitted.  */
+})
+
+(define_expand "vec_interleave_high<mode>"  ; Emits neon_vzip<mode>_internal.
+ [(set (match_operand:VDQW 0 "register_operand" "")
+        (unspec:VDQW [(match_operand:VDQW 1 "register_operand" "")
+                      (match_operand:VDQW 2 "register_operand" "")]
+                      UNSPEC_INTERHI))]
+ "TARGET_NEON"
+{
+  rtx tmp = gen_reg_rtx (<MODE>mode);  /* Receives the result not used.  */
+
+  if (BYTES_BIG_ENDIAN)  /* operands[0] is passed as operand 0.  */
+    emit_insn (gen_neon_vzip<mode>_internal (operands[0], operands[1], tmp,
+                                             operands[2]));
+  else  /* operands[0] is passed as operand 2.  */
+    emit_insn (gen_neon_vzip<mode>_internal (tmp, operands[1], operands[0],
+                                             operands[2]));
+  DONE;  /* Insns were emitted above; the RTL template is not emitted.  */
+})
+
+(define_expand "vec_interleave_low<mode>"  ; Emits neon_vzip<mode>_internal.
+ [(set (match_operand:VDQW 0 "register_operand" "")
+        (unspec:VDQW [(match_operand:VDQW 1 "register_operand" "")
+                      (match_operand:VDQW 2 "register_operand" "")]
+                      UNSPEC_INTERLO))]
+ "TARGET_NEON"
+{
+  rtx tmp = gen_reg_rtx (<MODE>mode);  /* Receives the result not used.  */
+
+  if (BYTES_BIG_ENDIAN)  /* operands[0] is passed as operand 2.  */
+    emit_insn (gen_neon_vzip<mode>_internal (tmp, operands[1], operands[0],
+                                             operands[2]));
+  else  /* operands[0] is passed as operand 0.  */
+    emit_insn (gen_neon_vzip<mode>_internal (operands[0], operands[1], tmp,
+                                             operands[2]));
+  DONE;  /* Insns were emitted above; the RTL template is not emitted.  */
+})
+
_______________________________________________
linaro-toolchain mailing list
linaro-toolchain@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-toolchain

Reply via email to