Hi,
I am trying to implement interleave_high/low and extract_even/odd
using the vzip and vuzp instructions. I am attaching a patch that attempts
to do that. It uses the already existing neon_vzip<mode>_internal pattern.
The problem with that pattern is that it doesn't express the fact that the
two outputs of vzip depend on both inputs, which causes wrong code
generation in CSE:
for
(a,b) <- vzip (c,d)
and
(e,f) <- vzip (g,d)
CSE decides that b==f, since at the RTL level b and f depend only on d.
Here is neon_vzip<mode>_internal:
;; Emits one vzip on the registers of operands 0 and 2.
;; Operands 0 and 2 are the outputs; operand 1 is tied to operand 0 and
;; operand 3 to operand 2 through the matching constraints "0" and "2".
;; NOTE(review): each SET's unspec lists only its own tied input, so this
;; RTL says operand 0 depends only on operand 1 and operand 2 only on
;; operand 3.  As written, CSE can therefore treat the outputs of two
;; vzips that share just one input as equal.
(define_insn "neon_vzip<mode>_internal"
[(set (match_operand:VDQW 0 "s_register_operand" "=w")
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
UNSPEC_VZIP1))
(set (match_operand:VDQW 2 "s_register_operand" "=w")
(unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
UNSPEC_VZIP2))]
"TARGET_NEON"
"vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
;; neon_type is neon_bp_simple when <Is_d_reg> is non-zero, otherwise
;; neon_bp_3cycle.
[(set (attr "neon_type")
(if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
(const_string "neon_bp_simple")
(const_string "neon_bp_3cycle")))]
)
Is there a way to properly mark the dependence?
Thanks,
Ira
Index: config/arm/neon.md
===================================================================
--- config/arm/neon.md (revision 168987)
+++ config/arm/neon.md (working copy)
@@ -143,7 +143,11 @@
(UNSPEC_VZIP2 204)
(UNSPEC_MISALIGNED_ACCESS 205)
(UNSPEC_VCLE 206)
- (UNSPEC_VCLT 207)])
+ (UNSPEC_VCLT 207)
+ (UNSPEC_EXTEVEN 208)
+ (UNSPEC_EXTODD 209)
+ (UNSPEC_INTERHI 210)
+ (UNSPEC_INTERLO 211)])
;; Attribute used to permit string comparisons against <VQH_mnem> in
@@ -5469,3 +5473,76 @@
emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
DONE;
})
+
+(define_expand "vec_extract_even<mode>" ; expands to one neon_vuzp<mode>_internal insn
+ [(set (match_operand:VDQW 0 "register_operand" "") ; operand 0: result
+ (unspec:VDQW [(match_operand:VDQW 1 "register_operand" "") ; operand 1: first input
+ (match_operand:VDQW 2 "register_operand" "")] ; operand 2: second input
+ UNSPEC_EXTEVEN))]
+ "TARGET_NEON"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode); /* Takes the vuzp output that this expander does not return.  */
+
+ if (BYTES_BIG_ENDIAN) /* Big-endian: operands[0] is the third generator argument, tmp the first.  */
+ emit_insn (gen_neon_vuzp<mode>_internal (tmp, operands[1], operands[0],
+ operands[2]));
+ else /* Little-endian: operands[0] is the first generator argument, tmp the third.  */
+ emit_insn (gen_neon_vuzp<mode>_internal (operands[0], operands[1], tmp,
+ operands[2]));
+ DONE;
+})
+
+(define_expand "vec_extract_odd<mode>" ; expands to one neon_vuzp<mode>_internal insn
+ [(set (match_operand:VDQW 0 "register_operand" "") ; operand 0: result
+ (unspec:VDQW [(match_operand:VDQW 1 "register_operand" "") ; operand 1: first input
+ (match_operand:VDQW 2 "register_operand" "")] ; operand 2: second input
+ UNSPEC_EXTODD))]
+ "TARGET_NEON"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode); /* Takes the vuzp output that this expander does not return.  */
+
+ if (BYTES_BIG_ENDIAN) /* Big-endian: operands[0] is the first generator argument, tmp the third.  */
+ emit_insn (gen_neon_vuzp<mode>_internal (operands[0], operands[1], tmp,
+ operands[2]));
+ else /* Little-endian: operands[0] is the third generator argument, tmp the first.  */
+ emit_insn (gen_neon_vuzp<mode>_internal (tmp, operands[1], operands[0],
+ operands[2]));
+ DONE;
+})
+
+(define_expand "vec_interleave_high<mode>" ; expands to one neon_vzip<mode>_internal insn
+ [(set (match_operand:VDQW 0 "register_operand" "") ; operand 0: result
+ (unspec:VDQW [(match_operand:VDQW 1 "register_operand" "") ; operand 1: first input
+ (match_operand:VDQW 2 "register_operand" "")] ; operand 2: second input
+ UNSPEC_INTERHI))]
+ "TARGET_NEON"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode); /* Takes the vzip output that this expander does not return.  */
+
+ if (BYTES_BIG_ENDIAN) /* Big-endian: operands[0] is the first generator argument, tmp the third.  */
+ emit_insn (gen_neon_vzip<mode>_internal (operands[0], operands[1], tmp,
+ operands[2]));
+ else /* Little-endian: operands[0] is the third generator argument, tmp the first.  */
+ emit_insn (gen_neon_vzip<mode>_internal (tmp, operands[1], operands[0],
+ operands[2]));
+ DONE;
+})
+
+(define_expand "vec_interleave_low<mode>" ; expands to one neon_vzip<mode>_internal insn
+ [(set (match_operand:VDQW 0 "register_operand" "") ; operand 0: result
+ (unspec:VDQW [(match_operand:VDQW 1 "register_operand" "") ; operand 1: first input
+ (match_operand:VDQW 2 "register_operand" "")] ; operand 2: second input
+ UNSPEC_INTERLO))]
+ "TARGET_NEON"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode); /* Takes the vzip output that this expander does not return.  */
+
+ if (BYTES_BIG_ENDIAN) /* Big-endian: operands[0] is the third generator argument, tmp the first.  */
+ emit_insn (gen_neon_vzip<mode>_internal (tmp, operands[1], operands[0],
+ operands[2]));
+ else /* Little-endian: operands[0] is the first generator argument, tmp the third.  */
+ emit_insn (gen_neon_vzip<mode>_internal (operands[0], operands[1], tmp,
+ operands[2]));
+ DONE;
+})
+
_______________________________________________
linaro-toolchain mailing list
[email protected]
http://lists.linaro.org/mailman/listinfo/linaro-toolchain