Interesting but I would be a bit defensive and make sure that this
matches only if -ffast-math in the FP case. You are sort of relying on
the fact that vsub wouldn't be generated without ffast-math but I'd
rather be defensive about it . (This is in case it's not clear in the
non-intrinsics case).
Fixed.
BTW was SPEC2k built with -Ofast ? Maybe then you'll see a bit of vectorization.
Yes, I built it with -Ofast. I think it's because SPEC2K tests mostly
use doubles, which are not supported by vabd.
--
Best regards,
Dmitry
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index a8c1b87..aceb564 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -5607,3 +5607,32 @@
emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
DONE;
})
+
+(define_insn "neon_vabd<mode>_2"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w"))))]
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_5")))]
+)
+
+(define_insn "neon_vabd<mode>_3"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w")]
+ UNSPEC_VSUB)))]
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_5")))]
+)
diff --git a/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c b/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c
new file mode 100644
index 0000000..ad6ba75
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -funsafe-math-optimizations" } */
+/* { dg-add-options arm_neon } */
+
+#include <arm_neon.h>
+float32x2_t f_sub_abs_to_vabd_32()
+{
+ float32x2_t val1 = vdup_n_f32 (10);
+ float32x2_t val2 = vdup_n_f32 (30);
+ float32x2_t sres = vsub_f32(val1, val2);
+ float32x2_t res = vabs_f32 (sres);
+
+ return res;
+}
+/* { dg-final { scan-assembler "vabd\.f32" } }*/
+
+#include <arm_neon.h>
+int8x8_t sub_abs_to_vabd_8()
+{
+ int8x8_t val1 = vdup_n_s8 (10);
+ int8x8_t val2 = vdup_n_s8 (30);
+ int8x8_t sres = vsub_s8(val1, val2);
+ int8x8_t res = vabs_s8 (sres);
+
+ return res;
+}
+/* { dg-final { scan-assembler "vabd\.s8" } }*/
+
+int16x4_t sub_abs_to_vabd_16()
+{
+ int16x4_t val1 = vdup_n_s16 (10);
+ int16x4_t val2 = vdup_n_s16 (30);
+ int16x4_t sres = vsub_s16(val1, val2);
+ int16x4_t res = vabs_s16 (sres);
+
+ return res;
+}
+/* { dg-final { scan-assembler "vabd\.s16" } }*/
+
+int32x2_t sub_abs_to_vabd_32()
+{
+ int32x2_t val1 = vdup_n_s32 (10);
+ int32x2_t val2 = vdup_n_s32 (30);
+ int32x2_t sres = vsub_s32(val1, val2);
+ int32x2_t res = vabs_s32 (sres);
+
+ return res;
+}
+/* { dg-final { scan-assembler "vabd\.s32" } }*/