Hi, This patch (originally by Jie Zhang) optimizes vdup builtins that are called with a constant argument so that they use the immediate variants of the vdup instructions, rather than generating a separate immediate load followed by a register vdup, as is presently done. It also adds support for loading the floating-point constant 0.0 with such instructions, by using the integer zero-load instructions (the bit pattern is the same).
Tested with no regressions, and the new tests pass. One test needed tweaking, since with the patch the (constant) calculations performed now get entirely optimised away. OK to apply? Thanks, Julian ChangeLog Jie Zhang <jzhang...@gmail.com> Julian Brown <jul...@codesourcery.com> gcc/ * config/arm/arm.c (arm_rtx_costs_1): Adjust cost for CONST_VECTOR. (arm_size_rtx_costs): Likewise. (neon_valid_immediate): Add a case for double 0.0. gcc/testsuite/ * gcc.target/arm/neon-vdup-1.c: New test case. * gcc.target/arm/neon-vdup-2.c: New test case. * gcc.target/arm/neon-vdup-3.c: New test case. * gcc.target/arm/neon-vdup-4.c: New test case. * gcc.target/arm/neon-vdup-5.c: New test case. * gcc.target/arm/neon-vdup-6.c: New test case. * gcc.target/arm/neon-vdup-7.c: New test case. * gcc.target/arm/neon-vdup-8.c: New test case. * gcc.target/arm/neon-vdup-9.c: New test case. * gcc.target/arm/neon-vdup-10.c: New test case. * gcc.target/arm/neon-vdup-11.c: New test case. * gcc.target/arm/neon-vdup-12.c: New test case. * gcc.target/arm/neon-vdup-13.c: New test case. * gcc.target/arm/neon-vdup-14.c: New test case. * gcc.target/arm/neon-vdup-15.c: New test case. * gcc.target/arm/neon-vdup-16.c: New test case. * gcc.target/arm/neon-vdup-17.c: New test case. * gcc.target/arm/neon-vdup-18.c: New test case. * gcc.target/arm/neon-vdup-19.c: New test case. * gcc.target/arm/neon-combine-sub-abs-into-vabd.c: Make intrinsic arguments non-constant.
commit 1e3a8ea7fbaed4cd6638b08fb18b951a9c02f889 Author: Julian Brown <jbr...@build6-lucid-cs.sje.mentorg.com> Date: Fri Jul 13 07:13:09 2012 -0700 Optimize NEON vdup builtins. diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 9748dda..7eedb18 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -7608,6 +7608,17 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) } return true; + case CONST_VECTOR: + if (TARGET_NEON + && TARGET_HARD_FLOAT + && outer == SET + && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)) + && neon_immediate_valid_for_move (x, mode, NULL, NULL)) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (4); + return true; + default: *total = COSTS_N_INSNS (4); return false; @@ -7948,6 +7959,17 @@ arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, *total = COSTS_N_INSNS (4); return true; + case CONST_VECTOR: + if (TARGET_NEON + && TARGET_HARD_FLOAT + && outer_code == SET + && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)) + && neon_immediate_valid_for_move (x, mode, NULL, NULL)) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (4); + return true; + case HIGH: case LO_SUM: /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the @@ -8768,11 +8790,14 @@ vfp3_const_double_rtx (rtx x) vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh vmov f32 18 aBbbbbbc defgh000 00000000 00000000 + vmov f32 19 00000000 00000000 00000000 00000000 For case 18, B = !b. Representable values are exactly those accepted by vfp3_const_double_index, but are output as floating-point numbers rather than indices. + For case 19, we will change it to vmov.i32 when assembling. + Variants 0-5 (inclusive) may also be used as immediates for the second operand of VORR/VBIC instructions. 
@@ -8829,7 +8854,7 @@ neon_valid_immediate (rtx op, enum machine_mode mode, int inverse, rtx el0 = CONST_VECTOR_ELT (op, 0); REAL_VALUE_TYPE r0; - if (!vfp3_const_double_rtx (el0)) + if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0))) return -1; REAL_VALUE_FROM_CONST_DOUBLE (r0, el0); @@ -8851,7 +8876,10 @@ neon_valid_immediate (rtx op, enum machine_mode mode, int inverse, if (elementwidth) *elementwidth = 0; - return 18; + if (el0 == CONST0_RTX (GET_MODE (el0))) + return 19; + else + return 18; } /* Splat vector constant out into a byte vector. */ diff --git a/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c b/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c index ad6ba75..fe3d78b 100644 --- a/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c +++ b/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c @@ -4,10 +4,8 @@ /* { dg-add-options arm_neon } */ #include <arm_neon.h> -float32x2_t f_sub_abs_to_vabd_32() +float32x2_t f_sub_abs_to_vabd_32(float32x2_t val1, float32x2_t val2) { - float32x2_t val1 = vdup_n_f32 (10); - float32x2_t val2 = vdup_n_f32 (30); float32x2_t sres = vsub_f32(val1, val2); float32x2_t res = vabs_f32 (sres); @@ -16,10 +14,8 @@ float32x2_t f_sub_abs_to_vabd_32() /* { dg-final { scan-assembler "vabd\.f32" } }*/ #include <arm_neon.h> -int8x8_t sub_abs_to_vabd_8() +int8x8_t sub_abs_to_vabd_8(int8x8_t val1, int8x8_t val2) { - int8x8_t val1 = vdup_n_s8 (10); - int8x8_t val2 = vdup_n_s8 (30); int8x8_t sres = vsub_s8(val1, val2); int8x8_t res = vabs_s8 (sres); @@ -27,10 +23,8 @@ int8x8_t sub_abs_to_vabd_8() } /* { dg-final { scan-assembler "vabd\.s8" } }*/ -int16x4_t sub_abs_to_vabd_16() +int16x4_t sub_abs_to_vabd_16(int16x4_t val1, int16x4_t val2) { - int16x4_t val1 = vdup_n_s16 (10); - int16x4_t val2 = vdup_n_s16 (30); int16x4_t sres = vsub_s16(val1, val2); int16x4_t res = vabs_s16 (sres); @@ -38,10 +32,8 @@ int16x4_t sub_abs_to_vabd_16() } /* { dg-final { scan-assembler "vabd\.s16" } 
}*/ -int32x2_t sub_abs_to_vabd_32() +int32x2_t sub_abs_to_vabd_32(int32x2_t val1, int32x2_t val2) { - int32x2_t val1 = vdup_n_s32 (10); - int32x2_t val2 = vdup_n_s32 (30); int32x2_t sres = vsub_s32(val1, val2); int32x2_t res = vabs_s32 (sres); diff --git a/gcc/testsuite/gcc.target/arm/neon-vdup-1.c b/gcc/testsuite/gcc.target/arm/neon-vdup-1.c new file mode 100644 index 0000000..41799a2 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vdup-1.c @@ -0,0 +1,17 @@ +/* Test the optimization of `vdupq_n_f32' ARM Neon intrinsic. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include <arm_neon.h> + +float32x4_t out_float32x4_t; +void test_vdupq_nf32 (void) +{ + out_float32x4_t = vdupq_n_f32 (0.0); +} + +/* { dg-final { scan-assembler "vmov\.f32\[ \]+\[qQ\]\[0-9\]+, #0\.0\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vdup-10.c b/gcc/testsuite/gcc.target/arm/neon-vdup-10.c new file mode 100644 index 0000000..a06b064 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vdup-10.c @@ -0,0 +1,17 @@ +/* Test the optimization of `vdupq_n_u32' ARM Neon intrinsic. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include <arm_neon.h> + +uint32x4_t out_uint32x4_t; +void test_vdupq_nu32 (void) +{ + out_uint32x4_t = vdupq_n_u32 (~0x12000000); +} + +/* { dg-final { scan-assembler "vmov\.i32\[ \]+\[qQ\]\[0-9\]+, #3992977407\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vdup-11.c b/gcc/testsuite/gcc.target/arm/neon-vdup-11.c new file mode 100644 index 0000000..07d0889 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vdup-11.c @@ -0,0 +1,17 @@ +/* Test the optimization of `vdupq_n_u16' ARM Neon intrinsic. 
*/ + +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include <arm_neon.h> + +uint16x8_t out_uint16x8_t; +void test_vdupq_nu16 (void) +{ + out_uint16x8_t = vdupq_n_u16 (0x12); +} + +/* { dg-final { scan-assembler "vmov\.i16\[ \]+\[qQ\]\[0-9\]+, #18\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vdup-12.c b/gcc/testsuite/gcc.target/arm/neon-vdup-12.c new file mode 100644 index 0000000..27b4186 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vdup-12.c @@ -0,0 +1,17 @@ +/* Test the optimization of `vdupq_n_u16' ARM Neon intrinsic. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include <arm_neon.h> + +uint16x8_t out_uint16x8_t; +void test_vdupq_nu16 (void) +{ + out_uint16x8_t = vdupq_n_u16 (0x1200); +} + +/* { dg-final { scan-assembler "vmov\.i16\[ \]+\[qQ\]\[0-9\]+, #4608\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vdup-13.c b/gcc/testsuite/gcc.target/arm/neon-vdup-13.c new file mode 100644 index 0000000..4d38bc0 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vdup-13.c @@ -0,0 +1,17 @@ +/* Test the optimization of `vdupq_n_u16' ARM Neon intrinsic. 
*/ + +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include <arm_neon.h> + +uint16x8_t out_uint16x8_t; +void test_vdupq_nu16 (void) +{ + out_uint16x8_t = vdupq_n_u16 (~0x12); +} + +/* { dg-final { scan-assembler "vmov\.i16\[ \]+\[qQ\]\[0-9\]+, #65517\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vdup-14.c b/gcc/testsuite/gcc.target/arm/neon-vdup-14.c new file mode 100644 index 0000000..a16659f --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vdup-14.c @@ -0,0 +1,17 @@ +/* Test the optimization of `vdupq_n_u16' ARM Neon intrinsic. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include <arm_neon.h> + +uint16x8_t out_uint16x8_t; +void test_vdupq_nu16 (void) +{ + out_uint16x8_t = vdupq_n_u16 (~0x1200); +} + +/* { dg-final { scan-assembler "vmov\.i16\[ \]+\[qQ\]\[0-9\]+, #60927\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vdup-15.c b/gcc/testsuite/gcc.target/arm/neon-vdup-15.c new file mode 100644 index 0000000..84a6fe0 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vdup-15.c @@ -0,0 +1,17 @@ +/* Test the optimization of `vdupq_n_u8' ARM Neon intrinsic. 
*/ + +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include <arm_neon.h> + +uint8x16_t out_uint8x16_t; +void test_vdupq_nu8 (void) +{ + out_uint8x16_t = vdupq_n_u8 (0x12); +} + +/* { dg-final { scan-assembler "vmov\.i8\[ \]+\[qQ\]\[0-9\]+, #18\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vdup-16.c b/gcc/testsuite/gcc.target/arm/neon-vdup-16.c new file mode 100644 index 0000000..70bec03 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vdup-16.c @@ -0,0 +1,17 @@ +/* Test the optimization of `vdupq_n_u32' ARM Neon intrinsic. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include <arm_neon.h> + +uint32x4_t out_uint32x4_t; +void test_vdupq_nu32 (void) +{ + out_uint32x4_t = vdupq_n_u32 (0x12ff); +} + +/* { dg-final { scan-assembler "vmov\.i32\[ \]+\[qQ\]\[0-9\]+, #4863\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vdup-17.c b/gcc/testsuite/gcc.target/arm/neon-vdup-17.c new file mode 100644 index 0000000..e0283f1 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vdup-17.c @@ -0,0 +1,17 @@ +/* Test the optimization of `vdupq_n_u32' ARM Neon intrinsic. 
*/ + +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include <arm_neon.h> + +uint32x4_t out_uint32x4_t; +void test_vdupq_nu32 (void) +{ + out_uint32x4_t = vdupq_n_u32 (0x12ffff); +} + +/* { dg-final { scan-assembler "vmov\.i32\[ \]+\[qQ\]\[0-9\]+, #1245183\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vdup-18.c b/gcc/testsuite/gcc.target/arm/neon-vdup-18.c new file mode 100644 index 0000000..7dcf85d --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vdup-18.c @@ -0,0 +1,17 @@ +/* Test the optimization of `vdupq_n_u32' ARM Neon intrinsic. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include <arm_neon.h> + +uint32x4_t out_uint32x4_t; +void test_vdupq_nu32 (void) +{ + out_uint32x4_t = vdupq_n_u32 (~0x12ff); +} + +/* { dg-final { scan-assembler "vmov\.i32\[ \]+\[qQ\]\[0-9\]+, #4294962432\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vdup-19.c b/gcc/testsuite/gcc.target/arm/neon-vdup-19.c new file mode 100644 index 0000000..0980437 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vdup-19.c @@ -0,0 +1,17 @@ +/* Test the optimization of `vdupq_n_u32' ARM Neon intrinsic. 
*/ + +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include <arm_neon.h> + +uint32x4_t out_uint32x4_t; +void test_vdupq_nu32 (void) +{ + out_uint32x4_t = vdupq_n_u32 (~0x12ffff); +} + +/* { dg-final { scan-assembler "vmov\.i32\[ \]+\[qQ\]\[0-9\]+, #4293722112\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vdup-2.c b/gcc/testsuite/gcc.target/arm/neon-vdup-2.c new file mode 100644 index 0000000..f9e6a72 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vdup-2.c @@ -0,0 +1,17 @@ +/* Test the optimization of `vdupq_n_f32' ARM Neon intrinsic. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include <arm_neon.h> + +float32x4_t out_float32x4_t; +void test_vdupq_nf32 (void) +{ + out_float32x4_t = vdupq_n_f32 (0.125); +} + +/* { dg-final { scan-assembler "vmov\.f32\[ \]+\[qQ\]\[0-9\]+, #1\.25e-1\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vdup-3.c b/gcc/testsuite/gcc.target/arm/neon-vdup-3.c new file mode 100644 index 0000000..d407316 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vdup-3.c @@ -0,0 +1,17 @@ +/* Test the optimization of `vdupq_n_u32' ARM Neon intrinsic. 
*/ + +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include <arm_neon.h> + +uint32x4_t out_uint32x4_t; +void test_vdupq_nu32 (void) +{ + out_uint32x4_t = vdupq_n_u32 (0x12); +} + +/* { dg-final { scan-assembler "vmov\.i32\[ \]+\[qQ\]\[0-9\]+, #18\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vdup-4.c b/gcc/testsuite/gcc.target/arm/neon-vdup-4.c new file mode 100644 index 0000000..bc1be07 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vdup-4.c @@ -0,0 +1,17 @@ +/* Test the optimization of `vdupq_n_u32' ARM Neon intrinsic. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include <arm_neon.h> + +uint32x4_t out_uint32x4_t; +void test_vdupq_nu32 (void) +{ + out_uint32x4_t = vdupq_n_u32 (0x1200); +} + +/* { dg-final { scan-assembler "vmov\.i32\[ \]+\[qQ\]\[0-9\]+, #4608\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vdup-5.c b/gcc/testsuite/gcc.target/arm/neon-vdup-5.c new file mode 100644 index 0000000..9b04f16 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vdup-5.c @@ -0,0 +1,17 @@ +/* Test the optimization of `vdupq_n_u32' ARM Neon intrinsic. 
*/ + +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include <arm_neon.h> + +uint32x4_t out_uint32x4_t; +void test_vdupq_nu32 (void) +{ + out_uint32x4_t = vdupq_n_u32 (0x120000); +} + +/* { dg-final { scan-assembler "vmov\.i32\[ \]+\[qQ\]\[0-9\]+, #1179648\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vdup-6.c b/gcc/testsuite/gcc.target/arm/neon-vdup-6.c new file mode 100644 index 0000000..0889b80 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vdup-6.c @@ -0,0 +1,17 @@ +/* Test the optimization of `vdupq_n_u32' ARM Neon intrinsic. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include <arm_neon.h> + +uint32x4_t out_uint32x4_t; +void test_vdupq_nu32 (void) +{ + out_uint32x4_t = vdupq_n_u32 (0x12000000); +} + +/* { dg-final { scan-assembler "vmov\.i32\[ \]+\[qQ\]\[0-9\]+, #301989888\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vdup-7.c b/gcc/testsuite/gcc.target/arm/neon-vdup-7.c new file mode 100644 index 0000000..f7b1dc8 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vdup-7.c @@ -0,0 +1,17 @@ +/* Test the optimization of `vdupq_n_u32' ARM Neon intrinsic. 
*/ + +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include <arm_neon.h> + +uint32x4_t out_uint32x4_t; +void test_vdupq_nu32 (void) +{ + out_uint32x4_t = vdupq_n_u32 (~0x12); +} + +/* { dg-final { scan-assembler "vmov\.i32\[ \]+\[qQ\]\[0-9\]+, #4294967277\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vdup-8.c b/gcc/testsuite/gcc.target/arm/neon-vdup-8.c new file mode 100644 index 0000000..9d494c3 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vdup-8.c @@ -0,0 +1,17 @@ +/* Test the optimization of `vdupq_n_u32' ARM Neon intrinsic. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include <arm_neon.h> + +uint32x4_t out_uint32x4_t; +void test_vdupq_nu32 (void) +{ + out_uint32x4_t = vdupq_n_u32 (~0x1200); +} + +/* { dg-final { scan-assembler "vmov\.i32\[ \]+\[qQ\]\[0-9\]+, #4294962687\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vdup-9.c b/gcc/testsuite/gcc.target/arm/neon-vdup-9.c new file mode 100644 index 0000000..799e95e --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vdup-9.c @@ -0,0 +1,17 @@ +/* Test the optimization of `vdupq_n_u32' ARM Neon intrinsic. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include <arm_neon.h> + +uint32x4_t out_uint32x4_t; +void test_vdupq_nu32 (void) +{ + out_uint32x4_t = vdupq_n_u32 (~0x120000); +} + +/* { dg-final { scan-assembler "vmov\.i32\[ \]+\[qQ\]\[0-9\]+, #4293787647\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */