[PATCH] match.pd: Only merge truncation with conversion for -fno-signed-zeros
This optimisation does not honour signed zeros, so it should not be enabled except with -fno-signed-zeros. OK for master? I do not have commit rights for GCC, so if the patch is fine would someone be able to commit for me? The bug is present in all GCC versions from 12.1.0 onwards - is it possible to backport this? Bootstrapped and regression-tested on aarch64-none-linux-gnu with no issues. Thanks, Joe gcc/ChangeLog: * match.pd: Fix truncation pattern for -fno-signed-zeros. gcc/testsuite/ChangeLog: * gcc.target/aarch64/no_merge_trunc_signed_zero.c: New test. --- gcc/match.pd | 2 +- .../aarch64/no_merge_trunc_signed_zero.c | 24 +++ 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/no_merge_trunc_signed_zero.c diff --git a/gcc/match.pd b/gcc/match.pd index 9ce313323a3..45c34c810cf 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -4857,7 +4857,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) #if GIMPLE (simplify (float (fix_trunc @0)) - (if (!flag_trapping_math + (if (!flag_trapping_math && !HONOR_SIGNED_ZEROS(type) && types_match (type, TREE_TYPE (@0)) && direct_internal_fn_supported_p (IFN_TRUNC, type, OPTIMIZE_FOR_BOTH)) diff --git a/gcc/testsuite/gcc.target/aarch64/no_merge_trunc_signed_zero.c b/gcc/testsuite/gcc.target/aarch64/no_merge_trunc_signed_zero.c new file mode 100644 index 000..b2c93e55567 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/no_merge_trunc_signed_zero.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-trapping-math -fsigned-zeros" } */ + +#include <math.h> + +float +f1 (float x) +{ + return (int) rintf(x); +} + +double +f2 (double x) +{ + return (long) rint(x); +} + +/* { dg-final { scan-assembler "frintx\\ts\[0-9\]+, s\[0-9\]+" } } */ +/* { dg-final { scan-assembler "cvtzs\\ts\[0-9\]+, s\[0-9\]+" } } */ +/* { dg-final { scan-assembler "scvtf\\ts\[0-9\]+, s\[0-9\]+" } } */ +/* { dg-final { scan-assembler "frintx\\td\[0-9\]+, d\[0-9\]+" } } */ +/* { dg-final { scan-assembler "cvtzs\\td\[0-9\]+, 
d\[0-9\]+" } } */ +/* { dg-final { scan-assembler "scvtf\\td\[0-9\]+, d\[0-9\]+" } } */ + -- 2.27.0
[PATCH v2] match.pd: Only merge truncation with conversion for -fno-signed-zeros
This optimisation does not honour signed zeros, so it should not be enabled except with -fno-signed-zeros. OK for master? I do not have commit rights for GCC, so if the patch is fine would someone be able to commit for me? The bug is present in all GCC versions from 12.1.0 onwards - is it possible to backport this? Bootstrapped and regression-tested on aarch64-none-linux-gnu with no issues. Thanks, Joe gcc/ChangeLog: * match.pd: Fix truncation pattern for -fno-signed-zeros. gcc/testsuite/ChangeLog: * gcc.target/aarch64/no_merge_trunc_signed_zero.c: New test. --- Changes from v1: whitespace change only. gcc/match.pd | 1 + .../aarch64/no_merge_trunc_signed_zero.c | 24 +++ 2 files changed, 25 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/no_merge_trunc_signed_zero.c diff --git a/gcc/match.pd b/gcc/match.pd index 9ce313323a3..15a1e7350d4 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -4858,6 +4858,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (simplify (float (fix_trunc @0)) (if (!flag_trapping_math + && !HONOR_SIGNED_ZEROS (type) && types_match (type, TREE_TYPE (@0)) && direct_internal_fn_supported_p (IFN_TRUNC, type, OPTIMIZE_FOR_BOTH)) diff --git a/gcc/testsuite/gcc.target/aarch64/no_merge_trunc_signed_zero.c b/gcc/testsuite/gcc.target/aarch64/no_merge_trunc_signed_zero.c new file mode 100644 index 000..b2c93e55567 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/no_merge_trunc_signed_zero.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-trapping-math -fsigned-zeros" } */ + +#include <math.h> + +float +f1 (float x) +{ + return (int) rintf(x); +} + +double +f2 (double x) +{ + return (long) rint(x); +} + +/* { dg-final { scan-assembler "frintx\\ts\[0-9\]+, s\[0-9\]+" } } */ +/* { dg-final { scan-assembler "cvtzs\\ts\[0-9\]+, s\[0-9\]+" } } */ +/* { dg-final { scan-assembler "scvtf\\ts\[0-9\]+, s\[0-9\]+" } } */ +/* { dg-final { scan-assembler "frintx\\td\[0-9\]+, d\[0-9\]+" } } */ +/* { dg-final { scan-assembler "cvtzs\\td\[0-9\]+, d\[0-9\]+" } } */ 
+/* { dg-final { scan-assembler "scvtf\\td\[0-9\]+, d\[0-9\]+" } } */ + -- 2.27.0
[PATCH][Backport][GCC12] match.pd: Only merge truncation with conversion for -fno-signed-zeros
This optimisation does not honour signed zeros, so it should not be enabled except with -fno-signed-zeros. Cherry-pick of 7dd3b2b09cbeb6712ec680a0445cb0ad41070423. Applies cleanly on releases/gcc-12. Regression-tested, only new failure is in gcc/testsuite/c-c++-common/hwasan/large-aligned-1.c which I believe is unrelated and known to be flaky. OK for backport to GCC 12? If so, please commit for me as I do not have commit rights in GCC. Thanks, Joe gcc/ChangeLog: * match.pd: Fix truncation pattern for -fno-signed-zeros. gcc/testsuite/ChangeLog: * gcc.target/aarch64/no_merge_trunc_signed_zero.c: New test. --- gcc/match.pd | 1 + .../aarch64/no_merge_trunc_signed_zero.c | 24 +++ 2 files changed, 25 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/no_merge_trunc_signed_zero.c diff --git a/gcc/match.pd b/gcc/match.pd index c5a4426e76b..0938d56fa45 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3825,6 +3825,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (simplify (float (fix_trunc @0)) (if (!flag_trapping_math + && !HONOR_SIGNED_ZEROS (type) && types_match (type, TREE_TYPE (@0)) && direct_internal_fn_supported_p (IFN_TRUNC, type, OPTIMIZE_FOR_BOTH)) diff --git a/gcc/testsuite/gcc.target/aarch64/no_merge_trunc_signed_zero.c b/gcc/testsuite/gcc.target/aarch64/no_merge_trunc_signed_zero.c new file mode 100644 index 000..b2c93e55567 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/no_merge_trunc_signed_zero.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-trapping-math -fsigned-zeros" } */ + +#include <math.h> + +float +f1 (float x) +{ + return (int) rintf(x); +} + +double +f2 (double x) +{ + return (long) rint(x); +} + +/* { dg-final { scan-assembler "frintx\\ts\[0-9\]+, s\[0-9\]+" } } */ +/* { dg-final { scan-assembler "cvtzs\\ts\[0-9\]+, s\[0-9\]+" } } */ +/* { dg-final { scan-assembler "scvtf\\ts\[0-9\]+, s\[0-9\]+" } } */ +/* { dg-final { scan-assembler "frintx\\td\[0-9\]+, d\[0-9\]+" } } */ +/* { dg-final { scan-assembler "cvtzs\\td\[0-9\]+, 
d\[0-9\]+" } } */ +/* { dg-final { scan-assembler "scvtf\\td\[0-9\]+, d\[0-9\]+" } } */ + -- 2.27.0
[PATCH][Backport][GCC13] match.pd: Only merge truncation with conversion for -fno-signed-zeros
This optimisation does not honour signed zeros, so it should not be enabled except with -fno-signed-zeros. Cherry-pick of 7dd3b2b09cbeb6712ec680a0445cb0ad41070423. Applies cleanly on releases/gcc-13, regression-tested with no new failures. OK for backport to GCC 13? If so, please commit for me as I do not have commit rights in GCC. Thanks, Joe gcc/ChangeLog: * match.pd: Fix truncation pattern for -fno-signed-zeros. gcc/testsuite/ChangeLog: * gcc.target/aarch64/no_merge_trunc_signed_zero.c: New test. --- gcc/match.pd | 1 + .../aarch64/no_merge_trunc_signed_zero.c | 24 +++ 2 files changed, 25 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/no_merge_trunc_signed_zero.c diff --git a/gcc/match.pd b/gcc/match.pd index 47e48fa2ca5..dc34e7ead9f 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -4188,6 +4188,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (simplify (float (fix_trunc @0)) (if (!flag_trapping_math + && !HONOR_SIGNED_ZEROS (type) && types_match (type, TREE_TYPE (@0)) && direct_internal_fn_supported_p (IFN_TRUNC, type, OPTIMIZE_FOR_BOTH)) diff --git a/gcc/testsuite/gcc.target/aarch64/no_merge_trunc_signed_zero.c b/gcc/testsuite/gcc.target/aarch64/no_merge_trunc_signed_zero.c new file mode 100644 index 000..b2c93e55567 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/no_merge_trunc_signed_zero.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-trapping-math -fsigned-zeros" } */ + +#include <math.h> + +float +f1 (float x) +{ + return (int) rintf(x); +} + +double +f2 (double x) +{ + return (long) rint(x); +} + +/* { dg-final { scan-assembler "frintx\\ts\[0-9\]+, s\[0-9\]+" } } */ +/* { dg-final { scan-assembler "cvtzs\\ts\[0-9\]+, s\[0-9\]+" } } */ +/* { dg-final { scan-assembler "scvtf\\ts\[0-9\]+, s\[0-9\]+" } } */ +/* { dg-final { scan-assembler "frintx\\td\[0-9\]+, d\[0-9\]+" } } */ +/* { dg-final { scan-assembler "cvtzs\\td\[0-9\]+, d\[0-9\]+" } } */ +/* { dg-final { scan-assembler "scvtf\\td\[0-9\]+, d\[0-9\]+" } } */ + -- 2.27.0
[PATCH] arm: Add +nomve and +nomve.fp options to -mcpu=cortex-m55
From: Joe Ramsay Hi, This patch rearranges feature bits for MVE and FP to implement the following flags for -mcpu=cortex-m55. - +nomve: equivalent to armv8.1-m.main+fp.dp+dsp. - +nomve.fp: equivalent to armv8.1-m.main+mve+fp.dp (+dsp is implied by +mve). - +nofp: equivalent to armv8.1-m.main+mve (+dsp is implied by +mve). - +nodsp: equivalent to armv8.1-m.main+fp.dp. Combinations of the above: - +nomve+nofp: equivalent to armv8.1-m.main+dsp. - +nodsp+nofp: equivalent to armv8.1-m.main. Due to MVE and FP sharing vfp_base, some new syntax was required in the CPU description to implement the concept of 'implied bits'. These are non-named features added to the ISA late, depending on whether one or more features which depend on them are present. This means vfp_base can be present when only one of MVE and FP is removed, but absent when both are removed. Bootstrapped and tested on arm-none-eabi. OK for master? Thanks all! Joe gcc/ChangeLog: 2020-07-31 Joe Ramsay * config/arm/arm-cpus.in (ALL_FPU_INTERNAL): Remove vfp_base. (VFPv2): Remove vfp_base. (MVE): Remove vfp_base. (vfp_base): Redefine as implied bit dependent on MVE or FP. (cortex-m55): Add flags to disable MVE, MVE FP, FP and DSP extensions. * config/arm/arm.c (arm_configure_build_target): Add implied bits to ISA. * config/arm/parsecpu.awk (gen_isa): Print implied bits and their dependencies to ISA header. (gen_data): Add parsing for implied feature bits. gcc/testsuite/ChangeLog: 2020-07-31 Joe Ramsay * gcc.target/arm/cortex-m55-nodsp-flag.c: New test. * gcc.target/arm/cortex-m55-nodsp-nofp-flag.c: New test. * gcc.target/arm/cortex-m55-nofp-flag.c: New test. * gcc.target/arm/cortex-m55-nofp-nomve-flag.c: New test. * gcc.target/arm/cortex-m55-nomve-flag.c: New test. * gcc.target/arm/cortex-m55-nomve.fp-flag.c: New test. 
--- gcc/config/arm/arm-cpus.in | 26 --- gcc/config/arm/arm.c | 14 ++ gcc/config/arm/parsecpu.awk| 51 ++ .../gcc.target/arm/cortex-m55-nodsp-flag-hard.c| 15 +++ .../gcc.target/arm/cortex-m55-nodsp-flag-softfp.c | 15 +++ .../arm/cortex-m55-nodsp-nofp-flag-softfp.c| 15 +++ .../gcc.target/arm/cortex-m55-nofp-flag-hard.c | 15 +++ .../gcc.target/arm/cortex-m55-nofp-flag-softfp.c | 15 +++ .../arm/cortex-m55-nofp-nomve-flag-softfp.c| 15 +++ .../gcc.target/arm/cortex-m55-nomve-flag-hard.c| 15 +++ .../gcc.target/arm/cortex-m55-nomve-flag-softfp.c | 15 +++ .../gcc.target/arm/cortex-m55-nomve.fp-flag-hard.c | 15 +++ .../arm/cortex-m55-nomve.fp-flag-softfp.c | 15 +++ 13 files changed, 234 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-hard.c create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-softfp.c create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-nofp-flag-softfp.c create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nofp-flag-hard.c create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nofp-flag-softfp.c create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nofp-nomve-flag-softfp.c create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nomve-flag-hard.c create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nomve-flag-softfp.c create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nomve.fp-flag-hard.c create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nomve.fp-flag-softfp.c diff --git a/gcc/config/arm/arm-cpus.in b/gcc/config/arm/arm-cpus.in index c98f8ed..5083028 100644 --- a/gcc/config/arm/arm-cpus.in +++ b/gcc/config/arm/arm-cpus.in @@ -135,10 +135,6 @@ define feature armv8_1m_main # Floating point and Neon extensions. # VFPv1 is not supported in GCC. -# This feature bit is enabled for all VFP, MVE and -# MVE with floating point extensions. -define feature vfp_base - # Vector floating point v2. 
define feature vfpv2 @@ -251,7 +247,7 @@ define fgroup ALL_SIMD ALL_SIMD_INTERNAL ALL_SIMD_EXTERNAL # List of all FPU bits to strip out if -mfpu is used to override the # default. fp16 is deliberately missing from this list. -define fgroup ALL_FPU_INTERNAL vfp_base vfpv2 vfpv3 vfpv4 fpv5 fp16conv fp_dbl ALL_SIMD_INTERNAL +define fgroup ALL_FPU_INTERNAL vfpv2 vfpv3 vfpv4 fpv5 fp16conv fp_dbl ALL_SIMD_INTERNAL # Similarly, but including fp16 and other extensions that aren't part of # -mfpu support. define fgroup ALL_FPU_EXTERNAL fp16 bf16 @@ -296,11 +292,11 @@ define fgroup ARMv8r ARMv8a define fgroup ARMv8_1m_main ARMv8m_main armv8_1m_main # Useful combinations. -define fgroup VFPv2 vfp_base vfpv2 +define fgroup VFPv2 vfpv2 define fgroup VFPv3 VFPv2 vfpv3 define fgroup VFPv4 VFPv3 vfpv4 f
[PATCH] arm: Remove coercion from scalar argument to vmin & vmax intrinsics
From: Joe Ramsay Hi, This patch fixes an issue with vmin* and vmax* intrinsics which accept a scalar argument. Previously when the scalar was of different width to the vector elements this would generate __ARM_undef. This change allows the scalar argument to be implicitly converted to the correct width. Also tidied up the relevant unit tests, some of which would have passed even if only one of two or three intrinsic calls had compiled correctly. Bootstrapped and tested on arm-none-eabi, gcc and CMSIS_DSP testsuites are clean. OK for trunk? Thanks, Joe gcc/ChangeLog: 2020-08-10 Joe Ramsay * config/arm/arm_mve.h (__arm_vmaxnmavq): Remove coercion of scalar argument. (__arm_vmaxnmvq): Likewise. (__arm_vminnmavq): Likewise. (__arm_vminnmvq): Likewise. (__arm_vmaxnmavq_p): Likewise. (__arm_vmaxnmvq_p): Likewise (and delete duplicate definition). (__arm_vminnmavq_p): Likewise. (__arm_vminnmvq_p): Likewise. (__arm_vmaxavq): Likewise. (__arm_vmaxavq_p): Likewise. (__arm_vmaxvq): Likewise. (__arm_vmaxvq_p): Likewise. (__arm_vminavq): Likewise. (__arm_vminavq_p): Likewise. (__arm_vminvq): Likewise. (__arm_vminvq_p): Likewise. gcc/testsuite/ChangeLog: 2020-08-10 Joe Ramsay * gcc.target/arm/mve/intrinsics/vmaxavq_p_s16.c: Add test for mismatched width of scalar argument. * gcc.target/arm/mve/intrinsics/vmaxavq_p_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxavq_p_s8.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxavq_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxavq_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxavq_s8.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxnmavq_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxnmavq_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxnmavq_p_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxnmavq_p_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxnmvq_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxnmvq_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f16.c: Likewise. 
* gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxvq_p_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxvq_p_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxvq_p_s8.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxvq_p_u16.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxvq_p_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxvq_p_u8.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxvq_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxvq_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxvq_s8.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxvq_u16.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxvq_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxvq_u8.c: Likewise. * gcc.target/arm/mve/intrinsics/vminavq_p_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vminavq_p_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vminavq_p_s8.c: Likewise. * gcc.target/arm/mve/intrinsics/vminavq_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vminavq_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vminavq_s8.c: Likewise. * gcc.target/arm/mve/intrinsics/vminnmavq_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vminnmavq_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vminnmavq_p_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vminnmavq_p_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vminnmvq_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vminnmvq_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vminnmvq_p_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vminnmvq_p_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vminvq_p_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vminvq_p_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vminvq_p_s8.c: Likewise. * gcc.target/arm/mve/intrinsics/vminvq_p_u16.c: Likewise. * gcc.target/arm/mve/intrinsics/vminvq_p_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vminvq_p_u8.c: Likewise. * gcc.target/arm/mve/intrinsics/vminvq_s16.c: Likewise. 
* gcc.target/arm/mve/intrinsics/vminvq_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vminvq_s8.c: Likewise. * gcc.target/arm/mve/intrinsics/vminvq_u16.c: Likewise. * gcc.target/arm/mve/intrinsics/vminvq_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vminvq_u8.c: Likewise. --- gcc/config/arm/arm_mve.h | 110 ++--- .../gcc.target/arm/mve/intrinsics/vmaxavq_p_s16.c | 11 ++- .../gcc.target/arm/mve/intrinsics/vmaxavq_p_s32.c
[PATCH] arm: Require MVE memory operand for destination of vst1q intrinsic
From: Joe Ramsay Hi, Previously, the machine description patterns for vst1q accepted a generic memory operand for the destination, which could lead to an unrecognised builtin when expanding vst1q* intrinsics. This change fixes the patterns to only accept MVE memory operands. Tested on arm-none-eabi, clean w.r.t. gcc and CMSIS-DSP testsuites. OK for trunk? Thanks, Joe gcc/ChangeLog: 2020-08-13 Joe Ramsay * config/arm/mve.md (mve_vst1q_f<mode>): Require MVE memory operand for destination. (mve_vst1q_<supf><mode>): Likewise. gcc/testsuite/ChangeLog: 2020-08-13 Joe Ramsay * gcc.target/arm/mve/intrinsics/vst1q_f16.c: Add test that only MVE memory operand is accepted. * gcc.target/arm/mve/intrinsics/vst1q_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vst1q_s8.c: Likewise. * gcc.target/arm/mve/intrinsics/vst1q_u16.c: Likewise. * gcc.target/arm/mve/intrinsics/vst1q_u8.c: Likewise. --- gcc/config/arm/mve.md | 4 ++-- gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_f16.c | 10 +++--- gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_s16.c | 10 +++--- gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_s8.c | 10 +++--- gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_u16.c | 10 +++--- gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_u8.c | 10 +++--- 6 files changed, 37 insertions(+), 17 deletions(-) diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index 9758862..465b39a 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -9330,7 +9330,7 @@ [(set_attr "length" "4")]) (define_expand "mve_vst1q_f<mode>" - [(match_operand:<MVE_CNVT> 0 "memory_operand") + [(match_operand:<MVE_CNVT> 0 "mve_memory_operand") (unspec:<MVE_CNVT> [(match_operand:MVE_0 1 "s_register_operand")] VST1Q_F) ] "TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT" @@ -9340,7 +9340,7 @@ }) (define_expand "mve_vst1q_<supf><mode>" - [(match_operand:MVE_2 0 "memory_operand") + [(match_operand:MVE_2 0 "mve_memory_operand") (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand")] VST1Q) ] "TARGET_HAVE_MVE" diff --git 
a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_f16.c index 363b4ca..312b746 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_f16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_f16.c @@ -10,12 +10,16 @@ foo (float16_t * addr, float16x8_t value) vst1q_f16 (addr, value); } -/* { dg-final { scan-assembler "vstrh.16" } } */ - void foo1 (float16_t * addr, float16x8_t value) { vst1q (addr, value); } -/* { dg-final { scan-assembler "vstrh.16" } } */ +/* { dg-final { scan-assembler-times "vstrh.16" 2 } } */ + +void +foo2 (float16_t a, float16x8_t x) +{ + vst1q (&a, x); +} diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_s16.c index 37c4713..cd14e2c 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_s16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_s16.c @@ -10,12 +10,16 @@ foo (int16_t * addr, int16x8_t value) vst1q_s16 (addr, value); } -/* { dg-final { scan-assembler "vstrh.16" } } */ - void foo1 (int16_t * addr, int16x8_t value) { vst1q (addr, value); } -/* { dg-final { scan-assembler "vstrh.16" } } */ +/* { dg-final { scan-assembler-times "vstrh.16" 2 } } */ + +void +foo2 (int16_t a, int16x8_t x) +{ + vst1q (&a, x); +} diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_s8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_s8.c index fe5edea..0004c80 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_s8.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_s8.c @@ -10,12 +10,16 @@ foo (int8_t * addr, int8x16_t value) vst1q_s8 (addr, value); } -/* { dg-final { scan-assembler "vstrb.8" } } */ - void foo1 (int8_t * addr, int8x16_t value) { vst1q (addr, value); } -/* { dg-final { scan-assembler "vstrb.8" } } */ +/* { dg-final { scan-assembler-times "vstrb.8" 2 } } */ + +void +foo2 (int8_t a, int8x16_t x) +{ + vst1q (&a, x); +} diff --git 
a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_u16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_u16.c index a4c8c1a..248e7ce 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_u16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_u16.c @@ -10,12 +10,16 @@ foo (uint16_t * addr, uint16x8_t value) vst1q_u16 (addr, value); } -/* { dg-final { scan-assembler "vstrh.16" } } */ - void foo1 (uint16_t * addr, uint16x8_t value) { vst1q (addr, value); } -/* { dg-final { scan-assembler "vstrh.16" } } */ +/* { dg-final { scan
[PATCH v2][GCC] arm: Add +nomve and +nomve.fp options to -mcpu=cortex-m55
From: Joe Ramsay Hi all, This patch rearranges feature bits for MVE and FP to implement the following flags for -mcpu=cortex-m55. - +nomve: equivalent to armv8.1-m.main+fp.dp+dsp. - +nomve.fp: equivalent to armv8.1-m.main+mve+fp.dp (+dsp is implied by +mve). - +nofp: equivalent to armv8.1-m.main+mve (+dsp is implied by +mve). - +nodsp: equivalent to armv8.1-m.main+fp.dp. Combinations of the above: - +nomve+nofp: equivalent to armv8.1-m.main+dsp. - +nodsp+nofp: equivalent to armv8.1-m.main. Due to MVE and FP sharing vfp_base, some new syntax was required in the CPU description to implement the concept of 'implied bits'. These are non-named features added to the ISA late, depending on whether one or more features which depend on them are present. This means vfp_base can be present when only one of MVE and FP is removed, but absent when both are removed. Bootstrapped and tested on arm-none-eabi. OK for master? Thanks, Joe gcc/ChangeLog: 2020-07-31 Joe Ramsay * config/arm/arm-cpus.in (ALL_FPU_INTERNAL): Remove vfp_base. (VFPv2): Remove vfp_base. (MVE): Remove vfp_base. (vfp_base): Redefine as implied bit dependent on MVE or FP. (cortex-m55): Add flags to disable MVE, MVE FP, FP and DSP extensions. * config/arm/arm.c (arm_configure_build_target): Add implied bits to ISA. * config/arm/parsecpu.awk (gen_isa): Print implied bits and their dependencies to ISA header. (gen_data): Add parsing for implied feature bits. gcc/testsuite/ChangeLog: 2020-07-31 Joe Ramsay * gcc.target/arm/multilib.exp: Add tests for -mcpu=cortex-m55. * gcc.target/arm/cortex-m55-nodsp-flag.c: New test. * gcc.target/arm/cortex-m55-nodsp-nofp-flag.c: New test. * gcc.target/arm/cortex-m55-nofp-flag.c: New test. * gcc.target/arm/cortex-m55-nofp-nomve-flag.c: New test. * gcc.target/arm/cortex-m55-nomve-flag.c: New test. * gcc.target/arm/cortex-m55-nomve.fp-flag.c: New test. 
--- gcc/config/arm/arm-cpus.in | 26 --- gcc/config/arm/arm.c | 14 ++ gcc/config/arm/parsecpu.awk| 51 ++ .../gcc.target/arm/cortex-m55-nodsp-flag-hard.c| 15 +++ .../gcc.target/arm/cortex-m55-nodsp-flag-softfp.c | 15 +++ .../arm/cortex-m55-nodsp-nofp-flag-softfp.c| 15 +++ .../gcc.target/arm/cortex-m55-nofp-flag-hard.c | 15 +++ .../gcc.target/arm/cortex-m55-nofp-flag-softfp.c | 15 +++ .../arm/cortex-m55-nofp-nomve-flag-softfp.c| 15 +++ .../gcc.target/arm/cortex-m55-nomve-flag-hard.c| 15 +++ .../gcc.target/arm/cortex-m55-nomve-flag-softfp.c | 15 +++ .../gcc.target/arm/cortex-m55-nomve.fp-flag-hard.c | 15 +++ .../arm/cortex-m55-nomve.fp-flag-softfp.c | 15 +++ gcc/testsuite/gcc.target/arm/multilib.exp | 16 +++ 14 files changed, 250 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-hard.c create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-softfp.c create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-nofp-flag-softfp.c create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nofp-flag-hard.c create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nofp-flag-softfp.c create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nofp-nomve-flag-softfp.c create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nomve-flag-hard.c create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nomve-flag-softfp.c create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nomve.fp-flag-hard.c create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nomve.fp-flag-softfp.c diff --git a/gcc/config/arm/arm-cpus.in b/gcc/config/arm/arm-cpus.in index c98f8ed..5083028 100644 --- a/gcc/config/arm/arm-cpus.in +++ b/gcc/config/arm/arm-cpus.in @@ -135,10 +135,6 @@ define feature armv8_1m_main # Floating point and Neon extensions. # VFPv1 is not supported in GCC. -# This feature bit is enabled for all VFP, MVE and -# MVE with floating point extensions. -define feature vfp_base - # Vector floating point v2. 
define feature vfpv2 @@ -251,7 +247,7 @@ define fgroup ALL_SIMD ALL_SIMD_INTERNAL ALL_SIMD_EXTERNAL # List of all FPU bits to strip out if -mfpu is used to override the # default. fp16 is deliberately missing from this list. -define fgroup ALL_FPU_INTERNAL vfp_base vfpv2 vfpv3 vfpv4 fpv5 fp16conv fp_dbl ALL_SIMD_INTERNAL +define fgroup ALL_FPU_INTERNAL vfpv2 vfpv3 vfpv4 fpv5 fp16conv fp_dbl ALL_SIMD_INTERNAL # Similarly, but including fp16 and other extensions that aren't part of # -mfpu support. define fgroup ALL_FPU_EXTERNAL fp16 bf16 @@ -296,11 +292,11 @@ define fgroup ARMv8r ARMv8a define fgroup ARMv8_1m_main ARMv8m_main armv8_1m_main # Useful combinations. -defi
Re: [PATCH] arm: Require MVE memory operand for destination of vst1q intrinsic
Hi Ramana, Thanks for the review. On 18/08/2020, 18:37, "Ramana Radhakrishnan" wrote: On Thu, Aug 13, 2020 at 2:18 PM Joe Ramsay wrote: > > From: Joe Ramsay > > Hi, > > Previously, the machine description patterns for vst1q accepted a generic memory > operand for the destination, which could lead to an unrecognised builtin when > expanding vst1q* intrinsics. This change fixes the patterns to only accept MVE > memory operands. This is OK though I suspect this needs a PR and a backport request for GCC 10. There's now a PR for this, 96683. I've attached an updated patch file, the only change is that I've included the PR number in the changelog. Please let me know if this is OK for trunk. Thanks, Joe regards Ramana > > Thanks, > Joe > > gcc/ChangeLog: > > 2020-08-13 Joe Ramsay > > * config/arm/mve.md (mve_vst1q_f): Require MVE memory operand for > destination. > (mve_vst1q_): Likewise. > > gcc/testsuite/ChangeLog: > > 2020-08-13 Joe Ramsay > > * gcc.target/arm/mve/intrinsics/vst1q_f16.c: Add test that only MVE > memory operand is accepted. > * gcc.target/arm/mve/intrinsics/vst1q_s16.c: Likewise. > * gcc.target/arm/mve/intrinsics/vst1q_s8.c: Likewise. > * gcc.target/arm/mve/intrinsics/vst1q_u16.c: Likewise. > * gcc.target/arm/mve/intrinsics/vst1q_u8.c: Likewise. 
> --- > gcc/config/arm/mve.md | 4 ++-- > gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_f16.c | 10 +++--- > gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_s16.c | 10 +++--- > gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_s8.c | 10 +++--- > gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_u16.c | 10 +++--- > gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_u8.c | 10 +++--- > 6 files changed, 37 insertions(+), 17 deletions(-) > > diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md > index 9758862..465b39a 100644 > --- a/gcc/config/arm/mve.md > +++ b/gcc/config/arm/mve.md > @@ -9330,7 +9330,7 @@ >[(set_attr "length" "4")]) > > (define_expand "mve_vst1q_f" > - [(match_operand: 0 "memory_operand") > + [(match_operand: 0 "mve_memory_operand") > (unspec: [(match_operand:MVE_0 1 "s_register_operand")] VST1Q_F) >] >"TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT" > @@ -9340,7 +9340,7 @@ > }) > > (define_expand "mve_vst1q_" > - [(match_operand:MVE_2 0 "memory_operand") > + [(match_operand:MVE_2 0 "mve_memory_operand") > (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand")] VST1Q) >] >"TARGET_HAVE_MVE" > diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_f16.c > index 363b4ca..312b746 100644 > --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_f16.c > +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_f16.c > @@ -10,12 +10,16 @@ foo (float16_t * addr, float16x8_t value) >vst1q_f16 (addr, value); > } > > -/* { dg-final { scan-assembler "vstrh.16" } } */ > - > void > foo1 (float16_t * addr, float16x8_t value) > { >vst1q (addr, value); > } > > -/* { dg-final { scan-assembler "vstrh.16" } } */ > +/* { dg-final { scan-assembler-times "vstrh.16" 2 } } */ > + > +void > +foo2 (float16_t a, float16x8_t x) > +{ > + vst1q (&a, x); > +} > diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_s16.c > index 37c4713..cd14e2c 
100644 > --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_s16.c > +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_s16.c > @@ -10,12 +10,16 @@ foo (int16_t * addr, int16x8_t value) >vst1q_s16 (addr, value); > } > > -/* { dg-final { scan-assembler "vstrh.16" } } */ > - > void > foo1 (int16_t * addr, int16x8_t value) > { >vst1q (addr, value); > } > > -/* { dg-final { scan-assembler "vstrh.16" } } */ > +/* { dg-final { scan-assembler-times "vstrh.16" 2 } } */ > + > +void > +f
[PATCH][GCC][GCC-10 backport] arm: Require MVE memory operand for destination of vst1q intrinsic
From: Joe Ramsay Hi, Previously, the machine description patterns for vst1q accepted a generic memory operand for the destination, which could lead to an unrecognised builtin when expanding vst1q* intrinsics. This change fixes the pattern to only accept MVE memory operands. Tested on arm-none-eabi, clean w.r.t. gcc and CMSIS-DSP testsuites. Backports cleanly onto gcc-10 branch. OK for backport? Thanks, Joe gcc/ChangeLog: PR target/96683 * config/arm/mve.md (mve_vst1q_f<mode>): Require MVE memory operand for destination. (mve_vst1q_<supf><mode>): Likewise. gcc/testsuite/ChangeLog: PR target/96683 * gcc.target/arm/mve/intrinsics/vst1q_f16.c: New test. * gcc.target/arm/mve/intrinsics/vst1q_s16.c: New test. * gcc.target/arm/mve/intrinsics/vst1q_s8.c: New test. * gcc.target/arm/mve/intrinsics/vst1q_u16.c: New test. * gcc.target/arm/mve/intrinsics/vst1q_u8.c: New test. (cherry picked from commit 91d206adfe39ce063f6a5731b92a03c05e82e94a) --- gcc/config/arm/mve.md | 4 ++-- gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_f16.c | 10 +++--- gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_s16.c | 10 +++--- gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_s8.c | 10 +++--- gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_u16.c | 10 +++--- gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_u8.c | 10 +++--- 6 files changed, 37 insertions(+), 17 deletions(-) diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index 9758862..465b39a 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -9330,7 +9330,7 @@ [(set_attr "length" "4")]) (define_expand "mve_vst1q_f<mode>" - [(match_operand:<MVE_CNVT> 0 "memory_operand") + [(match_operand:<MVE_CNVT> 0 "mve_memory_operand") (unspec:<MVE_CNVT> [(match_operand:MVE_0 1 "s_register_operand")] VST1Q_F) ] "TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT" @@ -9340,7 +9340,7 @@ }) (define_expand "mve_vst1q_<supf><mode>" - [(match_operand:MVE_2 0 "memory_operand") + [(match_operand:MVE_2 0 "mve_memory_operand") (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand")] VST1Q) ] "TARGET_HAVE_MVE" 
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_f16.c index 363b4ca..312b746 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_f16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_f16.c @@ -10,12 +10,16 @@ foo (float16_t * addr, float16x8_t value) vst1q_f16 (addr, value); } -/* { dg-final { scan-assembler "vstrh.16" } } */ - void foo1 (float16_t * addr, float16x8_t value) { vst1q (addr, value); } -/* { dg-final { scan-assembler "vstrh.16" } } */ +/* { dg-final { scan-assembler-times "vstrh.16" 2 } } */ + +void +foo2 (float16_t a, float16x8_t x) +{ + vst1q (&a, x); +} diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_s16.c index 37c4713..cd14e2c 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_s16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_s16.c @@ -10,12 +10,16 @@ foo (int16_t * addr, int16x8_t value) vst1q_s16 (addr, value); } -/* { dg-final { scan-assembler "vstrh.16" } } */ - void foo1 (int16_t * addr, int16x8_t value) { vst1q (addr, value); } -/* { dg-final { scan-assembler "vstrh.16" } } */ +/* { dg-final { scan-assembler-times "vstrh.16" 2 } } */ + +void +foo2 (int16_t a, int16x8_t x) +{ + vst1q (&a, x); +} diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_s8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_s8.c index fe5edea..0004c80 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_s8.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_s8.c @@ -10,12 +10,16 @@ foo (int8_t * addr, int8x16_t value) vst1q_s8 (addr, value); } -/* { dg-final { scan-assembler "vstrb.8" } } */ - void foo1 (int8_t * addr, int8x16_t value) { vst1q (addr, value); } -/* { dg-final { scan-assembler "vstrb.8" } } */ +/* { dg-final { scan-assembler-times "vstrb.8" 2 } } */ + +void +foo2 (int8_t a, int8x16_t x) +{ + vst1q (&a, x); +} diff --git 
a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_u16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_u16.c index a4c8c1a..248e7ce 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_u16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vst1q_u16.c @@ -10,12 +10,16 @@ foo (uint16_t * addr, uint16x8_t value) vst1q_u16 (addr, value); } -/* { dg-final { scan-assembler "vstrh.16" } } */ - void foo1 (uint16_t * addr, uint16x8_t value) { vst1q (addr, value); } -/* { dg-final { scan-assemb
[PATCH]: Optimize a >= 0 && b >= 0 to (a | b) >= 0 [PR95731]
Hi! This is a fix for PR95731, which adds a new pattern to simplify a >= 0 && b >= 0 to (a | b) >= 0. Bootstrapped and tested on x86_linux and aarch64_linux. Any comments are appreciated. Thanks, Joe gcc/ChangeLog: 2020-05-20 Joe Ramsay * match.pd: New pattern to optimize a >= 0 && b >= 0 to (a | b) >= 0 gcc/testsuite/ChangeLog: 2020-05-20 Joe Ramsay * gcc.dg/tree-ssa/pr95731-1.c: New test. * gcc.dg/tree-ssa/pr95731-2.c: New test. pr95731.diff Description: pr95731.diff
[PATCH v2]: Optimize a >= 0 && b >= 0 to (a | b) >= 0 [PR95731]
Hi! Apologies, resending as I previously attached the wrong patch file. This is a fix for PR95731, which adds a new pattern to simplify a >= 0 && b >= 0 to (a | b) >= 0. Bootstrapped and tested on x86_linux and aarch64_linux. Any comments are appreciated. Thanks, Joe gcc/ChangeLog: 2020-05-20 Joe Ramsay <joe.ram...@arm.com> * match.pd: New simplification. gcc/testsuite/ChangeLog: 2020-05-20 Joe Ramsay <joe.ram...@arm.com> * gcc.dg/tree-ssa/pr95731-1.c: New test. * gcc.dg/tree-ssa/pr95731-2.c: New test. pr95731.diff Description: pr95731.diff
[PATCH] [PATCH][GCC] arm: Enable no-writeback vldr.16/vstr.16.
Hi, There was previously no way to specify that a register operand cannot have any writeback modifiers, and as a result the argument to vldr.16 and vstr.16 could be erroneously output with post-increment. This change adds an operand specifier which forbids all writeback, and selects it in the relevant case for vldr.16 and vstr.16 Bootstrapped on arm-linux, gcc and CMSIS-DSP testsuites are clean. Is this patch OK for trunk? If yes, please commit on my behalf as I don't have commit rights. Thanks, Joe gcc/ChangeLog: 2020-05-20 Joe Ramsay * config/arm/arm-protos.h (arm_coproc_mem_operand_no_writeback): Declare prototype. (arm_mve_mode_and_operands_type_check): Declare prototype. * config/arm/arm.c (arm_coproc_mem_operand): Refactor to use _arm_coproc_mem_operand. (arm_coproc_mem_operand_wb): New function to cover full, limited and no writeback. (arm_coproc_mem_operand_no_writeback): New constraint for memory operand with no writeback. (arm_print_operand): Implement 'j' specifier for memory operand that does not support writeback. (arm_mve_mode_and_operands_type_check): New constraint check for MVE memory operands. * config/arm/constraints.md: Add Uj constraint for VFP vldr.16 and vstr.16. * config/arm/vfp.md (*mov_load_vfp_hf16): New pattern for vldr.16. (*mov_store_vfp_hf16): New pattern for vstr.16. (*mov_vfp_16): Remove MVE moves. gcc/testsuite/ChangeLog: 2020-05-20 Joe Ramsay * gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c: New test. 
--- gcc/config/arm/arm-protos.h| 3 + gcc/config/arm/arm.c | 100 ++--- gcc/config/arm/constraints.md | 7 ++ gcc/config/arm/vfp.md | 28 -- .../arm/mve/intrinsics/mve-vldstr16-no-writeback.c | 17 5 files changed, 135 insertions(+), 20 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 33d162c..e811da4 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -115,8 +115,11 @@ extern enum reg_class coproc_secondary_reload_class (machine_mode, rtx, extern bool arm_tls_referenced_p (rtx); extern int arm_coproc_mem_operand (rtx, bool); +extern int arm_coproc_mem_operand_no_writeback (rtx); +extern int arm_coproc_mem_operand_wb (rtx, int); extern int neon_vector_mem_operand (rtx, int, bool); extern int mve_vector_mem_operand (machine_mode, rtx, bool); +bool arm_mve_mode_and_operands_type_check (machine_mode, rtx, rtx); extern int neon_struct_mem_operand (rtx); extern rtx *neon_vcmla_lane_prepare_operands (rtx *); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 6b7ca82..ed080d2 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -13217,13 +13217,14 @@ neon_element_bits (machine_mode mode) /* Predicates for `match_operand' and `match_operator'. */ /* Return TRUE if OP is a valid coprocessor memory address pattern. - WB is true if full writeback address modes are allowed and is false + WB level is 2 if full writeback address modes are allowed, 1 if limited writeback address modes (POST_INC and PRE_DEC) are - allowed. */ + allowed and 0 if no writeback at all is supported. */ int -arm_coproc_mem_operand (rtx op, bool wb) +arm_coproc_mem_operand_wb (rtx op, int wb_level) { + gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2); rtx ind; /* Reject eliminable registers. */ @@ -13256,16 +13257,18 @@ arm_coproc_mem_operand (rtx op, bool wb) /* Autoincremment addressing modes. 
POST_INC and PRE_DEC are acceptable in any case (subject to verification by - arm_address_register_rtx_p). We need WB to be true to accept + arm_address_register_rtx_p). We need full writeback to accept + PRE_INC and POST_DEC, and at least restricted writeback for PRE_INC and POST_DEC. */ - if (GET_CODE (ind) == POST_INC - || GET_CODE (ind) == PRE_DEC - || (wb - && (GET_CODE (ind) == PRE_INC - || GET_CODE (ind) == POST_DEC))) + if (wb_level > 0 + && (GET_CODE (ind) == POST_INC + || GET_CODE (ind) == PRE_DEC + || (wb_level > 1 + && (GET_CODE (ind) == PRE_INC + || GET_CODE (ind) == POST_DEC return arm_address_register_rtx_p (XEXP (ind, 0), 0); - if (wb + if (wb_level > 1 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY) && arm_address_register_rtx_p (XEXP (ind, 0), 0) && GET_CODE (XEXP (ind, 1)) == PLUS @@ -13287,6 +13290,25 @@ arm_coproc_mem_operand (rtx op, bool wb) return FALSE; } +/* Return TRUE if OP is a valid coprocessor memory address pattern. + WB is true
Re: [PATCH] [PATCH][GCC] arm: Enable no-writeback vldr.16/vstr.16.
Thanks for the feedback Kyrill. On 28/07/2020, 10:16, "Kyrylo Tkachov" wrote: Hi Joe, > -Original Message- > From: Gcc-patches On Behalf Of Joe > Ramsay > Sent: 27 July 2020 15:08 > To: Jakub Jelinek via Gcc-patches > Subject: [PATCH] [PATCH][GCC] arm: Enable no-writeback vldr.16/vstr.16. > > Hi, > > There was previously no way to specify that a register operand cannot > have any writeback modifiers, and as a result the argument to vldr.16 > and vstr.16 could be erroneously output with post-increment. This > change adds an operand specifier which forbids all writeback, and > selects it in the relevant case for vldr.16 and vstr.16 > > Bootstrapped on arm-linux, gcc and CMSIS-DSP testsuites are clean. > Is this patch OK for trunk? If yes, please commit on my behalf as I don't > have commit rights. > > Thanks, > Joe > > gcc/ChangeLog: > > 2020-05-20 Joe Ramsay > > * config/arm/arm-protos.h (arm_coproc_mem_operand_no_writeback): > Declare prototype. > (arm_mve_mode_and_operands_type_check): Declare prototype. > * config/arm/arm.c (arm_coproc_mem_operand): Refactor to use > _arm_coproc_mem_operand. > (arm_coproc_mem_operand_wb): New function to cover full, limited > and no writeback. > (arm_coproc_mem_operand_no_writeback): New constraint for > memory operand with no writeback. > (arm_print_operand): Implement 'j' specifier for memory operand that > does not support > writeback. > (arm_mve_mode_and_operands_type_check): New constraint check for > MVE memory operands. > * config/arm/constraints.md: Add Uj constraint for VFP vldr.16 and > vstr.16. > * config/arm/vfp.md (*mov_load_vfp_hf16): New pattern for vldr.16. > (*mov_store_vfp_hf16): New pattern for vstr.16. > (*mov_vfp_16): Remove MVE moves. > > gcc/testsuite/ChangeLog: > > 2020-05-20 Joe Ramsay > > * gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c: New test. 
> > --- > gcc/config/arm/arm-protos.h| 3 + > gcc/config/arm/arm.c | 100 ++--- > gcc/config/arm/constraints.md | 7 ++ > gcc/config/arm/vfp.md | 28 -- > .../arm/mve/intrinsics/mve-vldstr16-no-writeback.c | 17 > 5 files changed, 135 insertions(+), 20 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/mve- > vldstr16-no-writeback.c > > diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h > index 33d162c..e811da4 100644 > --- a/gcc/config/arm/arm-protos.h > +++ b/gcc/config/arm/arm-protos.h > @@ -115,8 +115,11 @@ extern enum reg_class > coproc_secondary_reload_class (machine_mode, rtx, > extern bool arm_tls_referenced_p (rtx); > > extern int arm_coproc_mem_operand (rtx, bool); > +extern int arm_coproc_mem_operand_no_writeback (rtx); > +extern int arm_coproc_mem_operand_wb (rtx, int); > extern int neon_vector_mem_operand (rtx, int, bool); > extern int mve_vector_mem_operand (machine_mode, rtx, bool); > +bool arm_mve_mode_and_operands_type_check (machine_mode, rtx, rtx); > extern int neon_struct_mem_operand (rtx); > > extern rtx *neon_vcmla_lane_prepare_operands (rtx *); > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > index 6b7ca82..ed080d2 100644 > --- a/gcc/config/arm/arm.c > +++ b/gcc/config/arm/arm.c > @@ -13217,13 +13217,14 @@ neon_element_bits (machine_mode mode) > /* Predicates for `match_operand' and `match_operator'. */ > > /* Return TRUE if OP is a valid coprocessor memory address pattern. > - WB is true if full writeback address modes are allowed and is false > + WB level is 2 if full writeback address modes are allowed, 1 > if limited writeback address modes (POST_INC and PRE_DEC) are > - allowed. */ > + allowed and 0 if no writeback at all is supported. */ > > int > -arm_coproc_mem_operand (rtx op, bool wb) > +arm_coproc_mem_operand_wb (rtx op, int wb_level) > { > + gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2); >rtx ind; > >/* Reject eliminable registers. 
*/ > @@ -13256,16 +13257,18 @@ arm_coproc_mem_operand (rtx op, bool wb) > >
[PATCH v2][GCC] arm: Enable no-writeback vldr.16/vstr.16.
Hi, There was previously no way to specify that a register operand cannot have any writeback modifiers, and as a result the argument to vldr.16 and vstr.16 could be erroneously output with post-increment. This change adds a constraint which forbids all writeback, and selects it in the relevant case for vldr.16 and vstr.16 Bootstrapped on arm-linux, gcc and CMSIS-DSP testsuites are clean. Is this patch OK for trunk? If yes, please commit on my behalf as I don't have commit rights. Thanks, Joe gcc/ChangeLog: 2020-05-20 Joe Ramsay * config/arm/arm-protos.h (arm_coproc_mem_operand_no_writeback): Declare prototype. (arm_mve_mode_and_operands_type_check): Declare prototype. * config/arm/arm.c (arm_coproc_mem_operand): Refactor to use _arm_coproc_mem_operand. (arm_coproc_mem_operand_wb): New function to cover full, limited and no writeback. (arm_coproc_mem_operand_no_writeback): New constraint for memory operand with no writeback. (arm_print_operand): Extend 'E' specifier for memory operand that does not support writeback. (arm_mve_mode_and_operands_type_check): New constraint check for MVE memory operands. * config/arm/constraints.md: Add Uj constraint for VFP vldr.16 and vstr.16. * config/arm/vfp.md (*mov_load_vfp_hf16): New pattern for vldr.16. (*mov_store_vfp_hf16): New pattern for vstr.16. (*mov_vfp_16): Remove MVE moves. gcc/testsuite/ChangeLog: 2020-05-20 Joe Ramsay * gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c: New test. 
--- gcc/config/arm/arm-protos.h| 3 + gcc/config/arm/arm.c | 74 ++ gcc/config/arm/constraints.md | 7 ++ gcc/config/arm/vfp.md | 26 +--- .../arm/mve/intrinsics/mve-vldstr16-no-writeback.c | 17 + 5 files changed, 105 insertions(+), 22 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 33d162c..e811da4 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -115,8 +115,11 @@ extern enum reg_class coproc_secondary_reload_class (machine_mode, rtx, extern bool arm_tls_referenced_p (rtx); extern int arm_coproc_mem_operand (rtx, bool); +extern int arm_coproc_mem_operand_no_writeback (rtx); +extern int arm_coproc_mem_operand_wb (rtx, int); extern int neon_vector_mem_operand (rtx, int, bool); extern int mve_vector_mem_operand (machine_mode, rtx, bool); +bool arm_mve_mode_and_operands_type_check (machine_mode, rtx, rtx); extern int neon_struct_mem_operand (rtx); extern rtx *neon_vcmla_lane_prepare_operands (rtx *); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 6b7ca82..63e052f 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -13217,13 +13217,14 @@ neon_element_bits (machine_mode mode) /* Predicates for `match_operand' and `match_operator'. */ /* Return TRUE if OP is a valid coprocessor memory address pattern. - WB is true if full writeback address modes are allowed and is false + WB level is 2 if full writeback address modes are allowed, 1 if limited writeback address modes (POST_INC and PRE_DEC) are - allowed. */ + allowed and 0 if no writeback at all is supported. */ int -arm_coproc_mem_operand (rtx op, bool wb) +arm_coproc_mem_operand_wb (rtx op, int wb_level) { + gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2); rtx ind; /* Reject eliminable registers. */ @@ -13256,16 +13257,18 @@ arm_coproc_mem_operand (rtx op, bool wb) /* Autoincremment addressing modes. 
POST_INC and PRE_DEC are acceptable in any case (subject to verification by - arm_address_register_rtx_p). We need WB to be true to accept + arm_address_register_rtx_p). We need full writeback to accept + PRE_INC and POST_DEC, and at least restricted writeback for PRE_INC and POST_DEC. */ - if (GET_CODE (ind) == POST_INC - || GET_CODE (ind) == PRE_DEC - || (wb - && (GET_CODE (ind) == PRE_INC - || GET_CODE (ind) == POST_DEC))) + if (wb_level > 0 + && (GET_CODE (ind) == POST_INC + || GET_CODE (ind) == PRE_DEC + || (wb_level > 1 + && (GET_CODE (ind) == PRE_INC + || GET_CODE (ind) == POST_DEC return arm_address_register_rtx_p (XEXP (ind, 0), 0); - if (wb + if (wb_level > 1 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY) && arm_address_register_rtx_p (XEXP (ind, 0), 0) && GET_CODE (XEXP (ind, 1)) == PLUS @@ -13287,6 +13290,25 @@ arm_coproc_mem_operand (rtx op, bool wb) return FALSE; } +/* Return TRUE if OP is a valid coprocessor memory address pattern. + WB is true if full write
[committed] MAINTAINERS: Add myself for write after approval
2020-07-30 Joe Ramsay * MAINTAINERS (Write After Approval): Add myself. --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 300c10e..0b825c7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -563,6 +563,7 @@ Vladimir Prus Yao Qi Jerry Quinn Easwaran Raman +Joe Ramsay Rolf Rasmussen Fritz Reese Volker Reichelt -- 2.7.4
[PATCH][GCC-10 Backport] arm: Enable no-writeback vldr.16/vstr.16.
From: Joe Ramsay Hi, There was previously no way to specify that a register operand cannot have any writeback modifiers, and as a result the argument to vldr.16 and vstr.16 could be erroneously output with post-increment. This change adds a constraint which forbids all writeback, and selects it in the relevant case for vldr.16 and vstr.16 Bootstrapped on arm-none-eabi. Patch backports cleanly onto gcc-10 branch with no regressions. OK for gcc-10 branch? Thanks, Joe gcc/ChangeLog: 2020-08-04 Joe Ramsay Backported from master 2020-05-20 Joe Ramsay * config/arm/arm-protos.h (arm_coproc_mem_operand_no_writeback): Declare prototype. (arm_mve_mode_and_operands_type_check): Declare prototype. * config/arm/arm.c (arm_coproc_mem_operand): Refactor to use _arm_coproc_mem_operand. (arm_coproc_mem_operand_wb): New function to cover full, limited and no writeback. (arm_coproc_mem_operand_no_writeback): New constraint for memory operand with no writeback. (arm_print_operand): Extend 'E' specifier for memory operand that does not support writeback. (arm_mve_mode_and_operands_type_check): New constraint check for MVE memory operands. * config/arm/constraints.md: Add Uj constraint for VFP vldr.16 and vstr.16. * config/arm/vfp.md (*mov_load_vfp_hf16): New pattern for vldr.16. (*mov_store_vfp_hf16): New pattern for vstr.16. (*mov_vfp_16): Remove MVE moves. gcc/testsuite/ChangeLog: 2020-08-04 Joe Ramsay Backported from master 2020-05-20 Joe Ramsay * gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c: New test. 
--- gcc/config/arm/arm-protos.h| 3 + gcc/config/arm/arm.c | 74 ++ gcc/config/arm/constraints.md | 7 ++ gcc/config/arm/vfp.md | 26 +--- .../arm/mve/intrinsics/mve-vldstr16-no-writeback.c | 17 + 5 files changed, 105 insertions(+), 22 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 33d162c..e811da4 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -115,8 +115,11 @@ extern enum reg_class coproc_secondary_reload_class (machine_mode, rtx, extern bool arm_tls_referenced_p (rtx); extern int arm_coproc_mem_operand (rtx, bool); +extern int arm_coproc_mem_operand_no_writeback (rtx); +extern int arm_coproc_mem_operand_wb (rtx, int); extern int neon_vector_mem_operand (rtx, int, bool); extern int mve_vector_mem_operand (machine_mode, rtx, bool); +bool arm_mve_mode_and_operands_type_check (machine_mode, rtx, rtx); extern int neon_struct_mem_operand (rtx); extern rtx *neon_vcmla_lane_prepare_operands (rtx *); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index a8825ee..d8da167 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -13192,13 +13192,14 @@ neon_element_bits (machine_mode mode) /* Predicates for `match_operand' and `match_operator'. */ /* Return TRUE if OP is a valid coprocessor memory address pattern. - WB is true if full writeback address modes are allowed and is false + WB level is 2 if full writeback address modes are allowed, 1 if limited writeback address modes (POST_INC and PRE_DEC) are - allowed. */ + allowed and 0 if no writeback at all is supported. */ int -arm_coproc_mem_operand (rtx op, bool wb) +arm_coproc_mem_operand_wb (rtx op, int wb_level) { + gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2); rtx ind; /* Reject eliminable registers. */ @@ -13231,16 +13232,18 @@ arm_coproc_mem_operand (rtx op, bool wb) /* Autoincremment addressing modes. 
POST_INC and PRE_DEC are acceptable in any case (subject to verification by - arm_address_register_rtx_p). We need WB to be true to accept + arm_address_register_rtx_p). We need full writeback to accept + PRE_INC and POST_DEC, and at least restricted writeback for PRE_INC and POST_DEC. */ - if (GET_CODE (ind) == POST_INC - || GET_CODE (ind) == PRE_DEC - || (wb - && (GET_CODE (ind) == PRE_INC - || GET_CODE (ind) == POST_DEC))) + if (wb_level > 0 + && (GET_CODE (ind) == POST_INC + || GET_CODE (ind) == PRE_DEC + || (wb_level > 1 + && (GET_CODE (ind) == PRE_INC + || GET_CODE (ind) == POST_DEC return arm_address_register_rtx_p (XEXP (ind, 0), 0); - if (wb + if (wb_level > 1 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY) && arm_address_register_rtx_p (XEXP (ind, 0), 0) && GET_CODE (XEXP (ind, 1)) == PLUS @@ -13262,6 +13265,25 @@ arm_coproc_m
[PATCH] aarch64: add support for unpacked EOR, ORR and AND
Hi! This patch improves code generation for EOR, ORR and AND on unpacked vectors with SVE. The following function: void f (unsigned int *x, unsigned short *y, unsigned short *z) { for (int i = 0; i < 7; ++i) x[i] = (unsigned short) (y[i] & z[i]); } previously compiled to: ptrue p1.d, vl3 ld1h z0.d, p1/z, [x1, #1, mul vl] ptrue p0.b, vl32 st1h z0.d, p0, [sp, #1, mul vl] ld1h z0.d, p1/z, [x2, #1, mul vl] st1h z0.d, p0, [sp] ldr x3, [x2] ldp x4, x2, [sp] ldr x1, [x1] and x1, x3, x1 and x2, x2, x4 str x2, [sp] ld1h z0.d, p0/z, [sp] str x1, [sp] uxth z0.s, p0/m, z0.s st1w z0.d, p1, [x0, #1, mul vl] ld1h z0.d, p0/z, [sp] uxth z0.s, p0/m, z0.s st1w z0.d, p0, [x0] add sp, sp, 16 ret and now compiles to: ptrue p0.s, vl7 ptrue p1.b, vl32 ld1h z1.s, p0/z, [x1] ld1h z0.s, p0/z, [x2] and z0.d, z0.d, z1.d uxth z0.s, p1/m, z0.s st1w z0.s, p0, [x0] ret Tested on aarch64-linux-gnu and x86_64-linux-gnu hosts. Thanks, Joe 2020-05-20 Joe Ramsay * config/aarch64/aarch64-sve.md (<optab><mode>3): Add support for unpacked EOR, ORR, AND. gcc/testsuite/ChangeLog: 2020-05-20 Joe Ramsay * gcc.target/aarch64/sve/logical_unpacked_and_1.c: New test. * gcc.target/aarch64/sve/logical_unpacked_and_2.c: New test. * gcc.target/aarch64/sve/logical_unpacked_and_3.c: New test. * gcc.target/aarch64/sve/logical_unpacked_and_4.c: New test. * gcc.target/aarch64/sve/logical_unpacked_and_5.c: New test. * gcc.target/aarch64/sve/logical_unpacked_and_6.c: New test. * gcc.target/aarch64/sve/logical_unpacked_and_7.c: New test. * gcc.target/aarch64/sve/logical_unpacked_eor_1.c: New test. * gcc.target/aarch64/sve/logical_unpacked_eor_2.c: New test. * gcc.target/aarch64/sve/logical_unpacked_eor_3.c: New test. * gcc.target/aarch64/sve/logical_unpacked_eor_4.c: New test. * gcc.target/aarch64/sve/logical_unpacked_eor_5.c: New test. * gcc.target/aarch64/sve/logical_unpacked_eor_6.c: New test. * gcc.target/aarch64/sve/logical_unpacked_eor_7.c: New test. * gcc.target/aarch64/sve/logical_unpacked_orr_1.c: New test. 
* gcc.target/aarch64/sve/logical_unpacked_orr_2.c: New test. * gcc.target/aarch64/sve/logical_unpacked_orr_3.c: New test. * gcc.target/aarch64/sve/logical_unpacked_orr_4.c: New test. * gcc.target/aarch64/sve/logical_unpacked_orr_5.c: New test. * gcc.target/aarch64/sve/logical_unpacked_orr_6.c: New test. * gcc.target/aarch64/sve/logical_unpacked_orr_7.c: New test. --- diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index f7a0893..8f0944c 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -4211,10 +4211,10 @@ ;; Unpredicated integer binary logical operations. (define_insn "3" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?w, w") - (LOGICAL:SVE_FULL_I - (match_operand:SVE_FULL_I 1 "register_operand" "%0, w, w") - (match_operand:SVE_FULL_I 2 "aarch64_sve_logical_operand" "vsl, vsl, w")))] + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?w, w") + (LOGICAL:SVE_I +(match_operand:SVE_I 1 "register_operand" "%0, w, w") +(match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, vsl, w")))] "TARGET_SVE" "@ \t%0., %0., #%C2 diff --git a/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_and_1.c b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_and_1.c new file mode 100644 index 000..7840355 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_and_1.c @@ -0,0 +1,16 @@ +/* { dg-options "-O3 -msve-vector-bits=256" } */ + +#include + +void +f (uint32_t *restrict dst, uint16_t *restrict src1, uint8_t *restrict src2) +{ + for (int i = 0; i < 7; ++i) +dst[i] = (uint16_t) (src1[i] & src2[i]); +} + +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 1 } } */ 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_and_2.c b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_and_2.c n
[PATCH]: aarch64: add support for unpacked EOR, ORR and AND
From: Joe Ramsay Date: Thursday, 28 May 2020 at 16:19 To: Gcc-patches Subject: [PATCH]: aarch64: add support for unpacked EOR, ORR and AND Hi! This patch improves code generation for EOR, ORR and AND on unpacked vectors with SVE. The following function: void f (unsigned int *x, unsigned short *y, unsigned short *z) { for (int i = 0; i < 7; ++i) x[i] = (unsigned short) (y[i] & z[i]); } previously compiled to: ptrue p1.d, vl3 ld1h z0.d, p1/z, [x1, #1, mul vl] ptrue p0.b, vl32 st1h z0.d, p0, [sp, #1, mul vl] ld1h z0.d, p1/z, [x2, #1, mul vl] st1h z0.d, p0, [sp] ldr x3, [x2] ldp x4, x2, [sp] ldr x1, [x1] and x1, x3, x1 and x2, x2, x4 str x2, [sp] ld1h z0.d, p0/z, [sp] str x1, [sp] uxth z0.s, p0/m, z0.s st1w z0.d, p1, [x0, #1, mul vl] ld1h z0.d, p0/z, [sp] uxth z0.s, p0/m, z0.s st1w z0.d, p0, [x0] add sp, sp, 16 ret and now compiles to: ptrue p0.s, vl7 ptrue p1.b, vl32 ld1h z1.s, p0/z, [x1] ld1h z0.s, p0/z, [x2] and z0.d, z0.d, z1.d uxth z0.s, p1/m, z0.s st1w z0.s, p0, [x0] ret Tested on aarch64-linux-gnu and x86_64-linux-gnu hosts. Thanks, Joe 2020-05-20 Joe Ramsay * config/aarch64/aarch64-sve.md (<optab><mode>3): Add support for unpacked EOR, ORR, AND. gcc/testsuite/ChangeLog: 2020-05-20 Joe Ramsay * gcc.target/aarch64/sve/load_const_offset_2.c: Force using packed vectors. * gcc.target/aarch64/sve/logical_unpacked_and_1.c: New test. * gcc.target/aarch64/sve/logical_unpacked_and_2.c: New test. * gcc.target/aarch64/sve/logical_unpacked_and_3.c: New test. * gcc.target/aarch64/sve/logical_unpacked_and_4.c: New test. * gcc.target/aarch64/sve/logical_unpacked_and_5.c: New test. * gcc.target/aarch64/sve/logical_unpacked_and_6.c: New test. * gcc.target/aarch64/sve/logical_unpacked_and_7.c: New test. * gcc.target/aarch64/sve/logical_unpacked_eor_1.c: New test. * gcc.target/aarch64/sve/logical_unpacked_eor_2.c: New test. * gcc.target/aarch64/sve/logical_unpacked_eor_3.c: New test. * gcc.target/aarch64/sve/logical_unpacked_eor_4.c: New test. * gcc.target/aarch64/sve/logical_unpacked_eor_5.c: New test. 
* gcc.target/aarch64/sve/logical_unpacked_eor_6.c: New test. * gcc.target/aarch64/sve/logical_unpacked_eor_7.c: New test. * gcc.target/aarch64/sve/logical_unpacked_orr_1.c: New test. * gcc.target/aarch64/sve/logical_unpacked_orr_2.c: New test. * gcc.target/aarch64/sve/logical_unpacked_orr_3.c: New test. * gcc.target/aarch64/sve/logical_unpacked_orr_4.c: New test. * gcc.target/aarch64/sve/logical_unpacked_orr_5.c: New test. * gcc.target/aarch64/sve/logical_unpacked_orr_6.c: New test. * gcc.target/aarch64/sve/logical_unpacked_orr_7.c: New test. * gcc.target/aarch64/sve/scatter_store_6.c: Force using packed vectors. * gcc.target/aarch64/sve/scatter_store_7.c: Force using packed vectors. * gcc.target/aarch64/sve/strided_load_3.c: Force using packed vectors. * gcc.target/aarch64/sve/strided_store_3.c: Force using packed vectors. * gcc.target/aarch64/sve/unpack_signed_1.c: Force using packed vectors. 0001-Support-AND-ORR-EOR-on-unpacked-vectors.patch Description: 0001-Support-AND-ORR-EOR-on-unpacked-vectors.patch
[PATCH] AArch64+SVE: Add support for unpacked unary ops and BIC
Hi! This patch extends the MD patterns for the unary ops ABS, CLS, CLZ, CNT, NEG and NOT to support unpacked vectors. It also extends the patterns for BIC to support unpacked vectors where the input elements are of the same width. Tested on x86_64-linux and aarch64-linux hosts. Thanks, Joe gcc/ChangeLog: 2020-05-27 Joe Ramsay * config/aarch64/aarch64-sve.md (<optab><mode>2): Add support for unpacked vectors. * config/aarch64/aarch64-sve.md (@aarch64_pred_<optab><mode>): Add support for unpacked vectors. * config/aarch64/aarch64-sve.md (@cond_<optab><mode>): Add support for unpacked vectors. * config/aarch64/aarch64-sve.md (@aarch64_bic<mode>): Enable unpacked BIC. * config/aarch64/aarch64-sve.md (*bic<mode>3): Enable unpacked BIC. gcc/testsuite/ChangeLog: 2020-05-27 Joe Ramsay * gcc.target/aarch64/sve/logical_unpacked_abs.c: New test. * gcc.target/aarch64/sve/logical_unpacked_bic_1.c: New test. * gcc.target/aarch64/sve/logical_unpacked_bic_2.c: New test. * gcc.target/aarch64/sve/logical_unpacked_bic_3.c: New test. * gcc.target/aarch64/sve/logical_unpacked_bic_4.c: New test. * gcc.target/aarch64/sve/logical_unpacked_neg.c: New test. * gcc.target/aarch64/sve/logical_unpacked_not.c: New test. 
--- gcc/config/aarch64/aarch64-sve.md | 48 +++--- .../gcc.target/aarch64/sve/logical_unpacked_abs.c | 16 .../aarch64/sve/logical_unpacked_bic_1.c | 15 +++ .../aarch64/sve/logical_unpacked_bic_2.c | 15 +++ .../aarch64/sve/logical_unpacked_bic_3.c | 15 +++ .../aarch64/sve/logical_unpacked_bic_4.c | 15 +++ .../gcc.target/aarch64/sve/logical_unpacked_neg.c | 16 .../gcc.target/aarch64/sve/logical_unpacked_not.c | 16 8 files changed, 132 insertions(+), 24 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_abs.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_bic_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_bic_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_bic_3.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_bic_4.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_neg.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_not.c diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 8f0944c..f7100a2 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -2822,11 +2822,11 @@ ;; Unpredicated integer unary arithmetic. (define_expand "2" - [(set (match_operand:SVE_FULL_I 0 "register_operand") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand") + (unspec:SVE_I [(match_dup 2) - (SVE_INT_UNARY:SVE_FULL_I -(match_operand:SVE_FULL_I 1 "register_operand"))] + (SVE_INT_UNARY:SVE_I + (match_operand:SVE_I 1 "register_operand"))] UNSPEC_PRED_X))] "TARGET_SVE" { @@ -2836,11 +2836,11 @@ ;; Integer unary arithmetic predicated with a PTRUE. 
(define_insn "@aarch64_pred_" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand" "=w") + (unspec:SVE_I [(match_operand: 1 "register_operand" "Upl") - (SVE_INT_UNARY:SVE_FULL_I -(match_operand:SVE_FULL_I 2 "register_operand" "w"))] + (SVE_INT_UNARY:SVE_I + (match_operand:SVE_I 2 "register_operand" "w"))] UNSPEC_PRED_X))] "TARGET_SVE" "\t%0., %1/m, %2." @@ -2848,12 +2848,12 @@ ;; Predicated integer unary arithmetic with merging. (define_expand "@cond_" - [(set (match_operand:SVE_FULL_I 0 "register_operand") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand") + (unspec:SVE_I [(match_operand: 1 "register_operand") - (SVE_INT_UNARY:SVE_FULL_I -(match_operand:SVE_FULL_I 2 "register_operand")) - (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")] + (SVE_INT_UNARY:SVE_I + (match_operand:SVE_I 2 "register_operand")) + (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE" ) @@ -4234,13 +4234,13 @@ ;; Unpredicated BIC. (define_expand "@aarch64_bic" - [(set (match_operand:SVE_FULL_I 0 "register_operand&quo