On 29/06/17 21:53, Jim Wilson wrote:
> Falkor is an ARMv8-A part, but also includes the RDMA extension from
> ARMv8.1-A.
> I'd like to enable support for the RDMA instructions when -mcpu=falkor is
> used,
> and also make the RDMA intrinsics available. To do that, I need to add rdma
> as an architecture extension, and modify a few things to use it. Binutils
> already supports rdma as an architecture extension.
>
> I only did the aarch64 port, and not the arm port. There are no supported
> targets that have the RDMA instructions and also aarch32 support. There are
> also no aarch32 RDMA testcases. So there is no way to test it. It wasn't
> clear whether it was better to add something untested or leave it out. I
> chose
> to leave it out for now.
>
> I also needed a few testcase changes. There were redundant options being
> added for the RDMA tests that I had to remove as they are now wrong. Also
> the fact that I only did aarch64 means we need to check both armv8-a+rdma and
> armv8.1-a for the rdma support.
>
> This was tested with an aarch64 bootstrap and make check. There were no
> regressions.
>
> OK?
OK.
R.
>
> Jim
>
> gcc/
> * config/aarch64/aarch64-cores.def (falkor): Add AARCH64_FL_RDMA.
> (qdf24xx): Likewise.
> * config/aarch64/aarch64-option-extensions.def (rdma): New.
> * config/aarch64/aarch64.h (AARCH64_FL_RDMA): New.
> (AARCH64_FL_V8_1): Renumber.
> (AARCH64_FL_FOR_ARCH8_1): Add AARCH64_FL_RDMA.
> (AARCH64_ISA_RDMA): Use AARCH64_FL_RDMA.
> * config/aarch64/arm_neon.h: Use +rdma instead of arch=armv8.1-a.
> * doc/invoke.texi (AArch64 Options): Mention +rdma in -march docs. Add
> rdma to feature modifiers list.
>
> gcc/testsuite/
> * lib/target-supports.exp (add_options_for_arm_v8_1a_neon): Delete
> redundant -march option.
> (check_effective_target_arm_v8_1a_neon_ok_nocache): Try armv8-a+rdma
> in addition to armv8.1-a.
> ---
> gcc/config/aarch64/aarch64-cores.def | 4 ++--
> gcc/config/aarch64/aarch64-option-extensions.def | 4 ++++
> gcc/config/aarch64/aarch64.h | 8 +++++---
> gcc/config/aarch64/arm_neon.h | 2 +-
> gcc/doc/invoke.texi | 5 ++++-
> gcc/testsuite/lib/target-supports.exp | 18 ++++++++++--------
> 6 files changed, 26 insertions(+), 15 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64-cores.def
> b/gcc/config/aarch64/aarch64-cores.def
> index f8342ca..b8d0ba6 100644
> --- a/gcc/config/aarch64/aarch64-cores.def
> +++ b/gcc/config/aarch64/aarch64-cores.def
> @@ -65,8 +65,8 @@ AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A,
> AARCH64_FL_FOR_ARCH
> AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8,
> xgene1, 0x50, 0x000, -1)
>
> /* Qualcomm ('Q') cores. */
> -AARCH64_CORE("falkor", falkor, cortexa57, 8A, AARCH64_FL_FOR_ARCH8
> | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x51, 0xC00, -1)
> -AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8
> | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x51, 0xC00, -1)
> +AARCH64_CORE("falkor", falkor, cortexa57, 8A, AARCH64_FL_FOR_ARCH8
> | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51,
> 0xC00, -1)
> +AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8
> | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51,
> 0xC00, -1)
>
> /* Samsung ('S') cores. */
> AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_ARCH8
> | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1)
> diff --git a/gcc/config/aarch64/aarch64-option-extensions.def
> b/gcc/config/aarch64/aarch64-option-extensions.def
> index c0752ce..c4f059a 100644
> --- a/gcc/config/aarch64/aarch64-option-extensions.def
> +++ b/gcc/config/aarch64/aarch64-option-extensions.def
> @@ -63,4 +63,8 @@ AARCH64_OPT_EXTENSION("fp16", AARCH64_FL_F16,
> AARCH64_FL_FP, 0, "fphp asimdhp")
> /* Enabling or disabling "rcpc" only changes "rcpc". */
> AARCH64_OPT_EXTENSION("rcpc", AARCH64_FL_RCPC, 0, 0, "lrcpc")
>
> +/* Enabling "rdma" also enables "fp", "simd".
> + Disabling "rdma" just disables "rdma". */
> +AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA, AARCH64_FL_FP |
> AARCH64_FL_SIMD, 0, "rdma")
> +
> #undef AARCH64_OPT_EXTENSION
> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> index 106cf3a..7f91edb 100644
> --- a/gcc/config/aarch64/aarch64.h
> +++ b/gcc/config/aarch64/aarch64.h
> @@ -144,7 +144,8 @@ extern unsigned aarch64_architecture_version;
> #define AARCH64_FL_CRC (1 << 3) /* Has CRC. */
> /* ARMv8.1-A architecture extensions. */
> #define AARCH64_FL_LSE (1 << 4) /* Has Large System Extensions.
> */
> -#define AARCH64_FL_V8_1 (1 << 5) /* Has ARMv8.1-A extensions. */
> +#define AARCH64_FL_RDMA (1 << 5) /* Has Round Double Multiply
> Add. */
> +#define AARCH64_FL_V8_1 (1 << 6) /* Has ARMv8.1-A extensions. */
> /* ARMv8.2-A architecture extensions. */
> #define AARCH64_FL_V8_2 (1 << 8) /* Has ARMv8.2-A features. */
> #define AARCH64_FL_F16 (1 << 9) /* Has ARMv8.2-A FP16
> extensions. */
> @@ -161,7 +162,8 @@ extern unsigned aarch64_architecture_version;
> /* Architecture flags that effect instruction selection. */
> #define AARCH64_FL_FOR_ARCH8 (AARCH64_FL_FPSIMD)
> #define AARCH64_FL_FOR_ARCH8_1 \
> - (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_CRC | AARCH64_FL_V8_1)
> + (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_CRC \
> + | AARCH64_FL_RDMA | AARCH64_FL_V8_1)
> #define AARCH64_FL_FOR_ARCH8_2 \
> (AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2)
> #define AARCH64_FL_FOR_ARCH8_3 \
> @@ -174,7 +176,7 @@ extern unsigned aarch64_architecture_version;
> #define AARCH64_ISA_FP (aarch64_isa_flags & AARCH64_FL_FP)
> #define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD)
> #define AARCH64_ISA_LSE (aarch64_isa_flags & AARCH64_FL_LSE)
> -#define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_V8_1)
> +#define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_RDMA)
> #define AARCH64_ISA_V8_2 (aarch64_isa_flags & AARCH64_FL_V8_2)
> #define AARCH64_ISA_F16 (aarch64_isa_flags & AARCH64_FL_F16)
> #define AARCH64_ISA_V8_3 (aarch64_isa_flags & AARCH64_FL_V8_3)
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index 0753da3..d7b30b0 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -12162,7 +12162,7 @@ vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t
> __c)
>
> /* ARMv8.1-A instrinsics. */
> #pragma GCC push_options
> -#pragma GCC target ("arch=armv8.1-a")
> +#pragma GCC target ("+nothing+rdma")
>
> __extension__ extern __inline int16x4_t
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index d1e097b..2bd10c6 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -14082,7 +14082,7 @@ support for the ARMv8.2-A architecture extensions.
>
> The value @samp{armv8.1-a} implies @samp{armv8-a} and enables compiler
> support for the ARMv8.1-A architecture extension. In particular, it
> -enables the @samp{+crc} and @samp{+lse} features.
> +enables the @samp{+crc}, @samp{+lse}, and @samp{+rdma} features.
>
> The value @samp{native} is available on native AArch64 GNU/Linux and
> causes the compiler to pick the architecture of the host system. This
> @@ -14198,6 +14198,9 @@ instructions. This is on by default for all possible
> values for options
> @item lse
> Enable Large System Extension instructions. This is on by default for
> @option{-march=armv8.1-a}.
> +@item rdma
> +Enable Round Double Multiply Accumulate instructions. This is on by default
> +for @option{-march=armv8.1-a}.
> @item fp16
> Enable FP16 extension. This also enables floating-point instructions.
>
> diff --git a/gcc/testsuite/lib/target-supports.exp
> b/gcc/testsuite/lib/target-supports.exp
> index fe5e777..a245eed 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -3425,7 +3425,7 @@ proc add_options_for_arm_v8_1a_neon { flags } {
> return "$flags"
> }
> global et_arm_v8_1a_neon_flags
> - return "$flags $et_arm_v8_1a_neon_flags -march=armv8.1-a"
> + return "$flags $et_arm_v8_1a_neon_flags"
> }
>
> # Add the options needed for ARMv8.2 with the scalar FP16 extension.
> @@ -4115,13 +4115,15 @@ proc check_effective_target_arm_v8_1a_neon_ok_nocache
> { } {
> # since AArch64 only needs the -march setting.
> foreach flags {"" "-mfpu=neon-fp-armv8" "-mfloat-abi=softfp" \
> "-mfpu=neon-fp-armv8 -mfloat-abi=softfp"} {
> - if { [check_no_compiler_messages_nocache arm_v8_1a_neon_ok object {
> - #if !defined (__ARM_FEATURE_QRDMX)
> - #error "__ARM_FEATURE_QRDMX not defined"
> - #endif
> - } "$flags -march=armv8.1-a"] } {
> - set et_arm_v8_1a_neon_flags "$flags -march=armv8.1-a"
> - return 1
> + foreach arches { "-march=armv8-a+rdma" "-march=armv8.1-a" } {
> + if { [check_no_compiler_messages_nocache arm_v8_1a_neon_ok object {
> + #if !defined (__ARM_FEATURE_QRDMX)
> + #error "__ARM_FEATURE_QRDMX not defined"
> + #endif
> + } "$flags $arches"] } {
> + set et_arm_v8_1a_neon_flags "$flags $arches"
> + return 1
> + }
> }
> }
>
>