https://gcc.gnu.org/g:952ea3260e40992d3bf5e1f17b4845a4e5c908b5
commit r13-8871-g952ea3260e40992d3bf5e1f17b4845a4e5c908b5 Author: Kyrylo Tkachov <ktkac...@nvidia.com> Date: Wed Jun 19 14:56:02 2024 +0530 Add support for -mcpu=grace This adds support for the NVIDIA Grace CPU to aarch64. We reuse the tuning decisions for the Neoverse V2 core, but include a number of architecture features that are not enabled by default in -mcpu=neoverse-v2. This allows Grace users to more simply target the CPU with -mcpu=grace rather than remembering what extensions to tag on top of -mcpu=neoverse-v2. Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ * config/aarch64/aarch64-cores.def (grace): New entry. * config/aarch64/aarch64-tune.md: Regenerate. * doc/invoke.texi (AArch64 Options): Document the above. Signed-off-by: Kyrylo Tkachov <ktkac...@nvidia.com> Diff: --- gcc/config/aarch64/aarch64-cores.def | 2 ++ gcc/config/aarch64/aarch64-tune.md | 2 +- gcc/doc/invoke.texi | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index fdda0697b88..bec08ca1910 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -182,6 +182,8 @@ AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, (I8MM, BF16, SVE2_BITPER AARCH64_CORE("cobalt-100", cobalt100, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversen2, 0x6d, 0xd49, -1) AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1) +AARCH64_CORE("grace", grace, cortexa57, V9A, (I8MM, BF16, CRYPTO, SVE2_BITPERM, SVE2_AES, SVE2_SHA3, SVE2_SM4, PROFILE), neoversev2, 0x41, 0xd4f, -1) + AARCH64_CORE("demeter", demeter, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1) #undef AARCH64_CORE diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md index 9d46d38a292..6eae8522593 100644 --- a/gcc/config/aarch64/aarch64-tune.md +++ b/gcc/config/aarch64/aarch64-tune.md @@ -1,5 +1,5 @@ ;; -*- buffer-read-only: t -*- ;; Generated automatically by gentune.sh from aarch64-cores.def (define_attr "tune" - "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexa715,cortexx2,cortexx3,neoversen2,cobalt100,neoversev2,demeter" + "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexa715,cortexx2,cortexx3,neoversen2,cobalt100,neoversev2,grace,demeter" (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 914c4bc8e6d..b17d0cf9341 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -20315,8 +20315,8 @@ performance of the code. Permissible values for this option are: @samp{cortex-a78}, @samp{cortex-a78ae}, @samp{cortex-a78c}, @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor}, @samp{neoverse-512tvb}, @samp{neoverse-e1}, @samp{neoverse-n1}, -@samp{neoverse-n2}, @samp{neoverse-v1}, @samp{neoverse-v2}, @samp{qdf24xx}, -@samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan}, +@samp{neoverse-n2}, @samp{neoverse-v1}, @samp{neoverse-v2}, @samp{grace}, +@samp{qdf24xx}, @samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan}, @samp{octeontx}, @samp{octeontx81}, @samp{octeontx83}, @samp{octeontx2}, @samp{octeontx2t98}, @samp{octeontx2t96} @samp{octeontx2t93}, @samp{octeontx2f95}, @samp{octeontx2f95n},