https://github.com/labrinea updated https://github.com/llvm/llvm-project/pull/121921
>From 785c6eca701edbd42686a4adaa2099b55b5271a0 Mon Sep 17 00:00:00 2001 From: Alexandros Lamprineas <alexandros.lamprin...@arm.com> Date: Tue, 7 Jan 2025 11:37:32 +0000 Subject: [PATCH 1/4] [FMV][AArch64] Simplify version selection according to ACLE. Currently, the more features a version has, the higher its priority is. We are changing ACLE https://github.com/ARM-software/acle/pull/370 as follows: "Among any two versions, the higher priority version is determined by identifying the highest priority feature that is specified in exactly one of the versions, and selecting that version." --- .../test/CodeGen/attr-target-clones-aarch64.c | 48 ++++---- clang/test/CodeGen/attr-target-version.c | 78 ++++++------- .../llvm/TargetParser/AArch64CPUFeatures.inc | 57 +++++++++- .../llvm/TargetParser/AArch64TargetParser.h | 10 +- llvm/lib/Target/AArch64/AArch64FMV.td | 105 +++++++++--------- llvm/lib/TargetParser/AArch64TargetParser.cpp | 15 +-- .../TableGen/Basic/ARMTargetDefEmitter.cpp | 4 +- 7 files changed, 183 insertions(+), 134 deletions(-) diff --git a/clang/test/CodeGen/attr-target-clones-aarch64.c b/clang/test/CodeGen/attr-target-clones-aarch64.c index 50c040f2093b01..b7e3a328db8773 100644 --- a/clang/test/CodeGen/attr-target-clones-aarch64.c +++ b/clang/test/CodeGen/attr-target-clones-aarch64.c @@ -64,20 +64,20 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 33664 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 33664 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 69793284352 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 69793284352 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: -// 
CHECK-NEXT: ret ptr @ftc._MaesMlse +// CHECK-NEXT: ret ptr @ftc._Msve2 // CHECK: resolver_else: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 69793284352 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 69793284352 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 33664 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 33664 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK: resolver_return1: -// CHECK-NEXT: ret ptr @ftc._Msve2 +// CHECK-NEXT: ret ptr @ftc._MaesMlse // CHECK: resolver_else2: // CHECK-NEXT: ret ptr @ftc.default // @@ -411,20 +411,20 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 70369817985280 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 70369817985280 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1125899906842624 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1125899906842624 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: -// CHECK-NEXT: ret ptr @ftc_inline3._MsbMsve +// CHECK-NEXT: ret ptr @ftc_inline3._Mbti // CHECK: resolver_else: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 1125899906842624 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 1125899906842624 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 70369817985280 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 70369817985280 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label 
[[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK: resolver_return1: -// CHECK-NEXT: ret ptr @ftc_inline3._Mbti +// CHECK-NEXT: ret ptr @ftc_inline3._MsbMsve // CHECK: resolver_else2: // CHECK-NEXT: ret ptr @ftc_inline3.default // @@ -521,20 +521,20 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-MTE-BTI-NEXT: resolver_entry: // CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() // CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 33664 -// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 33664 +// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 69793284352 +// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 69793284352 // CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK-MTE-BTI: resolver_return: -// CHECK-MTE-BTI-NEXT: ret ptr @ftc._MaesMlse +// CHECK-MTE-BTI-NEXT: ret ptr @ftc._Msve2 // CHECK-MTE-BTI: resolver_else: // CHECK-MTE-BTI-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 69793284352 -// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 69793284352 +// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 33664 +// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 33664 // CHECK-MTE-BTI-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-MTE-BTI-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK-MTE-BTI: resolver_return1: -// CHECK-MTE-BTI-NEXT: ret ptr @ftc._Msve2 +// CHECK-MTE-BTI-NEXT: ret ptr @ftc._MaesMlse // CHECK-MTE-BTI: resolver_else2: // CHECK-MTE-BTI-NEXT: ret ptr @ftc.default // @@ -868,20 +868,20 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-MTE-BTI-NEXT: resolver_entry: // 
CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() // CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 70369817985280 -// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 70369817985280 +// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1125899906842624 +// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1125899906842624 // CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK-MTE-BTI: resolver_return: -// CHECK-MTE-BTI-NEXT: ret ptr @ftc_inline3._MsbMsve +// CHECK-MTE-BTI-NEXT: ret ptr @ftc_inline3._Mbti // CHECK-MTE-BTI: resolver_else: // CHECK-MTE-BTI-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 1125899906842624 -// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 1125899906842624 +// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 70369817985280 +// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 70369817985280 // CHECK-MTE-BTI-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-MTE-BTI-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK-MTE-BTI: resolver_return1: -// CHECK-MTE-BTI-NEXT: ret ptr @ftc_inline3._Mbti +// CHECK-MTE-BTI-NEXT: ret ptr @ftc_inline3._MsbMsve // CHECK-MTE-BTI: resolver_else2: // CHECK-MTE-BTI-NEXT: ret ptr @ftc_inline3.default // diff --git a/clang/test/CodeGen/attr-target-version.c b/clang/test/CodeGen/attr-target-version.c index 2862151ea29432..ea2994b8e5c39d 100644 --- a/clang/test/CodeGen/attr-target-version.c +++ b/clang/test/CodeGen/attr-target-version.c @@ -463,12 +463,12 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: 
[[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 66315 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 66315 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 144119586256651008 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 144119586256651008 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: -// CHECK-NEXT: ret ptr @fmv._MflagmMfp16fmlMrng +// CHECK-NEXT: ret ptr @fmv._Msme2 // CHECK: resolver_else: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 // CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 72061992218723078 @@ -495,44 +495,44 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // CHECK-NEXT: ret ptr @fmv._McrcMls64 // CHECK: resolver_else6: // CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 17592186110728 -// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 17592186110728 +// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 1125899906842624 +// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 1125899906842624 // CHECK-NEXT: [[TMP19:%.*]] = and i1 true, [[TMP18]] // CHECK-NEXT: br i1 [[TMP19]], label [[RESOLVER_RETURN7:%.*]], label [[RESOLVER_ELSE8:%.*]] // CHECK: resolver_return7: -// CHECK-NEXT: ret ptr @fmv._Mfp16fmlMmemtag +// CHECK-NEXT: ret ptr @fmv._Mbti // CHECK: resolver_else8: // CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 33536 -// CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[TMP21]], 33536 +// CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 17592186110728 +// CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[TMP21]], 17592186110728 // CHECK-NEXT: [[TMP23:%.*]] = and i1 true, [[TMP22]] // CHECK-NEXT: br i1 [[TMP23]], label [[RESOLVER_RETURN9:%.*]], label 
[[RESOLVER_ELSE10:%.*]] // CHECK: resolver_return9: -// CHECK-NEXT: ret ptr @fmv._MaesMfp +// CHECK-NEXT: ret ptr @fmv._Mfp16fmlMmemtag // CHECK: resolver_else10: // CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP24]], 4992 -// CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], 4992 +// CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP24]], 66315 +// CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], 66315 // CHECK-NEXT: [[TMP27:%.*]] = and i1 true, [[TMP26]] // CHECK-NEXT: br i1 [[TMP27]], label [[RESOLVER_RETURN11:%.*]], label [[RESOLVER_ELSE12:%.*]] // CHECK: resolver_return11: -// CHECK-NEXT: ret ptr @fmv._MlseMsha2 +// CHECK-NEXT: ret ptr @fmv._MflagmMfp16fmlMrng // CHECK: resolver_else12: // CHECK-NEXT: [[TMP28:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP28]], 144119586256651008 -// CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[TMP29]], 144119586256651008 +// CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP28]], 33536 +// CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[TMP29]], 33536 // CHECK-NEXT: [[TMP31:%.*]] = and i1 true, [[TMP30]] // CHECK-NEXT: br i1 [[TMP31]], label [[RESOLVER_RETURN13:%.*]], label [[RESOLVER_ELSE14:%.*]] // CHECK: resolver_return13: -// CHECK-NEXT: ret ptr @fmv._Msme2 +// CHECK-NEXT: ret ptr @fmv._MaesMfp // CHECK: resolver_else14: // CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP33:%.*]] = and i64 [[TMP32]], 1125899906842624 -// CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[TMP33]], 1125899906842624 +// CHECK-NEXT: [[TMP33:%.*]] = and i64 [[TMP32]], 4992 +// CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[TMP33]], 4992 // CHECK-NEXT: [[TMP35:%.*]] = and i1 true, [[TMP34]] // CHECK-NEXT: br i1 [[TMP35]], label [[RESOLVER_RETURN15:%.*]], label [[RESOLVER_ELSE16:%.*]] // CHECK: resolver_return15: -// CHECK-NEXT: ret ptr @fmv._Mbti +// CHECK-NEXT: ret ptr @fmv._MlseMsha2 // CHECK: 
resolver_else16: // CHECK-NEXT: ret ptr @fmv.default // @@ -773,60 +773,60 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4398182892352 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4398182892352 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 864708720653762560 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 864708720653762560 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: -// CHECK-NEXT: ret ptr @fmv_inline._MfcmaMfp16MrdmMsme +// CHECK-NEXT: ret ptr @fmv_inline._MmemtagMmopsMrcpc3 // CHECK: resolver_else: // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 864708720653762560 -// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 864708720653762560 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 19861002584864 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 19861002584864 // CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] // CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] // CHECK: resolver_return1: -// CHECK-NEXT: ret ptr @fmv_inline._MmemtagMmopsMrcpc3 +// CHECK-NEXT: ret ptr @fmv_inline._MmemtagMsve2-sm4 // CHECK: resolver_else2: // CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 894427038464 -// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 894427038464 +// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 4398182892352 +// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 4398182892352 // CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]] // CHECK-NEXT: br i1 [[TMP11]], label 
[[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] // CHECK: resolver_return3: -// CHECK-NEXT: ret ptr @fmv_inline._Msve2Msve2-aesMsve2-bitperm +// CHECK-NEXT: ret ptr @fmv_inline._MfcmaMfp16MrdmMsme // CHECK: resolver_else4: // CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 35433583360 -// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 35433583360 +// CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 1444182864640 +// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 1444182864640 // CHECK-NEXT: [[TMP15:%.*]] = and i1 true, [[TMP14]] // CHECK-NEXT: br i1 [[TMP15]], label [[RESOLVER_RETURN5:%.*]], label [[RESOLVER_ELSE6:%.*]] // CHECK: resolver_return5: -// CHECK-NEXT: ret ptr @fmv_inline._MaesMf64mmMsha2 +// CHECK-NEXT: ret ptr @fmv_inline._Msve2-aesMsve2-sha3 // CHECK: resolver_else6: // CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 18320798464 -// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 18320798464 +// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 894427038464 +// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 894427038464 // CHECK-NEXT: [[TMP19:%.*]] = and i1 true, [[TMP18]] // CHECK-NEXT: br i1 [[TMP19]], label [[RESOLVER_RETURN7:%.*]], label [[RESOLVER_ELSE8:%.*]] // CHECK: resolver_return7: -// CHECK-NEXT: ret ptr @fmv_inline._Mf32mmMi8mmMsha3 +// CHECK-NEXT: ret ptr @fmv_inline._Msve2Msve2-aesMsve2-bitperm // CHECK: resolver_else8: // CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 19861002584864 -// CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[TMP21]], 19861002584864 +// CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 35433583360 +// CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[TMP21]], 35433583360 // CHECK-NEXT: [[TMP23:%.*]] = and i1 true, [[TMP22]] // CHECK-NEXT: br i1 [[TMP23]], label 
[[RESOLVER_RETURN9:%.*]], label [[RESOLVER_ELSE10:%.*]] // CHECK: resolver_return9: -// CHECK-NEXT: ret ptr @fmv_inline._MmemtagMsve2-sm4 +// CHECK-NEXT: ret ptr @fmv_inline._MaesMf64mmMsha2 // CHECK: resolver_else10: // CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP24]], 1444182864640 -// CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], 1444182864640 +// CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP24]], 18320798464 +// CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], 18320798464 // CHECK-NEXT: [[TMP27:%.*]] = and i1 true, [[TMP26]] // CHECK-NEXT: br i1 [[TMP27]], label [[RESOLVER_RETURN11:%.*]], label [[RESOLVER_ELSE12:%.*]] // CHECK: resolver_return11: -// CHECK-NEXT: ret ptr @fmv_inline._Msve2-aesMsve2-sha3 +// CHECK-NEXT: ret ptr @fmv_inline._Mf32mmMi8mmMsha3 // CHECK: resolver_else12: // CHECK-NEXT: [[TMP28:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 // CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP28]], 1208025856 diff --git a/llvm/include/llvm/TargetParser/AArch64CPUFeatures.inc b/llvm/include/llvm/TargetParser/AArch64CPUFeatures.inc index 6b373ce424678a..998d270b2058c8 100644 --- a/llvm/include/llvm/TargetParser/AArch64CPUFeatures.inc +++ b/llvm/include/llvm/TargetParser/AArch64CPUFeatures.inc @@ -15,10 +15,13 @@ // changes in this file, first modify the primary copy and copy it over to // compiler-rt. compiler-rt tests will fail if the two files are not synced up. // +// Additionally this file enumerates the feature priorities in ascending order, +// as defined in the ACLE specification. +// //===----------------------------------------------------------------------===// -#ifndef AARCH64_CPU_FEATURS_INC_H -#define AARCH64_CPU_FEATURS_INC_H +#ifndef AARCH64_CPU_FEATURES_INC_H +#define AARCH64_CPU_FEATURES_INC_H // Function Multi Versioning CPU features. 
enum CPUFeatures { @@ -88,4 +91,54 @@ enum CPUFeatures { FEAT_INIT // Used as flag of features initialization completion }; +// Function Multi Versioning feature priorities in ascending order. +enum FeatPriorities { + PRIOR_RNG, + PRIOR_FLAGM, + PRIOR_FLAGM2, + PRIOR_LSE, + PRIOR_FP, + PRIOR_SIMD, + PRIOR_DOTPROD, + PRIOR_SM4, + PRIOR_RDM, + PRIOR_CRC, + PRIOR_SHA2, + PRIOR_SHA3, + PRIOR_PMULL, + PRIOR_FP16, + PRIOR_FP16FML, + PRIOR_DIT, + PRIOR_DPB, + PRIOR_DPB2, + PRIOR_JSCVT, + PRIOR_FCMA, + PRIOR_RCPC, + PRIOR_RCPC2, + PRIOR_RCPC3, + PRIOR_FRINTTS, + PRIOR_I8MM, + PRIOR_BF16, + PRIOR_SVE, + PRIOR_SVE_F32MM, + PRIOR_SVE_F64MM, + PRIOR_SVE2, + PRIOR_SVE_PMULL128, + PRIOR_SVE_BITPERM, + PRIOR_SVE_SHA3, + PRIOR_SVE_SM4, + PRIOR_SME, + PRIOR_MEMTAG2, + PRIOR_SB, + PRIOR_PREDRES, + PRIOR_SSBS2, + PRIOR_BTI, + PRIOR_LS64_ACCDATA, + PRIOR_WFXT, + PRIOR_SME_F64, + PRIOR_SME_I64, + PRIOR_SME2, + PRIOR_MOPS +}; + #endif diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h index ac8006d671a06d..8ec2201b07db53 100644 --- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -70,12 +70,12 @@ struct ExtensionInfo { struct FMVInfo { StringRef Name; // The target_version/target_clones spelling. - CPUFeatures Bit; // Index of the bit in the FMV feature bitset. + CPUFeatures FeatureBit; // Index of the bit in the FMV feature bitset. + FeatPriorities PriorityBit; // Index of the bit in the FMV priority bitset. std::optional<ArchExtKind> ID; // The architecture extension to enable. - unsigned Priority; // FMV priority. 
- FMVInfo(StringRef Name, CPUFeatures Bit, std::optional<ArchExtKind> ID, - unsigned Priority) - : Name(Name), Bit(Bit), ID(ID), Priority(Priority) {}; + FMVInfo(StringRef Name, CPUFeatures FeatureBit, FeatPriorities PriorityBit, + std::optional<ArchExtKind> ID) + : Name(Name), FeatureBit(FeatureBit), PriorityBit(PriorityBit), ID(ID) {}; }; const std::vector<FMVInfo> &getFMVInfo(); diff --git a/llvm/lib/Target/AArch64/AArch64FMV.td b/llvm/lib/Target/AArch64/AArch64FMV.td index fc7a94a5fe475f..e0f56fd5556196 100644 --- a/llvm/lib/Target/AArch64/AArch64FMV.td +++ b/llvm/lib/Target/AArch64/AArch64FMV.td @@ -22,64 +22,65 @@ // Something you can add to target_version or target_clones. -class FMVExtension<string n, string b, int p> { +class FMVExtension<string name, string enumeration> { // Name, as spelled in target_version or target_clones. e.g. "memtag". - string Name = n; + string Name = name; // A C++ expression giving the number of the bit in the FMV ABI. // Currently this is given as a value from the enum "CPUFeatures". - string Bit = b; + string FeatureBit = "FEAT_" # enumeration; // SubtargetFeature enabled for codegen when this FMV feature is present. - string BackendFeature = n; + string BackendFeature = name; - // The FMV priority. - int Priority = p; + // A C++ expression giving the number of the priority bit. + // Currently this is given as a value from the enum "FeatPriorities". 
+ string PriorityBit = "PRIOR_" # enumeration; } -def : FMVExtension<"aes", "FEAT_PMULL", 150>; -def : FMVExtension<"bf16", "FEAT_BF16", 280>; -def : FMVExtension<"bti", "FEAT_BTI", 510>; -def : FMVExtension<"crc", "FEAT_CRC", 110>; -def : FMVExtension<"dit", "FEAT_DIT", 180>; -def : FMVExtension<"dotprod", "FEAT_DOTPROD", 104>; -let BackendFeature = "ccpp" in def : FMVExtension<"dpb", "FEAT_DPB", 190>; -let BackendFeature = "ccdp" in def : FMVExtension<"dpb2", "FEAT_DPB2", 200>; -def : FMVExtension<"f32mm", "FEAT_SVE_F32MM", 350>; -def : FMVExtension<"f64mm", "FEAT_SVE_F64MM", 360>; -def : FMVExtension<"fcma", "FEAT_FCMA", 220>; -def : FMVExtension<"flagm", "FEAT_FLAGM", 20>; -let BackendFeature = "altnzcv" in def : FMVExtension<"flagm2", "FEAT_FLAGM2", 30>; -def : FMVExtension<"fp", "FEAT_FP", 90>; -def : FMVExtension<"fp16", "FEAT_FP16", 170>; -def : FMVExtension<"fp16fml", "FEAT_FP16FML", 175>; -let BackendFeature = "fptoint" in def : FMVExtension<"frintts", "FEAT_FRINTTS", 250>; -def : FMVExtension<"i8mm", "FEAT_I8MM", 270>; -def : FMVExtension<"jscvt", "FEAT_JSCVT", 210>; -def : FMVExtension<"ls64", "FEAT_LS64_ACCDATA", 520>; -def : FMVExtension<"lse", "FEAT_LSE", 80>; -def : FMVExtension<"memtag", "FEAT_MEMTAG2", 440>; -def : FMVExtension<"mops", "FEAT_MOPS", 650>; -def : FMVExtension<"predres", "FEAT_PREDRES", 480>; -def : FMVExtension<"rcpc", "FEAT_RCPC", 230>; -let BackendFeature = "rcpc-immo" in def : FMVExtension<"rcpc2", "FEAT_RCPC2", 240>; -def : FMVExtension<"rcpc3", "FEAT_RCPC3", 241>; -def : FMVExtension<"rdm", "FEAT_RDM", 108>; -def : FMVExtension<"rng", "FEAT_RNG", 10>; -def : FMVExtension<"sb", "FEAT_SB", 470>; -def : FMVExtension<"sha2", "FEAT_SHA2", 130>; -def : FMVExtension<"sha3", "FEAT_SHA3", 140>; -def : FMVExtension<"simd", "FEAT_SIMD", 100>; -def : FMVExtension<"sm4", "FEAT_SM4", 106>; -def : FMVExtension<"sme", "FEAT_SME", 430>; -def : FMVExtension<"sme-f64f64", "FEAT_SME_F64", 560>; -def : FMVExtension<"sme-i16i64", "FEAT_SME_I64", 
570>; -def : FMVExtension<"sme2", "FEAT_SME2", 580>; -def : FMVExtension<"ssbs", "FEAT_SSBS2", 490>; -def : FMVExtension<"sve", "FEAT_SVE", 310>; -def : FMVExtension<"sve2", "FEAT_SVE2", 370>; -def : FMVExtension<"sve2-aes", "FEAT_SVE_PMULL128", 380>; -def : FMVExtension<"sve2-bitperm", "FEAT_SVE_BITPERM", 400>; -def : FMVExtension<"sve2-sha3", "FEAT_SVE_SHA3", 410>; -def : FMVExtension<"sve2-sm4", "FEAT_SVE_SM4", 420>; -def : FMVExtension<"wfxt", "FEAT_WFXT", 550>; +def : FMVExtension<"aes", "PMULL">; +def : FMVExtension<"bf16", "BF16">; +def : FMVExtension<"bti", "BTI">; +def : FMVExtension<"crc", "CRC">; +def : FMVExtension<"dit", "DIT">; +def : FMVExtension<"dotprod", "DOTPROD">; +let BackendFeature = "ccpp" in def : FMVExtension<"dpb", "DPB">; +let BackendFeature = "ccdp" in def : FMVExtension<"dpb2", "DPB2">; +def : FMVExtension<"f32mm", "SVE_F32MM">; +def : FMVExtension<"f64mm", "SVE_F64MM">; +def : FMVExtension<"fcma", "FCMA">; +def : FMVExtension<"flagm", "FLAGM">; +let BackendFeature = "altnzcv" in def : FMVExtension<"flagm2", "FLAGM2">; +def : FMVExtension<"fp", "FP">; +def : FMVExtension<"fp16", "FP16">; +def : FMVExtension<"fp16fml", "FP16FML">; +let BackendFeature = "fptoint" in def : FMVExtension<"frintts", "FRINTTS">; +def : FMVExtension<"i8mm", "I8MM">; +def : FMVExtension<"jscvt", "JSCVT">; +def : FMVExtension<"ls64", "LS64_ACCDATA">; +def : FMVExtension<"lse", "LSE">; +def : FMVExtension<"memtag", "MEMTAG2">; +def : FMVExtension<"mops", "MOPS">; +def : FMVExtension<"predres", "PREDRES">; +def : FMVExtension<"rcpc", "RCPC">; +let BackendFeature = "rcpc-immo" in def : FMVExtension<"rcpc2", "RCPC2">; +def : FMVExtension<"rcpc3", "RCPC3">; +def : FMVExtension<"rdm", "RDM">; +def : FMVExtension<"rng", "RNG">; +def : FMVExtension<"sb", "SB">; +def : FMVExtension<"sha2", "SHA2">; +def : FMVExtension<"sha3", "SHA3">; +def : FMVExtension<"simd", "SIMD">; +def : FMVExtension<"sm4", "SM4">; +def : FMVExtension<"sme", "SME">; +def : 
FMVExtension<"sme-f64f64", "SME_F64">; +def : FMVExtension<"sme-i16i64", "SME_I64">; +def : FMVExtension<"sme2", "SME2">; +def : FMVExtension<"ssbs", "SSBS2">; +def : FMVExtension<"sve", "SVE">; +def : FMVExtension<"sve2", "SVE2">; +def : FMVExtension<"sve2-aes", "SVE_PMULL128">; +def : FMVExtension<"sve2-bitperm", "SVE_BITPERM">; +def : FMVExtension<"sve2-sha3", "SVE_SHA3">; +def : FMVExtension<"sve2-sm4", "SVE_SM4">; +def : FMVExtension<"wfxt", "WFXT">; diff --git a/llvm/lib/TargetParser/AArch64TargetParser.cpp b/llvm/lib/TargetParser/AArch64TargetParser.cpp index 50c9a565e7ae25..ff9ba79d558a3f 100644 --- a/llvm/lib/TargetParser/AArch64TargetParser.cpp +++ b/llvm/lib/TargetParser/AArch64TargetParser.cpp @@ -49,16 +49,11 @@ std::optional<AArch64::ArchInfo> AArch64::ArchInfo::findBySubArch(StringRef SubA } unsigned AArch64::getFMVPriority(ArrayRef<StringRef> Features) { - constexpr unsigned MaxFMVPriority = 1000; unsigned Priority = 0; - unsigned NumFeatures = 0; - for (StringRef Feature : Features) { - if (auto Ext = parseFMVExtension(Feature)) { - Priority = std::max(Priority, Ext->Priority); - NumFeatures++; - } - } - return Priority + MaxFMVPriority * NumFeatures; + for (StringRef Feature : Features) + if (std::optional<FMVInfo> Info = parseFMVExtension(Feature)) + Priority = std::max(Priority, static_cast<unsigned>(Info->PriorityBit)); + return Priority; } uint64_t AArch64::getCpuSupportsMask(ArrayRef<StringRef> Features) { @@ -73,7 +68,7 @@ uint64_t AArch64::getCpuSupportsMask(ArrayRef<StringRef> Features) { uint64_t FeaturesMask = 0; for (const FMVInfo &Info : getFMVInfo()) if (Info.ID && FeatureBits.Enabled.test(*Info.ID)) - FeaturesMask |= (1ULL << Info.Bit); + FeaturesMask |= (1ULL << Info.FeatureBit); return FeaturesMask; } diff --git a/llvm/utils/TableGen/Basic/ARMTargetDefEmitter.cpp b/llvm/utils/TableGen/Basic/ARMTargetDefEmitter.cpp index 3b02f63e9490b1..4dea89ecbeffb3 100644 --- a/llvm/utils/TableGen/Basic/ARMTargetDefEmitter.cpp +++ 
b/llvm/utils/TableGen/Basic/ARMTargetDefEmitter.cpp @@ -162,14 +162,14 @@ static void emitARMTargetDef(const RecordKeeper &RK, raw_ostream &OS) { for (const Record *Rec : FMVExts) { OS << " I.emplace_back("; OS << "\"" << Rec->getValueAsString("Name") << "\""; - OS << ", " << Rec->getValueAsString("Bit"); + OS << ", " << Rec->getValueAsString("FeatureBit"); + OS << ", " << Rec->getValueAsString("PriorityBit"); auto FeatName = Rec->getValueAsString("BackendFeature"); const Record *FeatRec = ExtensionMap[FeatName]; if (FeatRec) OS << ", " << FeatRec->getValueAsString("ArchExtKindSpelling").upper(); else OS << ", std::nullopt"; - OS << ", " << (uint64_t)Rec->getValueAsInt("Priority"); OS << ");\n"; }; OS << " return I;\n" >From 24e9178d98defb20d51247a60666a658f0da66a8 Mon Sep 17 00:00:00 2001 From: Alexandros Lamprineas <alexandros.lamprin...@arm.com> Date: Tue, 7 Jan 2025 23:08:30 +0000 Subject: [PATCH 2/4] Use bitmask for FMV priority and add a corresponding test. --- clang/include/clang/Basic/TargetInfo.h | 2 +- clang/lib/Basic/Targets/AArch64.cpp | 2 +- clang/lib/Basic/Targets/AArch64.h | 2 +- clang/lib/Basic/Targets/RISCV.cpp | 4 +- clang/lib/Basic/Targets/RISCV.h | 2 +- clang/lib/Basic/Targets/X86.cpp | 6 +-- clang/lib/Basic/Targets/X86.h | 2 +- clang/lib/CodeGen/CodeGenModule.cpp | 2 +- clang/test/CodeGen/AArch64/fmv-priority.c | 54 +++++++++++++++++++ clang/test/CodeGen/attr-target-version.c | 12 ++--- .../llvm/TargetParser/AArch64TargetParser.h | 2 +- llvm/lib/TargetParser/AArch64TargetParser.cpp | 6 +-- 12 files changed, 75 insertions(+), 21 deletions(-) create mode 100644 clang/test/CodeGen/AArch64/fmv-priority.c diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index f2905f30a7c34b..43c09cf1f973e3 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1531,7 +1531,7 @@ class TargetInfo : public TransferrableTargetInfo, // Return the target-specific priority for 
features/cpus/vendors so // that they can be properly sorted for checking. - virtual unsigned getFMVPriority(ArrayRef<StringRef> Features) const { + virtual uint64_t getFMVPriority(ArrayRef<StringRef> Features) const { return 0; } diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 53e102bbe44687..2b4b954d0c27ad 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -714,7 +714,7 @@ AArch64TargetInfo::getVScaleRange(const LangOptions &LangOpts) const { return std::nullopt; } -unsigned AArch64TargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const { +uint64_t AArch64TargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const { return llvm::AArch64::getFMVPriority(Features); } diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index 68a8b1ebad8cde..4e927c0953b1fc 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -137,7 +137,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo { void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override; bool setCPU(const std::string &Name) override; - unsigned getFMVPriority(ArrayRef<StringRef> Features) const override; + uint64_t getFMVPriority(ArrayRef<StringRef> Features) const override; bool useFP16ConversionIntrinsics() const override { return false; diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp index a541dfedc9b8e1..db23b0c2283385 100644 --- a/clang/lib/Basic/Targets/RISCV.cpp +++ b/clang/lib/Basic/Targets/RISCV.cpp @@ -489,7 +489,7 @@ ParsedTargetAttr RISCVTargetInfo::parseTargetAttr(StringRef Features) const { return Ret; } -unsigned RISCVTargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const { +uint64_t RISCVTargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const { // Priority is explicitly specified on RISC-V unlike on other targets, where // it is derived by all the features of a 
specific version. Therefore if a // feature contains the priority string, then return it immediately. @@ -501,7 +501,7 @@ unsigned RISCVTargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const { Feature = RHS; else continue; - unsigned Priority; + uint64_t Priority; if (!Feature.getAsInteger(0, Priority)) return Priority; } diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h index 68f10e74ba98c3..bb3f3a5cda7c65 100644 --- a/clang/lib/Basic/Targets/RISCV.h +++ b/clang/lib/Basic/Targets/RISCV.h @@ -122,7 +122,7 @@ class RISCVTargetInfo : public TargetInfo { void fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values) const override; bool supportsTargetAttributeTune() const override { return true; } ParsedTargetAttr parseTargetAttr(StringRef Str) const override; - unsigned getFMVPriority(ArrayRef<StringRef> Features) const override; + uint64_t getFMVPriority(ArrayRef<StringRef> Features) const override; std::pair<unsigned, unsigned> hardwareInterferenceSizes() const override { return std::make_pair(32, 32); diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index d2d92fb864c310..40ad8fd9a0967d 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -1357,8 +1357,8 @@ static llvm::X86::ProcessorFeatures getFeature(StringRef Name) { // correct, so it asserts if the value is out of range. } -unsigned X86TargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const { - auto getPriority = [](StringRef Feature) -> unsigned { +uint64_t X86TargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const { + auto getPriority = [](StringRef Feature) -> uint64_t { // Valid CPUs have a 'key feature' that compares just better than its key // feature. 
using namespace llvm::X86; @@ -1372,7 +1372,7 @@ unsigned X86TargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const { return getFeaturePriority(getFeature(Feature)) << 1; }; - unsigned Priority = 0; + uint64_t Priority = 0; for (StringRef Feature : Features) if (!Feature.empty()) Priority = std::max(Priority, getPriority(Feature)); diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index 553c452d4ba3c2..35aceb1c58e142 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -384,7 +384,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { return CPU != llvm::X86::CK_None; } - unsigned getFMVPriority(ArrayRef<StringRef> Features) const override; + uint64_t getFMVPriority(ArrayRef<StringRef> Features) const override; bool setFPMath(StringRef Name) override; diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 5f15f0f48c54e4..7db1ed72fa5cde 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4241,7 +4241,7 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old, llvm::Function *NewFn); -static unsigned getFMVPriority(const TargetInfo &TI, +static uint64_t getFMVPriority(const TargetInfo &TI, const CodeGenFunction::FMVResolverOption &RO) { llvm::SmallVector<StringRef, 8> Features{RO.Features}; if (RO.Architecture) diff --git a/clang/test/CodeGen/AArch64/fmv-priority.c b/clang/test/CodeGen/AArch64/fmv-priority.c new file mode 100644 index 00000000000000..db410e36a216a3 --- /dev/null +++ b/clang/test/CodeGen/AArch64/fmv-priority.c @@ -0,0 +1,54 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -o - %s | FileCheck %s + +// Priority bitmasks after feature dependency
expansion: +// +// MSB LSB +// +// sme2 | ls64 | sme | bf16 | | | fp16 | simd | fp +// -----+------+-----+------+-------+------+------+------+--- +// sme2 | | sme | bf16 | rcpc2 | rcpc | fp16 | simd | fp +// +// Dependencies should not affect priorities, since a +// feature can only depend on lower priority features. + +__attribute__((target_version("sme2+ls64"))) int fn(void); +__attribute__((target_version("sme2+rcpc2"))) int fn(void); +__attribute__((target_version("default"))) int fn(void) { return 0; } + +int call() { return fn(); } + +// CHECK-LABEL: define dso_local i32 @fn.default( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i32 0 +// +// +// CHECK-LABEL: define dso_local i32 @call( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CALL:%.*]] = call i32 @fn() +// CHECK-NEXT: ret i32 [[CALL]] +// +// +// CHECK-LABEL: define weak_odr ptr @fn.resolver() comdat { +// CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] +// CHECK-NEXT: call void @__init_cpu_features_resolver() +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 153126785511392000 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 153126785511392000 +// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] +// CHECK: [[RESOLVER_RETURN]]: +// CHECK-NEXT: ret ptr @fn._Mls64Msme2 +// CHECK: [[RESOLVER_ELSE]]: +// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 144119586269233920 +// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 144119586269233920 +// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] +// CHECK-NEXT: br i1 [[TMP7]], label %[[RESOLVER_RETURN1:.*]], label %[[RESOLVER_ELSE2:.*]] +// CHECK: [[RESOLVER_RETURN1]]: +// CHECK-NEXT: ret ptr @fn._Mrcpc2Msme2 +// CHECK: [[RESOLVER_ELSE2]]: +// CHECK-NEXT: 
ret ptr @fn.default +// diff --git a/clang/test/CodeGen/attr-target-version.c b/clang/test/CodeGen/attr-target-version.c index ea2994b8e5c39d..336d8b0a4dffa0 100644 --- a/clang/test/CodeGen/attr-target-version.c +++ b/clang/test/CodeGen/attr-target-version.c @@ -479,20 +479,20 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // CHECK-NEXT: ret ptr @fmv._Mflagm2Msme-i16i64 // CHECK: resolver_else2: // CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 9007199254741776 -// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 9007199254741776 +// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 9007199254742016 +// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 9007199254742016 // CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]] // CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] // CHECK: resolver_return3: -// CHECK-NEXT: ret ptr @fmv._MdotprodMls64 +// CHECK-NEXT: ret ptr @fmv._McrcMls64 // CHECK: resolver_else4: // CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 9007199254742016 -// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 9007199254742016 +// CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 9007199254741776 +// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 9007199254741776 // CHECK-NEXT: [[TMP15:%.*]] = and i1 true, [[TMP14]] // CHECK-NEXT: br i1 [[TMP15]], label [[RESOLVER_RETURN5:%.*]], label [[RESOLVER_ELSE6:%.*]] // CHECK: resolver_return5: -// CHECK-NEXT: ret ptr @fmv._McrcMls64 +// CHECK-NEXT: ret ptr @fmv._MdotprodMls64 // CHECK: resolver_else6: // CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 // CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 1125899906842624 diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h index 
8ec2201b07db53..b577cc757a2df6 100644 --- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -270,7 +270,7 @@ void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values); bool isX18ReservedByDefault(const Triple &TT); // Return the priority for a given set of FMV features. -unsigned getFMVPriority(ArrayRef<StringRef> Features); +uint64_t getFMVPriority(ArrayRef<StringRef> Features); // For given feature names, return a bitmask corresponding to the entries of // AArch64::CPUFeatures. The values in CPUFeatures are not bitmasks themselves, diff --git a/llvm/lib/TargetParser/AArch64TargetParser.cpp b/llvm/lib/TargetParser/AArch64TargetParser.cpp index ff9ba79d558a3f..7d0b8c333f72fa 100644 --- a/llvm/lib/TargetParser/AArch64TargetParser.cpp +++ b/llvm/lib/TargetParser/AArch64TargetParser.cpp @@ -48,11 +48,11 @@ std::optional<AArch64::ArchInfo> AArch64::ArchInfo::findBySubArch(StringRef SubA return {}; } -unsigned AArch64::getFMVPriority(ArrayRef<StringRef> Features) { - unsigned Priority = 0; +uint64_t AArch64::getFMVPriority(ArrayRef<StringRef> Features) { + uint64_t Priority = 0; for (StringRef Feature : Features) if (std::optional<FMVInfo> Info = parseFMVExtension(Feature)) - Priority = std::max(Priority, static_cast<unsigned>(Info->PriorityBit)); + Priority |= (1ULL << Info->PriorityBit); return Priority; } >From 89b8c638e4dba32cda9f803ffabe852408c087ed Mon Sep 17 00:00:00 2001 From: Alexandros Lamprineas <alexandros.lamprin...@arm.com> Date: Wed, 8 Jan 2025 11:02:50 +0000 Subject: [PATCH 3/4] Move FeatPriorities in a separate file because AArch64CPUFeatures.inc is meant to be in sync with compiler-rt. 
--- .../llvm/TargetParser/AArch64CPUFeatures.inc | 57 +------------------ .../llvm/TargetParser/AArch64TargetParser.h | 1 + 2 files changed, 3 insertions(+), 55 deletions(-) diff --git a/llvm/include/llvm/TargetParser/AArch64CPUFeatures.inc b/llvm/include/llvm/TargetParser/AArch64CPUFeatures.inc index 998d270b2058c8..6b373ce424678a 100644 --- a/llvm/include/llvm/TargetParser/AArch64CPUFeatures.inc +++ b/llvm/include/llvm/TargetParser/AArch64CPUFeatures.inc @@ -15,13 +15,10 @@ // changes in this file, first modify the primary copy and copy it over to // compiler-rt. compiler-rt tests will fail if the two files are not synced up. // -// Additionally this file enumerates the feature priorities in ascending order, -// as defined in the ACLE specification. -// //===----------------------------------------------------------------------===// -#ifndef AARCH64_CPU_FEATURES_INC_H -#define AARCH64_CPU_FEATURES_INC_H +#ifndef AARCH64_CPU_FEATURS_INC_H +#define AARCH64_CPU_FEATURS_INC_H // Function Multi Versioning CPU features. enum CPUFeatures { @@ -91,54 +88,4 @@ enum CPUFeatures { FEAT_INIT // Used as flag of features initialization completion }; -// Function Multi Versioning feature priorities in ascending order. 
-enum FeatPriorities { - PRIOR_RNG, - PRIOR_FLAGM, - PRIOR_FLAGM2, - PRIOR_LSE, - PRIOR_FP, - PRIOR_SIMD, - PRIOR_DOTPROD, - PRIOR_SM4, - PRIOR_RDM, - PRIOR_CRC, - PRIOR_SHA2, - PRIOR_SHA3, - PRIOR_PMULL, - PRIOR_FP16, - PRIOR_FP16FML, - PRIOR_DIT, - PRIOR_DPB, - PRIOR_DPB2, - PRIOR_JSCVT, - PRIOR_FCMA, - PRIOR_RCPC, - PRIOR_RCPC2, - PRIOR_RCPC3, - PRIOR_FRINTTS, - PRIOR_I8MM, - PRIOR_BF16, - PRIOR_SVE, - PRIOR_SVE_F32MM, - PRIOR_SVE_F64MM, - PRIOR_SVE2, - PRIOR_SVE_PMULL128, - PRIOR_SVE_BITPERM, - PRIOR_SVE_SHA3, - PRIOR_SVE_SM4, - PRIOR_SME, - PRIOR_MEMTAG2, - PRIOR_SB, - PRIOR_PREDRES, - PRIOR_SSBS2, - PRIOR_BTI, - PRIOR_LS64_ACCDATA, - PRIOR_WFXT, - PRIOR_SME_F64, - PRIOR_SME_I64, - PRIOR_SME2, - PRIOR_MOPS -}; - #endif diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h index b577cc757a2df6..63f06a3a692982 100644 --- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -36,6 +36,7 @@ struct ArchInfo; struct CpuInfo; #include "llvm/TargetParser/AArch64CPUFeatures.inc" +#include "llvm/TargetParser/AArch64FeatPriorities.inc" static_assert(FEAT_MAX < 62, "Number of features in CPUFeatures are limited to 62 entries"); >From f197b263d7449d2c67852b5a592a28d72595b3b8 Mon Sep 17 00:00:00 2001 From: Alexandros Lamprineas <alexandros.lamprin...@arm.com> Date: Wed, 8 Jan 2025 11:05:32 +0000 Subject: [PATCH 4/4] Forgot to add the new file AArch64FeatPriorities.inc --- .../TargetParser/AArch64FeatPriorities.inc | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 llvm/include/llvm/TargetParser/AArch64FeatPriorities.inc diff --git a/llvm/include/llvm/TargetParser/AArch64FeatPriorities.inc b/llvm/include/llvm/TargetParser/AArch64FeatPriorities.inc new file mode 100644 index 00000000000000..96af618032aea3 --- /dev/null +++ b/llvm/include/llvm/TargetParser/AArch64FeatPriorities.inc @@ -0,0 +1,66 @@ +//===- 
AArch64FeatPriorities.inc - AArch64 FMV Priorities enum --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file enumerates the AArch64 FMV features sorted in ascending priority. +// +//===----------------------------------------------------------------------===// + +#ifndef AARCH64_FEAT_PRIORITIES_INC_H +#define AARCH64_FEAT_PRIORITIES_INC_H + +// Function Multi Versioning feature priorities. +enum FeatPriorities { + PRIOR_RNG, + PRIOR_FLAGM, + PRIOR_FLAGM2, + PRIOR_LSE, + PRIOR_FP, + PRIOR_SIMD, + PRIOR_DOTPROD, + PRIOR_SM4, + PRIOR_RDM, + PRIOR_CRC, + PRIOR_SHA2, + PRIOR_SHA3, + PRIOR_PMULL, + PRIOR_FP16, + PRIOR_FP16FML, + PRIOR_DIT, + PRIOR_DPB, + PRIOR_DPB2, + PRIOR_JSCVT, + PRIOR_FCMA, + PRIOR_RCPC, + PRIOR_RCPC2, + PRIOR_RCPC3, + PRIOR_FRINTTS, + PRIOR_I8MM, + PRIOR_BF16, + PRIOR_SVE, + PRIOR_SVE_F32MM, + PRIOR_SVE_F64MM, + PRIOR_SVE2, + PRIOR_SVE_PMULL128, + PRIOR_SVE_BITPERM, + PRIOR_SVE_SHA3, + PRIOR_SVE_SM4, + PRIOR_SME, + PRIOR_MEMTAG2, + PRIOR_SB, + PRIOR_PREDRES, + PRIOR_SSBS2, + PRIOR_BTI, + PRIOR_LS64_ACCDATA, + PRIOR_WFXT, + PRIOR_SME_F64, + PRIOR_SME_I64, + PRIOR_SME2, + PRIOR_MOPS +}; + +#endif _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits