erichkeane updated this revision to Diff 414650. erichkeane added a comment. Herald added a project: LLVM. Herald added a subscriber: llvm-commits.
add a 'translation' feature to the x86 target so that we can get the 'tune cpu' name from the list. Note that there are 9 with blanks that I was unable to figure out the corresponding name (I have an email out to @andrew.w.kaylor and @pengfei to tell me what it should be). In the meantime, these will result in NO tune-cpu. Also note that I intentionally added this conversion from the 'alias' as well. This gives us the power to use an alias to change the 'tune' if we care to. Typically I'd consider this unimportant, but it means that previously mentioned VendorA (@arsenm) could simply add their processors as aliases and get the tune feature more easily. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D121410/new/ https://reviews.llvm.org/D121410 Files: clang/include/clang/Basic/TargetInfo.h clang/lib/Basic/Targets/X86.cpp clang/lib/Basic/Targets/X86.h clang/lib/CodeGen/CodeGenModule.cpp clang/test/CodeGen/attr-cpuspecific-avx-abi.c clang/test/CodeGen/attr-cpuspecific.c llvm/include/llvm/Support/X86TargetParser.def
Index: llvm/include/llvm/Support/X86TargetParser.def =================================================================== --- llvm/include/llvm/Support/X86TargetParser.def +++ llvm/include/llvm/Support/X86TargetParser.def @@ -211,47 +211,47 @@ #undef X86_FEATURE #ifndef CPU_SPECIFIC -#define CPU_SPECIFIC(NAME, MANGLING, FEATURES) +#define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) #endif #ifndef CPU_SPECIFIC_ALIAS -#define CPU_SPECIFIC_ALIAS(NEW_NAME, NAME) +#define CPU_SPECIFIC_ALIAS(NEW_NAME, TUNE_NAME, NAME) #endif -CPU_SPECIFIC("generic", 'A', "") -CPU_SPECIFIC("pentium", 'B', "") -CPU_SPECIFIC("pentium_pro", 'C', "+cmov") -CPU_SPECIFIC("pentium_mmx", 'D', "+mmx") -CPU_SPECIFIC("pentium_ii", 'E', "+cmov,+mmx") -CPU_SPECIFIC("pentium_iii", 'H', "+cmov,+mmx,+sse") -CPU_SPECIFIC_ALIAS("pentium_iii_no_xmm_regs", "pentium_iii") -CPU_SPECIFIC("pentium_4", 'J', "+cmov,+mmx,+sse,+sse2") -CPU_SPECIFIC("pentium_m", 'K', "+cmov,+mmx,+sse,+sse2") -CPU_SPECIFIC("pentium_4_sse3", 'L', "+cmov,+mmx,+sse,+sse2,+sse3") -CPU_SPECIFIC("core_2_duo_ssse3", 'M', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3") -CPU_SPECIFIC("core_2_duo_sse4_1", 'N', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1") -CPU_SPECIFIC("atom", 'O', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+movbe") -CPU_SPECIFIC("atom_sse4_2", 'c', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt") -CPU_SPECIFIC("core_i7_sse4_2", 'P', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt") -CPU_SPECIFIC("core_aes_pclmulqdq", 'Q', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt") -CPU_SPECIFIC("atom_sse4_2_movbe", 'd', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt") -CPU_SPECIFIC("goldmont", 'i', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt") -CPU_SPECIFIC("sandybridge", 'R', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx") -CPU_SPECIFIC_ALIAS("core_2nd_gen_avx", "sandybridge") -CPU_SPECIFIC("ivybridge", 'S', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+f16c,+avx") -CPU_SPECIFIC_ALIAS("core_3rd_gen_avx", "ivybridge") -CPU_SPECIFIC("haswell", 'V', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2") -CPU_SPECIFIC_ALIAS("core_4th_gen_avx", "haswell") -CPU_SPECIFIC("core_4th_gen_avx_tsx", 'W', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2") -CPU_SPECIFIC("broadwell", 'X', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx") -CPU_SPECIFIC_ALIAS("core_5th_gen_avx", "broadwell") -CPU_SPECIFIC("core_5th_gen_avx_tsx", 'Y', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx") -CPU_SPECIFIC("knl", 'Z', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512f,+adx,+avx512er,+avx512pf,+avx512cd") -CPU_SPECIFIC_ALIAS("mic_avx512", "knl") -CPU_SPECIFIC("skylake", 'b', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx,+mpx") -CPU_SPECIFIC( "skylake_avx512", 'a', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512dq,+avx512f,+adx,+avx512cd,+avx512bw,+avx512vl,+clwb") -CPU_SPECIFIC("cannonlake", 'e', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512dq,+avx512f,+adx,+avx512ifma,+avx512cd,+avx512bw,+avx512vl,+avx512vbmi") -CPU_SPECIFIC("knm", 'j', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512f,+adx,+avx512er,+avx512pf,+avx512cd,+avx5124fmaps,+avx5124vnniw,+avx512vpopcntdq") +CPU_SPECIFIC("generic", "generic", 'A', "") +CPU_SPECIFIC("pentium", "pentium", 'B', "") +CPU_SPECIFIC("pentium_pro", "pentiumpro", 'C', "+cmov") +CPU_SPECIFIC("pentium_mmx", "pentium-mmx", 'D', "+mmx") +CPU_SPECIFIC("pentium_ii", "pentium2", 'E', "+cmov,+mmx") +CPU_SPECIFIC("pentium_iii", "pentium3", 'H', "+cmov,+mmx,+sse") +CPU_SPECIFIC_ALIAS("pentium_iii_no_xmm_regs", "pentium3", "pentium_iii") +CPU_SPECIFIC("pentium_4", "pentium4", 'J', "+cmov,+mmx,+sse,+sse2") +CPU_SPECIFIC("pentium_m", "pentium-m", 'K', "+cmov,+mmx,+sse,+sse2") +CPU_SPECIFIC("pentium_4_sse3", "", 'L', "+cmov,+mmx,+sse,+sse2,+sse3") +CPU_SPECIFIC("core_2_duo_ssse3", "", 'M', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3") +CPU_SPECIFIC("core_2_duo_sse4_1", "", 'N', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1") +CPU_SPECIFIC("atom", "atom", 'O', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+movbe") +CPU_SPECIFIC("atom_sse4_2", "", 'c', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt") +CPU_SPECIFIC("core_i7_sse4_2", "", 'P', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt") +CPU_SPECIFIC("core_aes_pclmulqdq", "", 'Q', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt") +CPU_SPECIFIC("atom_sse4_2_movbe", "", 'd', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt") +CPU_SPECIFIC("goldmont", "goldmont", 'i', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt") +CPU_SPECIFIC("sandybridge", "sandybridge", 'R', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx") +CPU_SPECIFIC_ALIAS("core_2nd_gen_avx", "sandybridge", "sandybridge") +CPU_SPECIFIC("ivybridge", "ivybridge", 'S', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+f16c,+avx") +CPU_SPECIFIC_ALIAS("core_3rd_gen_avx", "ivybridge", "ivybridge") +CPU_SPECIFIC("haswell", "haswell", 'V', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2") +CPU_SPECIFIC_ALIAS("core_4th_gen_avx", "haswell", "haswell") +CPU_SPECIFIC("core_4th_gen_avx_tsx", "", 'W', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2") +CPU_SPECIFIC("broadwell", "broadwell", 'X', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx") +CPU_SPECIFIC_ALIAS("core_5th_gen_avx", "broadwell", "broadwell") +CPU_SPECIFIC("core_5th_gen_avx_tsx", "", 'Y', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx") +CPU_SPECIFIC("knl", "knl", 'Z', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512f,+adx,+avx512er,+avx512pf,+avx512cd") +CPU_SPECIFIC_ALIAS("mic_avx512", "knl", "knl") +CPU_SPECIFIC("skylake", "skylake", 'b', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx,+mpx") +CPU_SPECIFIC( "skylake_avx512", "skylake-avx512", 'a', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512dq,+avx512f,+adx,+avx512cd,+avx512bw,+avx512vl,+clwb") +CPU_SPECIFIC("cannonlake", "cannonlake", 'e', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512dq,+avx512f,+adx,+avx512ifma,+avx512cd,+avx512bw,+avx512vl,+avx512vbmi") +CPU_SPECIFIC("knm", "knm", 'j', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512f,+adx,+avx512er,+avx512pf,+avx512cd,+avx5124fmaps,+avx5124vnniw,+avx512vpopcntdq") #undef CPU_SPECIFIC_ALIAS #undef CPU_SPECIFIC Index: clang/test/CodeGen/attr-cpuspecific.c =================================================================== --- clang/test/CodeGen/attr-cpuspecific.c +++ clang/test/CodeGen/attr-cpuspecific.c @@ -340,5 +340,8 @@ void OrderDispatchUsageSpecific(void) {} // CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+crc32,+cx8,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +// CHECK-SAME: "tune-cpu"="ivybridge" // CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+crc32,+cx8,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +// CHECK-SAME: "tune-cpu"="knl" // CHECK: attributes #[[O]] = {{.*}}"target-features"="+cmov,+cx8,+mmx,+movbe,+sse,+sse2,+sse3,+ssse3,+x87" +// CHECK-SAME: "tune-cpu"="atom" Index: clang/test/CodeGen/attr-cpuspecific-avx-abi.c =================================================================== --- clang/test/CodeGen/attr-cpuspecific-avx-abi.c +++ clang/test/CodeGen/attr-cpuspecific-avx-abi.c @@ -23,4 +23,6 @@ // CHECK: define{{.*}} @foo.V() #[[V:[0-9]+]] // CHECK: attributes #[[A]] = {{.*}}"target-features"="+avx,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +// CHECK-SAME: "tune-cpu"="generic" // CHECK: attributes #[[V]] = {{.*}}"target-features"="+avx,+avx2,+bmi,+cmov,+crc32,+cx8,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +// CHECK-SAME: "tune-cpu"="haswell" Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -2060,6 +2060,13 @@ getTarget().isValidCPUName(ParsedAttr.Tune)) TuneCPU = ParsedAttr.Tune; } + + if (SD) { + // Apply the given CPU name as the 'tune-cpu' so that the optimizer can + // favor this processor. + TuneCPU = getTarget().getCPUSpecificTuneName( + SD->getCPUName(GD.getMultiVersionIndex())->getName()); + } } else { // Otherwise just add the existing target cpu and target features to the // function. Index: clang/lib/Basic/Targets/X86.h =================================================================== --- clang/lib/Basic/Targets/X86.h +++ clang/lib/Basic/Targets/X86.h @@ -201,6 +201,8 @@ StringRef Name, llvm::SmallVectorImpl<StringRef> &Features) const override; + StringRef getCPUSpecificTuneName(StringRef Name) const override; + Optional<unsigned> getCPUCacheLineSize() const override; bool validateAsmConstraint(const char *&Name, Index: clang/lib/Basic/Targets/X86.cpp =================================================================== --- clang/lib/Basic/Targets/X86.cpp +++ clang/lib/Basic/Targets/X86.cpp @@ -1095,22 +1095,22 @@ bool X86TargetInfo::validateCPUSpecificCPUDispatch(StringRef Name) const { return llvm::StringSwitch<bool>(Name) -#define CPU_SPECIFIC(NAME, MANGLING, FEATURES) .Case(NAME, true) -#define CPU_SPECIFIC_ALIAS(NEW_NAME, NAME) .Case(NEW_NAME, true) +#define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, true) +#define CPU_SPECIFIC_ALIAS(NEW_NAME, TUNE_NAME, NAME) .Case(NEW_NAME, true) #include "llvm/Support/X86TargetParser.def" .Default(false); } static StringRef CPUSpecificCPUDispatchNameDealias(StringRef Name) { return llvm::StringSwitch<StringRef>(Name) -#define CPU_SPECIFIC_ALIAS(NEW_NAME, NAME) .Case(NEW_NAME, NAME) +#define CPU_SPECIFIC_ALIAS(NEW_NAME, TUNE_NAME, NAME) .Case(NEW_NAME, NAME) #include "llvm/Support/X86TargetParser.def" .Default(Name); } char X86TargetInfo::CPUSpecificManglingCharacter(StringRef Name) const { return llvm::StringSwitch<char>(CPUSpecificCPUDispatchNameDealias(Name)) -#define CPU_SPECIFIC(NAME, MANGLING, FEATURES) .Case(NAME, MANGLING) +#define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, MANGLING) #include "llvm/Support/X86TargetParser.def" .Default(0); } @@ -1119,12 +1119,20 @@ StringRef Name, llvm::SmallVectorImpl<StringRef> &Features) const { StringRef WholeList = llvm::StringSwitch<StringRef>(CPUSpecificCPUDispatchNameDealias(Name)) -#define CPU_SPECIFIC(NAME, MANGLING, FEATURES) .Case(NAME, FEATURES) +#define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, FEATURES) #include "llvm/Support/X86TargetParser.def" .Default(""); WholeList.split(Features, ',', /*MaxSplit=*/-1, /*KeepEmpty=*/false); } +StringRef X86TargetInfo::getCPUSpecificTuneName(StringRef Name) const { + return llvm::StringSwitch<StringRef>(Name) +#define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, TUNE_NAME) +#define CPU_SPECIFIC_ALIAS(NEW_NAME, TUNE_NAME, NAME) .Case(NEW_NAME, TUNE_NAME) +#include "llvm/Support/X86TargetParser.def" + .Default(""); +} + // We can't use a generic validation scheme for the cpus accepted here // versus subtarget cpus accepted in the target attribute because the // variables intitialized by the runtime only support the below currently Index: clang/include/clang/Basic/TargetInfo.h =================================================================== --- clang/include/clang/Basic/TargetInfo.h +++ clang/include/clang/Basic/TargetInfo.h @@ -1375,6 +1375,13 @@ "cpu_specific Multiversioning not implemented on this target"); } + // Get the value for the 'tune-cpu' flag for a cpu_specific variant with the + // programmer-specified 'Name'. + virtual StringRef getCPUSpecificTuneName(StringRef Name) const { + llvm_unreachable( + "cpu_specific Multiversioning not implemented on this target"); + } + // Get a list of the features that make up the CPU option for // cpu_specific/cpu_dispatch so that it can be passed to llvm as optimization // options.
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits