================ @@ -176,25 +176,43 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { SmallVector<StringRef, 32> Lines; ProcCpuinfoContent.split(Lines, '\n'); - // Look for the CPU implementer line. + // Look for the CPU implementer and hardware lines, and store the CPU part + // numbers found. StringRef Implementer; StringRef Hardware; - StringRef Part; + SmallVector<StringRef, 32> Parts; for (unsigned I = 0, E = Lines.size(); I != E; ++I) { if (Lines[I].starts_with("CPU implementer")) Implementer = Lines[I].substr(15).ltrim("\t :"); if (Lines[I].starts_with("Hardware")) Hardware = Lines[I].substr(8).ltrim("\t :"); if (Lines[I].starts_with("CPU part")) - Part = Lines[I].substr(8).ltrim("\t :"); + Parts.emplace_back(Lines[I].substr(8).ltrim("\t :")); } + // Last `Part' seen, in case we don't analyse all `Parts' parsed. + StringRef Part = Parts.empty() ? StringRef() : Parts.back(); + + // Remove duplicate `Parts'. + llvm::sort(Parts); + Parts.erase(llvm::unique(Parts), Parts.end()); + + auto MatchBigLittle = [](auto const &Parts, StringRef Big, StringRef Little) { + if (Parts.size() == 2) + return (Parts[0] == Big && Parts[1] == Little) || + (Parts[1] == Big && Parts[0] == Little); + return false; + }; + if (Implementer == "0x41") { // ARM Ltd. // MSM8992/8994 may give cpu part for the core that the kernel is running on, // which is undeterministic and wrong. Always return cortex-a53 for these SoC. if (Hardware.ends_with("MSM8994") || Hardware.ends_with("MSM8996")) return "cortex-a53"; + // Detect big.LITTLE systems. + if (MatchBigLittle(Parts, "0xd85", "0xd87")) + return "gb10"; ---------------- davemgreen wrote:
Hi - I'm not a fan of cortex-x925.cortex-a725 style "cpus"; we haven't needed them in the past and it would be a shame to add them now. As you might imagine there are a lot of possibilities for them and I don't think they add a lot in terms of performance or usability. Either the user wants to tune for the x925 or the a725 and so can choose for themselves what to tune for (the architecture features are the same), or the compiler has to choose and usually just picks the big cpu, like you have done here where gb10 is just a cortex-x925 in terms of tuning. As far as I understand, the aim here is just to pick a valid, consistent cpu. I think we should do that and not have it dependent on the input order. But that just means picking one of the cpus (probably the "largest" after sorting?). (I am wondering if adding gb10 is a mistake, as the underlying CPU is just an x925 or a725 and we would not usually add SoC level chips. It looks like what you really need is for -mcpu=cortex-x925+crypto to work properly, but I think it was a mistake to not have that be the default! Let's at least fix the native detection to pick one of the cpus, not pick a completely different SoC on a system not related to nvidia). https://github.com/llvm/llvm-project/pull/146515 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits