r339759 - Fix ASTMatchersTraversalTest testcase compile on older compilers

2018-08-15 Thread David Green via cfe-commits
Author: dmgreen
Date: Wed Aug 15 03:39:43 2018
New Revision: 339759

URL: http://llvm.org/viewvc/llvm-project?rev=339759&view=rev
Log:
Fix ASTMatchersTraversalTest testcase compile on older compilers

Some versions of gcc, especially when invoked through ccache (-E), can have
trouble with raw string literals inside macros. This moves the string out of
the macro.
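
For illustration, a minimal sketch of the pattern involved (the CONTAINS_FOO
macro and main() are made up, not the actual test code): the problematic shape
is a multi-line raw string literal expanded inside a macro argument, and the
workaround hoists the literal into a variable first.

  #include <cassert>
  #include <cstring>

  // Stand-in for a test macro such as EXPECT_TRUE(matches(...)).
  #define CONTAINS_FOO(S) (std::strstr((S), "foo") != nullptr)

  int main() {
    // Problematic form: the raw string is preprocessed as a macro argument.
    assert(CONTAINS_FOO(R"(
      foo bar
    )"));

    // Workaround used here: move the raw string out of the macro.
    const char *Code = R"(
      foo bar
    )";
    assert(CONTAINS_FOO(Code));
    return 0;
  }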

Modified:
cfe/trunk/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp

Modified: cfe/trunk/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp?rev=339759&r1=339758&r2=339759&view=diff
==
--- cfe/trunk/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp (original)
+++ cfe/trunk/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp Wed Aug 15 
03:39:43 2018
@@ -1322,7 +1322,7 @@ TEST(IgnoringImplicit, MatchesImplicit)
 }
 
 TEST(IgnoringImplicit, MatchesNestedImplicit) {
-  EXPECT_TRUE(matches(R"(
+  StringRef Code = R"(
 
 struct OtherType;
 
@@ -1348,8 +1348,8 @@ int main()
 {
 SomeType i = something();
 }
-)"
-  , varDecl(
+)";
+  EXPECT_TRUE(matches(Code, varDecl(
   hasName("i"),
   hasInitializer(exprWithCleanups(has(
 cxxConstructExpr(has(expr(ignoringImplicit(cxxConstructExpr(




r340398 - [AArch64] Add Tiny Code Model for AArch64

2018-08-22 Thread David Green via cfe-commits
Author: dmgreen
Date: Wed Aug 22 04:34:28 2018
New Revision: 340398

URL: http://llvm.org/viewvc/llvm-project?rev=340398&view=rev
Log:
[AArch64] Add Tiny Code Model for AArch64

Adds a tiny code model to Clang, alongside rL340397.

Differential Revision: https://reviews.llvm.org/D49674
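
As a usage sketch (the file name and command line are hypothetical): the model
is requested with the driver flag -mcmodel=tiny, which is forwarded to cc1 as
"-mcode-model" "tiny", and is only meaningful on targets that implement a tiny
code model, such as AArch64 (rL340397).

  // tiny.c, built with: clang --target=aarch64-unknown-linux-gnu -mcmodel=tiny -O2 -S tiny.c
  int counter;

  int bump(void) {
    // Under the tiny model the compiler may assume code and data live within a
    // very small range (roughly +/-1MB on AArch64), allowing shorter
    // address-generation sequences than the default small model.
    return ++counter;
  }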

Modified:
cfe/trunk/include/clang/Driver/CC1Options.td
cfe/trunk/lib/CodeGen/BackendUtil.cpp
cfe/trunk/lib/Frontend/CompilerInvocation.cpp
cfe/trunk/test/Driver/code-model.c

Modified: cfe/trunk/include/clang/Driver/CC1Options.td
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/CC1Options.td?rev=340398&r1=340397&r2=340398&view=diff
==
--- cfe/trunk/include/clang/Driver/CC1Options.td (original)
+++ cfe/trunk/include/clang/Driver/CC1Options.td Wed Aug 22 04:34:28 2018
@@ -249,7 +249,7 @@ def new_struct_path_tbaa : Flag<["-"], "
 def masm_verbose : Flag<["-"], "masm-verbose">,
   HelpText<"Generate verbose assembly output">;
 def mcode_model : Separate<["-"], "mcode-model">,
-  HelpText<"The code model to use">, Values<"small,kernel,medium,large">;
+  HelpText<"The code model to use">, Values<"tiny,small,kernel,medium,large">;
 def mdebug_pass : Separate<["-"], "mdebug-pass">,
   HelpText<"Enable additional debug output">;
 def mdisable_fp_elim : Flag<["-"], "mdisable-fp-elim">,

Modified: cfe/trunk/lib/CodeGen/BackendUtil.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/BackendUtil.cpp?rev=340398&r1=340397&r2=340398&view=diff
==
--- cfe/trunk/lib/CodeGen/BackendUtil.cpp (original)
+++ cfe/trunk/lib/CodeGen/BackendUtil.cpp Wed Aug 22 04:34:28 2018
@@ -368,6 +368,7 @@ static CodeGenOpt::Level getCGOptLevel(c
static Optional<llvm::CodeModel::Model>
getCodeModel(const CodeGenOptions &CodeGenOpts) {
   unsigned CodeModel = llvm::StringSwitch<unsigned>(CodeGenOpts.CodeModel)
+   .Case("tiny", llvm::CodeModel::Tiny)
.Case("small", llvm::CodeModel::Small)
.Case("kernel", llvm::CodeModel::Kernel)
.Case("medium", llvm::CodeModel::Medium)

Modified: cfe/trunk/lib/Frontend/CompilerInvocation.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/CompilerInvocation.cpp?rev=340398&r1=340397&r2=340398&view=diff
==
--- cfe/trunk/lib/Frontend/CompilerInvocation.cpp (original)
+++ cfe/trunk/lib/Frontend/CompilerInvocation.cpp Wed Aug 22 04:34:28 2018
@@ -370,7 +370,7 @@ static StringRef getCodeModel(ArgList &A
   if (Arg *A = Args.getLastArg(OPT_mcode_model)) {
 StringRef Value = A->getValue();
 if (Value == "small" || Value == "kernel" || Value == "medium" ||
-Value == "large")
+Value == "large" || Value == "tiny")
   return Value;
 Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Value;
   }

Modified: cfe/trunk/test/Driver/code-model.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/code-model.c?rev=340398&r1=340397&r2=340398&view=diff
==
--- cfe/trunk/test/Driver/code-model.c (original)
+++ cfe/trunk/test/Driver/code-model.c Wed Aug 22 04:34:28 2018
@@ -1,9 +1,11 @@
+// RUN: %clang -### -c -mcmodel=tiny %s 2>&1 | FileCheck -check-prefix 
CHECK-TINY %s
 // RUN: %clang -### -c -mcmodel=small %s 2>&1 | FileCheck -check-prefix 
CHECK-SMALL %s
 // RUN: %clang -### -S -mcmodel=kernel %s 2>&1 | FileCheck -check-prefix 
CHECK-KERNEL %s
 // RUN: %clang -### -c -mcmodel=medium %s 2>&1 | FileCheck -check-prefix 
CHECK-MEDIUM %s
 // RUN: %clang -### -S -mcmodel=large %s 2>&1 | FileCheck -check-prefix 
CHECK-LARGE %s
 // RUN: not %clang -c -mcmodel=lager %s 2>&1 | FileCheck -check-prefix 
CHECK-INVALID %s
 
+// CHECK-TINY: "-mcode-model" "tiny"
 // CHECK-SMALL: "-mcode-model" "small"
 // CHECK-KERNEL: "-mcode-model" "kernel"
 // CHECK-MEDIUM: "-mcode-model" "medium"




r342053 - [CodeGen] Align rtti and vtable data

2018-09-12 Thread David Green via cfe-commits
Author: dmgreen
Date: Wed Sep 12 07:09:06 2018
New Revision: 342053

URL: http://llvm.org/viewvc/llvm-project?rev=342053&view=rev
Log:
[CodeGen] Align rtti and vtable data

Previously the alignment on the newly created rtti/typeinfo data was largely
not set, meaning that DataLayout::getPreferredAlignment was free to overalign
it to 16 bytes. This causes unnecessary code bloat.

Differential Revision: https://reviews.llvm.org/D51416

Modified:
cfe/trunk/lib/CodeGen/CGVTT.cpp
cfe/trunk/lib/CodeGen/CGVTables.cpp
cfe/trunk/lib/CodeGen/CodeGenModule.cpp
cfe/trunk/lib/CodeGen/CodeGenModule.h
cfe/trunk/lib/CodeGen/ItaniumCXXABI.cpp
cfe/trunk/lib/CodeGen/MicrosoftCXXABI.cpp
cfe/trunk/test/CodeGenCXX/microsoft-abi-vbtables.cpp
cfe/trunk/test/CodeGenCXX/vtable-align.cpp
cfe/trunk/test/CodeGenCXX/vtable-linkage.cpp

Modified: cfe/trunk/lib/CodeGen/CGVTT.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGVTT.cpp?rev=342053&r1=342052&r2=342053&view=diff
==
--- cfe/trunk/lib/CodeGen/CGVTT.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGVTT.cpp Wed Sep 12 07:09:06 2018
@@ -119,10 +119,10 @@ llvm::GlobalVariable *CodeGenVTables::Ge
 
   llvm::ArrayType *ArrayType =
 llvm::ArrayType::get(CGM.Int8PtrTy, Builder.getVTTComponents().size());
+  unsigned Align = CGM.getDataLayout().getABITypeAlignment(CGM.Int8PtrTy);
 
-  llvm::GlobalVariable *GV =
-CGM.CreateOrReplaceCXXRuntimeVariable(Name, ArrayType,
-  llvm::GlobalValue::ExternalLinkage);
+  llvm::GlobalVariable *GV = CGM.CreateOrReplaceCXXRuntimeVariable(
+  Name, ArrayType, llvm::GlobalValue::ExternalLinkage, Align);
   GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
   return GV;
 }

Modified: cfe/trunk/lib/CodeGen/CGVTables.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGVTables.cpp?rev=342053&r1=342052&r2=342053&view=diff
==
--- cfe/trunk/lib/CodeGen/CGVTables.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGVTables.cpp Wed Sep 12 07:09:06 2018
@@ -756,9 +756,11 @@ CodeGenVTables::GenerateConstructionVTab
   if (Linkage == llvm::GlobalVariable::AvailableExternallyLinkage)
 Linkage = llvm::GlobalVariable::InternalLinkage;
 
+  unsigned Align = CGM.getDataLayout().getABITypeAlignment(VTType);
+
   // Create the variable that will hold the construction vtable.
   llvm::GlobalVariable *VTable =
-CGM.CreateOrReplaceCXXRuntimeVariable(Name, VTType, Linkage);
+  CGM.CreateOrReplaceCXXRuntimeVariable(Name, VTType, Linkage, Align);
   CGM.setGVProperties(VTable, RD);
 
   // V-tables are always unnamed_addr.

Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=342053&r1=342052&r2=342053&view=diff
==
--- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Wed Sep 12 07:09:06 2018
@@ -3099,10 +3099,9 @@ CodeGenModule::GetAddrOfGlobal(GlobalDec
   IsForDefinition);
 }
 
-llvm::GlobalVariable *
-CodeGenModule::CreateOrReplaceCXXRuntimeVariable(StringRef Name,
-  llvm::Type *Ty,
-  llvm::GlobalValue::LinkageTypes Linkage) 
{
+llvm::GlobalVariable *CodeGenModule::CreateOrReplaceCXXRuntimeVariable(
+StringRef Name, llvm::Type *Ty, llvm::GlobalValue::LinkageTypes Linkage,
+unsigned Alignment) {
   llvm::GlobalVariable *GV = getModule().getNamedGlobal(Name);
   llvm::GlobalVariable *OldGV = nullptr;
 
@@ -3138,6 +3137,8 @@ CodeGenModule::CreateOrReplaceCXXRuntime
   !GV->hasAvailableExternallyLinkage())
 GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
 
+  GV->setAlignment(Alignment);
+
   return GV;
 }
 

Modified: cfe/trunk/lib/CodeGen/CodeGenModule.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.h?rev=342053&r1=342052&r2=342053&view=diff
==
--- cfe/trunk/lib/CodeGen/CodeGenModule.h (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.h Wed Sep 12 07:09:06 2018
@@ -764,7 +764,8 @@ public:
   /// bitcast to the new variable.
   llvm::GlobalVariable *
   CreateOrReplaceCXXRuntimeVariable(StringRef Name, llvm::Type *Ty,
-llvm::GlobalValue::LinkageTypes Linkage);
+llvm::GlobalValue::LinkageTypes Linkage,
+unsigned Alignment);
 
   llvm::Function *
   CreateGlobalInitOrDestructFunction(llvm::FunctionType *ty, const Twine &name,

Modified: cfe/trunk/lib/CodeGen/ItaniumCXXABI.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/ItaniumCXXABI.cpp?rev=34205

r338566 - [UnrollAndJam] Add unroll_and_jam pragma handling

2018-08-01 Thread David Green via cfe-commits
Author: dmgreen
Date: Wed Aug  1 07:36:12 2018
New Revision: 338566

URL: http://llvm.org/viewvc/llvm-project?rev=338566&view=rev
Log:
[UnrollAndJam] Add unroll_and_jam pragma handling

This adds support for the unroll_and_jam pragma, to go with the recently
added unroll and jam pass. The name of the pragma is the same as is used
in the Intel compiler, and most of the code works the same as for unroll.

#pragma clang loop unroll_and_jam has been separated into a different
patch. This part adds #pragma unroll_and_jam with an optional count, and
#pragma nounroll_and_jam to disable the transform.

Differential Revision: https://reviews.llvm.org/D47267
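
A short usage sketch of the new pragmas (the functions below are made up for
illustration; the count in parentheses is optional):

  void scale_add(int N, int M, float A, const float *X, float *Y) {
    // Ask the compiler to unroll the outer loop by 4 and fuse (jam) the
    // resulting copies of the inner loop.
  #pragma unroll_and_jam(4)
    for (int I = 0; I < N; ++I)
      for (int J = 0; J < M; ++J)
        Y[I * M + J] += A * X[I * M + J];
  }

  void no_transform(int N, int M, const float *X, float *Y) {
    // Explicitly disable the transform for this loop nest.
  #pragma nounroll_and_jam
    for (int I = 0; I < N; ++I)
      for (int J = 0; J < M; ++J)
        Y[I * M + J] += X[I * M + J];
  }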


Added:
cfe/trunk/test/CodeGenCXX/pragma-unroll-and-jam.cpp
cfe/trunk/test/Parser/pragma-unroll-and-jam.cpp
Modified:
cfe/trunk/include/clang/Basic/Attr.td
cfe/trunk/include/clang/Parse/Parser.h
cfe/trunk/lib/CodeGen/CGLoopInfo.cpp
cfe/trunk/lib/CodeGen/CGLoopInfo.h
cfe/trunk/lib/Parse/ParsePragma.cpp
cfe/trunk/lib/Sema/SemaStmtAttr.cpp

Modified: cfe/trunk/include/clang/Basic/Attr.td
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/Attr.td?rev=338566&r1=338565&r2=338566&view=diff
==
--- cfe/trunk/include/clang/Basic/Attr.td (original)
+++ cfe/trunk/include/clang/Basic/Attr.td Wed Aug  1 07:36:12 2018
@@ -2748,6 +2748,8 @@ def LoopHint : Attr {
   /// interleave_count: interleaves 'Value' loop iterations.
   /// unroll: fully unroll loop if State == Enable.
   /// unroll_count: unrolls loop 'Value' times.
+  /// unroll_and_jam: attempt to unroll and jam loop if State == Enable.
+  /// unroll_and_jam_count: unroll and jams loop 'Value' times.
   /// distribute: attempt to distribute loop if State == Enable
 
   /// #pragma unroll  directive
@@ -2756,14 +2758,17 @@ def LoopHint : Attr {
   /// expression: unrolls loop 'Value' times.
 
   let Spellings = [Pragma<"clang", "loop">, Pragma<"", "unroll">,
-   Pragma<"", "nounroll">];
+   Pragma<"", "nounroll">, Pragma<"", "unroll_and_jam">,
+   Pragma<"", "nounroll_and_jam">];
 
   /// State of the loop optimization specified by the spelling.
   let Args = [EnumArgument<"Option", "OptionType",
   ["vectorize", "vectorize_width", "interleave", 
"interleave_count",
-   "unroll", "unroll_count", "distribute"],
+   "unroll", "unroll_count", "unroll_and_jam", 
"unroll_and_jam_count",
+   "distribute"],
   ["Vectorize", "VectorizeWidth", "Interleave", 
"InterleaveCount",
-   "Unroll", "UnrollCount", "Distribute"]>,
+   "Unroll", "UnrollCount", "UnrollAndJam", 
"UnrollAndJamCount",
+   "Distribute"]>,
   EnumArgument<"State", "LoopHintState",
["enable", "disable", "numeric", "assume_safety", 
"full"],
["Enable", "Disable", "Numeric", "AssumeSafety", 
"Full"]>,
@@ -2778,6 +2783,8 @@ def LoopHint : Attr {
 case InterleaveCount: return "interleave_count";
 case Unroll: return "unroll";
 case UnrollCount: return "unroll_count";
+case UnrollAndJam: return "unroll_and_jam";
+case UnrollAndJamCount: return "unroll_and_jam_count";
 case Distribute: return "distribute";
 }
 llvm_unreachable("Unhandled LoopHint option.");
@@ -2787,9 +2794,9 @@ def LoopHint : Attr {
 unsigned SpellingIndex = getSpellingListIndex();
 // For "#pragma unroll" and "#pragma nounroll" the string "unroll" or
 // "nounroll" is already emitted as the pragma name.
-if (SpellingIndex == Pragma_nounroll)
+if (SpellingIndex == Pragma_nounroll || SpellingIndex == 
Pragma_nounroll_and_jam)
   return;
-else if (SpellingIndex == Pragma_unroll) {
+else if (SpellingIndex == Pragma_unroll || SpellingIndex == 
Pragma_unroll_and_jam) {
   OS << ' ' << getValueString(Policy);
   return;
 }
@@ -2825,6 +2832,11 @@ def LoopHint : Attr {
   return "#pragma nounroll";
 else if (SpellingIndex == Pragma_unroll)
   return "#pragma unroll" + (option == UnrollCount ? 
getValueString(Policy) : "");
+else if (SpellingIndex == Pragma_nounroll_and_jam)
+  return "#pragma nounroll_and_jam";
+else if (SpellingIndex == Pragma_unroll_and_jam)
+  return "#pragma unroll_and_jam" +
+(option == UnrollAndJamCount ? getValueString(Policy) : "");
 
 assert(SpellingIndex == Pragma_clang_loop && "Unexpected spelling");
 return getOptionName(option) + getValueString(Policy);

Modified: cfe/trunk/include/clang/Parse/Parser.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Parse/Parser.h?rev=338566&r1=338565&r2=338566&view=diff
==
--- cfe/trunk/include/clang/Pars

Re: r348020 - Reverting r347949-r347951 because they broke the test bots.

2018-11-30 Thread David Green via cfe-commits
Hello!


Did you intend to change the .gitignore here too?


Cheers

Dave


From: cfe-commits  on behalf of Aaron 
Ballman via cfe-commits 
Sent: 30 November 2018 18:52:51
To: cfe-commits@lists.llvm.org
Subject: r348020 - Reverting r347949-r347951 because they broke the test bots.

Author: aaronballman
Date: Fri Nov 30 10:52:51 2018
New Revision: 348020

URL: http://llvm.org/viewvc/llvm-project?rev=348020&view=rev
Log:
Reverting r347949-r347951 because they broke the test bots.

http://lab.llvm.org:8011/builders/clang-cmake-armv8-lld/builds/440/steps/ninja%20check%202/logs/FAIL%3A%20Clang%3A%3Aosobject-retain-release.cpp

Modified:
cfe/trunk/.gitignore
cfe/trunk/include/clang/StaticAnalyzer/Core/RetainSummaryManager.h
cfe/trunk/lib/StaticAnalyzer/Core/RetainSummaryManager.cpp
cfe/trunk/test/Analysis/osobject-retain-release.cpp
cfe/trunk/test/Misc/pragma-attribute-supported-attributes-list.test

Modified: cfe/trunk/.gitignore
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/.gitignore?rev=348020&r1=348019&r2=348020&view=diff
==
--- cfe/trunk/.gitignore (original)
+++ cfe/trunk/.gitignore Fri Nov 30 10:52:51 2018
@@ -29,8 +29,6 @@ cscope.out
 
#==#
 # Directories to ignore (do not add trailing '/'s, they skip symlinks).
 
#==#
-# Clang extra user tools, which is tracked independently (clang-tools-extra).
-tools/extra
 # Sphinx build products
 docs/_build
 docs/analyzer/_build

Modified: cfe/trunk/include/clang/StaticAnalyzer/Core/RetainSummaryManager.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/StaticAnalyzer/Core/RetainSummaryManager.h?rev=348020&r1=348019&r2=348020&view=diff
==
--- cfe/trunk/include/clang/StaticAnalyzer/Core/RetainSummaryManager.h 
(original)
+++ cfe/trunk/include/clang/StaticAnalyzer/Core/RetainSummaryManager.h Fri Nov 
30 10:52:51 2018
@@ -530,8 +530,6 @@ class RetainSummaryManager {
   /// Decrement the reference count on OS object.
   const RetainSummary *getOSSummaryReleaseRule(const FunctionDecl *FD);

-  /// Free the OS object.
-  const RetainSummary *getOSSummaryFreeRule(const FunctionDecl *FD);

   enum UnaryFuncKind { cfretain, cfrelease, cfautorelease, cfmakecollectable };


Modified: cfe/trunk/lib/StaticAnalyzer/Core/RetainSummaryManager.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/StaticAnalyzer/Core/RetainSummaryManager.cpp?rev=348020&r1=348019&r2=348020&view=diff
==
--- cfe/trunk/lib/StaticAnalyzer/Core/RetainSummaryManager.cpp (original)
+++ cfe/trunk/lib/StaticAnalyzer/Core/RetainSummaryManager.cpp Fri Nov 30 
10:52:51 2018
@@ -124,8 +124,10 @@ RetainSummaryManager::generateSummary(co
   }

   const IdentifierInfo *II = FD->getIdentifier();
+  if (!II)
+return getDefaultSummary();

-  StringRef FName = II ? II->getName() : "";
+  StringRef FName = II->getName();

   // Strip away preceding '_'.  Doing this here will effect all the checks
   // down below.
@@ -302,12 +304,6 @@ RetainSummaryManager::generateSummary(co

   if (FName == "retain")
 return getOSSummaryRetainRule(FD);
-
-  if (FName == "free")
-return getOSSummaryFreeRule(FD);
-
-  if (MD->getOverloadedOperator() == OO_New)
-return getOSSummaryCreateRule(MD);
 }
   }

@@ -495,11 +491,9 @@ RetainSummaryManager::getSummary(const C
   case CE_CXXConstructor:
  Summ = getFunctionSummary(cast<CXXConstructorCall>(Call).getDecl());
 break;
-  case CE_CXXAllocator:
-Summ = getFunctionSummary(cast<CXXAllocatorCall>(Call).getDecl());
-break;
   case CE_Block:
   case CE_CXXDestructor:
+  case CE_CXXAllocator:
 // FIXME: These calls are currently unsupported.
 return getPersistentStopSummary();
   case CE_ObjCMessage: {
@@ -625,14 +619,6 @@ RetainSummaryManager::getOSSummaryReleas
 }

 const RetainSummary *
-RetainSummaryManager::getOSSummaryFreeRule(const FunctionDecl *FD) {
-  return getPersistentSummary(RetEffect::MakeNoRet(),
-  /*ReceiverEff=*/DoNothing,
-  /*DefaultEff=*/DoNothing,
-  /*ThisEff=*/Dealloc);
-}
-
-const RetainSummary *
 RetainSummaryManager::getOSSummaryCreateRule(const FunctionDecl *FD) {
   return getPersistentSummary(RetEffect::MakeOwned(RetEffect::OS));
 }

Modified: cfe/trunk/test/Analysis/osobject-retain-release.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/osobject-retain-release.cpp?rev=348020&r1=348019&r2=348020&view=diff
==
--- cfe/trunk/test/Analysis/osobject-retain-release.cpp (original)
+++ cfe/trunk/test/Analysis/oso

Re: r348020 - Reverting r347949-r347951 because they broke the test bots.

2018-11-30 Thread David Green via cfe-commits
Thanks for the fix, And thanks for the work keeping the bots green!

Dave





From: Aaron Ballman 
Sent: 30 November 2018 19:35
To: David Green
Cc: cfe-commits; nd
Subject: Re: r348020 - Reverting r347949-r347951 because they broke the test 
bots.
  

On Fri, Nov 30, 2018 at 2:31 PM David Green  wrote:
>
> Hello!
>
>
> Did you intend to change the .gitignore here too?

No! That's a great catch, sorry about that -- VS integration with
CMake is rather broken without that manual change to .gitignore. :-(
I've restored it in r348026 -- sorry for the churn there!

~Aaron

>
>
> Cheers
>
> Dave
>
> 
> From: cfe-commits  on behalf of Aaron 
> Ballman via cfe-commits 
> Sent: 30 November 2018 18:52:51
> To: cfe-commits@lists.llvm.org
> Subject: r348020 - Reverting r347949-r347951 because they broke the test bots.
>
> Author: aaronballman
> Date: Fri Nov 30 10:52:51 2018
> New Revision: 348020
>
> URL: http://llvm.org/viewvc/llvm-project?rev=348020&view=rev
> Log:
> Reverting r347949-r347951 because they broke the test bots.
>
>  
> http://lab.llvm.org:8011/builders/clang-cmake-armv8-lld/builds/440/steps/ninja%20check%202/logs/FAIL%3A%20Clang%3A%3Aosobject-retain-release.cpp
>
> Modified:
> cfe/trunk/.gitignore
> cfe/trunk/include/clang/StaticAnalyzer/Core/RetainSummaryManager.h
> cfe/trunk/lib/StaticAnalyzer/Core/RetainSummaryManager.cpp
> cfe/trunk/test/Analysis/osobject-retain-release.cpp
> cfe/trunk/test/Misc/pragma-attribute-supported-attributes-list.test
>
> Modified: cfe/trunk/.gitignore
> URL:  
> http://llvm.org/viewvc/llvm-project/cfe/trunk/.gitignore?rev=348020&r1=348019&r2=348020&view=diff
> ==
> --- cfe/trunk/.gitignore (original)
> +++ cfe/trunk/.gitignore Fri Nov 30 10:52:51 2018
> @@ -29,8 +29,6 @@ cscope.out
>  
>#==#
>  # Directories to ignore (do not add trailing '/'s, they skip symlinks).
>  
>#==#
> -# Clang extra user tools, which is tracked independently (clang-tools-extra).
> -tools/extra
>  # Sphinx build products
>  docs/_build
>  docs/analyzer/_build
>
> Modified: cfe/trunk/include/clang/StaticAnalyzer/Core/RetainSummaryManager.h
> URL:  
> http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/StaticAnalyzer/Core/RetainSummaryManager.h?rev=348020&r1=348019&r2=348020&view=diff
> ==
> --- cfe/trunk/include/clang/StaticAnalyzer/Core/RetainSummaryManager.h 
> (original)
> +++ cfe/trunk/include/clang/StaticAnalyzer/Core/RetainSummaryManager.h Fri 
> Nov 30 10:52:51 2018
> @@ -530,8 +530,6 @@ class RetainSummaryManager {
>    /// Decrement the reference count on OS object.
>    const RetainSummary *getOSSummaryReleaseRule(const FunctionDecl *FD);
>
> -  /// Free the OS object.
> -  const RetainSummary *getOSSummaryFreeRule(const FunctionDecl *FD);
>
>    enum UnaryFuncKind { cfretain, cfrelease, cfautorelease, cfmakecollectable 
>};
>
>
> Modified: cfe/trunk/lib/StaticAnalyzer/Core/RetainSummaryManager.cpp
> URL:  
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/StaticAnalyzer/Core/RetainSummaryManager.cpp?rev=348020&r1=348019&r2=348020&view=diff
> ==
> --- cfe/trunk/lib/StaticAnalyzer/Core/RetainSummaryManager.cpp (original)
> +++ cfe/trunk/lib/StaticAnalyzer/Core/RetainSummaryManager.cpp Fri Nov 30 
> 10:52:51 2018
> @@ -124,8 +124,10 @@ RetainSummaryManager::generateSummary(co
>    }
>
>    const IdentifierInfo *II = FD->getIdentifier();
> +  if (!II)
> +    return getDefaultSummary();
>
> -  StringRef FName = II ? II->getName() : "";
> +  StringRef FName = II->getName();
>
>    // Strip away preceding '_'.  Doing this here will effect all the checks
>    // down below.
> @@ -302,12 +304,6 @@ RetainSummaryManager::generateSummary(co
>
>    if (FName == "retain")
>  return getOSSummaryRetainRule(FD);
> -
> -  if (FName == "free")
> -    return getOSSummaryFreeRule(FD);
> -
> -  if (MD->getOverloadedOperator() == OO_New)
> -    return getOSSummaryCreateRule(MD);
>  }
>    }
>
> @@ -495,11 +491,9 @@ RetainSummaryManager::getSummary(const C
>    case CE_CXXConstructor:
>  Summ = getFunctionSummary(cast(Call).getDecl());
>  break;
> -  case CE_CXXAllocator:
> -    Summ = getFunctionSummary(cast(Call).getDecl());
> -    break;
>    case CE_Block:
>    case CE_CXXDestructor:
> +  case CE_CXXAllocator:
>  // FIXME: These calls are currently unsupported.
>  return getPersistentStopSummary();
>    case CE_ObjCMessage: {
> @@ -625,14 +619,6 @@ RetainSummaryManager::getOSSummaryReleas
>  }
>
>  const RetainSummary *
> -RetainSummaryManager::getOSSummaryFreeRule(const FunctionDecl *FD) {
> -  return getPersistentSummary(Re

r348582 - Add an AArch64 triple to tiny codemodel test.

2018-12-07 Thread David Green via cfe-commits
Author: dmgreen
Date: Fri Dec  7 03:16:03 2018
New Revision: 348582

URL: http://llvm.org/viewvc/llvm-project?rev=348582&view=rev
Log:
Add an AArch64 triple to tiny codemodel test.

Most other targets do not support the tiny code model.


Modified:
cfe/trunk/test/CodeGen/codemodels.c

Modified: cfe/trunk/test/CodeGen/codemodels.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/codemodels.c?rev=348582&r1=348581&r2=348582&view=diff
==
--- cfe/trunk/test/CodeGen/codemodels.c (original)
+++ cfe/trunk/test/CodeGen/codemodels.c Fri Dec  7 03:16:03 2018
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -emit-llvm  %s -o - | FileCheck %s 
-check-prefix=CHECK-NOMODEL
-// RUN: %clang_cc1 -emit-llvm -mcode-model tiny %s -o - | FileCheck %s 
-check-prefix=CHECK-TINY
+// RUN: %clang_cc1 -triple aarch64-unknown-none-eabi -emit-llvm -mcode-model 
tiny %s -o - | FileCheck %s -check-prefix=CHECK-TINY
 // RUN: %clang_cc1 -emit-llvm -mcode-model small %s -o - | FileCheck %s 
-check-prefix=CHECK-SMALL
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -mcode-model 
kernel %s -o - | FileCheck %s -check-prefix=CHECK-KERNEL
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -mcode-model 
medium %s -o - | FileCheck %s -check-prefix=CHECK-MEDIUM




Re: r342053 - [CodeGen] Align rtti and vtable data

2018-09-17 Thread David Green via cfe-commits
Hello


Interesting, what kind of failures?


If they are causing you problems, of course feel free to revert.

Dave


From: Eric Christopher 
Sent: 17 September 2018 18:07:47
To: David Green
Cc: cfe-commits@lists.llvm.org
Subject: Re: r342053 - [CodeGen] Align rtti and vtable data

Hi David,

I'm seeing test failures after this patch. I'm trying to get a test case 
reduced, but can we revert until we figure it out?

Thanks!

-eric

On Wed, Sep 12, 2018 at 7:10 AM David Green via cfe-commits
<cfe-commits@lists.llvm.org> wrote:
Author: dmgreen
Date: Wed Sep 12 07:09:06 2018
New Revision: 342053

URL: http://llvm.org/viewvc/llvm-project?rev=342053&view=rev
Log:
[CodeGen] Align rtti and vtable data

Previously the alignment on the newly created rtti/typeinfo data was largely
not set, meaning that DataLayout::getPreferredAlignment was free to overalign
it to 16 bytes. This causes unnecessary code bloat.

Differential Revision: https://reviews.llvm.org/D51416

Modified:
cfe/trunk/lib/CodeGen/CGVTT.cpp
cfe/trunk/lib/CodeGen/CGVTables.cpp
cfe/trunk/lib/CodeGen/CodeGenModule.cpp
cfe/trunk/lib/CodeGen/CodeGenModule.h
cfe/trunk/lib/CodeGen/ItaniumCXXABI.cpp
cfe/trunk/lib/CodeGen/MicrosoftCXXABI.cpp
cfe/trunk/test/CodeGenCXX/microsoft-abi-vbtables.cpp
cfe/trunk/test/CodeGenCXX/vtable-align.cpp
cfe/trunk/test/CodeGenCXX/vtable-linkage.cpp

Modified: cfe/trunk/lib/CodeGen/CGVTT.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGVTT.cpp?rev=342053&r1=342052&r2=342053&view=diff
==
--- cfe/trunk/lib/CodeGen/CGVTT.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGVTT.cpp Wed Sep 12 07:09:06 2018
@@ -119,10 +119,10 @@ llvm::GlobalVariable *CodeGenVTables::Ge

   llvm::ArrayType *ArrayType =
 llvm::ArrayType::get(CGM.Int8PtrTy, Builder.getVTTComponents().size());
+  unsigned Align = CGM.getDataLayout().getABITypeAlignment(CGM.Int8PtrTy);

-  llvm::GlobalVariable *GV =
-CGM.CreateOrReplaceCXXRuntimeVariable(Name, ArrayType,
-  llvm::GlobalValue::ExternalLinkage);
+  llvm::GlobalVariable *GV = CGM.CreateOrReplaceCXXRuntimeVariable(
+  Name, ArrayType, llvm::GlobalValue::ExternalLinkage, Align);
   GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
   return GV;
 }

Modified: cfe/trunk/lib/CodeGen/CGVTables.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGVTables.cpp?rev=342053&r1=342052&r2=342053&view=diff
==
--- cfe/trunk/lib/CodeGen/CGVTables.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGVTables.cpp Wed Sep 12 07:09:06 2018
@@ -756,9 +756,11 @@ CodeGenVTables::GenerateConstructionVTab
   if (Linkage == llvm::GlobalVariable::AvailableExternallyLinkage)
 Linkage = llvm::GlobalVariable::InternalLinkage;

+  unsigned Align = CGM.getDataLayout().getABITypeAlignment(VTType);
+
   // Create the variable that will hold the construction vtable.
   llvm::GlobalVariable *VTable =
-CGM.CreateOrReplaceCXXRuntimeVariable(Name, VTType, Linkage);
+  CGM.CreateOrReplaceCXXRuntimeVariable(Name, VTType, Linkage, Align);
   CGM.setGVProperties(VTable, RD);

   // V-tables are always unnamed_addr.

Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=342053&r1=342052&r2=342053&view=diff
==
--- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Wed Sep 12 07:09:06 2018
@@ -3099,10 +3099,9 @@ CodeGenModule::GetAddrOfGlobal(GlobalDec
   IsForDefinition);
 }

-llvm::GlobalVariable *
-CodeGenModule::CreateOrReplaceCXXRuntimeVariable(StringRef Name,
-  llvm::Type *Ty,
-  llvm::GlobalValue::LinkageTypes Linkage) 
{
+llvm::GlobalVariable *CodeGenModule::CreateOrReplaceCXXRuntimeVariable(
+StringRef Name, llvm::Type *Ty, llvm::GlobalValue::LinkageTypes Linkage,
+unsigned Alignment) {
   llvm::GlobalVariable *GV = getModule().getNamedGlobal(Name);
   llvm::GlobalVariable *OldGV = nullptr;

@@ -3138,6 +3137,8 @@ CodeGenModule::CreateOrReplaceCXXRuntime
   !GV->hasAvailableExternallyLinkage())
 GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));

+  GV->setAlignment(Alignment);
+
   return GV;
 }


Modified: cfe/trunk/lib/CodeGen/CodeGenModule.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.h?rev=342053&r1=342052&r2=342053&view=diff
==
--- cfe/trunk/lib/CodeGen/CodeGenModule.h (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.h Wed Sep 12 07

Re: r342525 - [NFC] Fix uncompilable test cases of ExprMutationAnalyzer.

2018-09-19 Thread David Green via cfe-commits
Thanks!


That sorted us right out.


Dave



From: Shuai Wang
Sent: 19 September 2018 21:32
To: David Green
Cc: cfe-commits@lists.llvm.org
Subject: Re: r342525 - [NFC] Fix uncompilable test cases of 
ExprMutationAnalyzer.



On Wed, Sep 19, 2018 at 3:10 AM David Green <david.gr...@arm.com> wrote:

Hello!


You have some code in here that looks like

 A operator+(A&&, int) {}


which is a non-void function without a return statement. Any reason to use "{}" 
and not ";"? You seem to have deliberately changed them, but that causes us 
some problems downstream. Mind if we change them to A operator+(A&&, int);? or 
will that cause you problems in other places?

You're right, I don't need to define it, fixed in 
https://reviews.llvm.org/rC342586
Thanks for pointing it out!


Cheers

Dave



r343843 - [AArch64] Use filecheck captures for metadata node numbers in test. NFC

2018-10-05 Thread David Green via cfe-commits
Author: dmgreen
Date: Fri Oct  5 03:21:25 2018
New Revision: 343843

URL: http://llvm.org/viewvc/llvm-project?rev=343843&view=rev
Log:
[AArch64] Use filecheck captures for metadata node numbers in test. NFC

Just a quick fix for cases where extra metadata members are present.
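
For readers unfamiliar with the FileCheck capture syntax used below: [[MD2:.*]]
binds whatever the regex matches to the variable MD2, and a later [[MD2]] must
match the same text, so the test no longer hard-codes the metadata nodes being
numbered exactly !2 and !3. A stand-alone sketch of the pattern (the regex
choice is arbitrary):

  // CHECK: call i64 @llvm.read_register.i64(metadata ![[REG:[0-9]+]])
  // CHECK: ![[REG]] = !{!"x18"}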

Modified:
cfe/trunk/test/CodeGen/arm64-microsoft-intrinsics.c

Modified: cfe/trunk/test/CodeGen/arm64-microsoft-intrinsics.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm64-microsoft-intrinsics.c?rev=343843&r1=343842&r2=343843&view=diff
==
--- cfe/trunk/test/CodeGen/arm64-microsoft-intrinsics.c (original)
+++ cfe/trunk/test/CodeGen/arm64-microsoft-intrinsics.c Fri Oct  5 03:21:25 2018
@@ -74,7 +74,7 @@ unsigned __int64 check__getReg() {
   return reg;
 }
 
-// CHECK-MSVC: call i64 @llvm.read_register.i64(metadata !2)
-// CHECK-MSVC: call i64 @llvm.read_register.i64(metadata !3)
-// CHECK-MSVC: !2 = !{!"x18"}
-// CHECK-MSVC: !3 = !{!"sp"}
+// CHECK-MSVC: call i64 @llvm.read_register.i64(metadata ![[MD2:.*]])
+// CHECK-MSVC: call i64 @llvm.read_register.i64(metadata ![[MD3:.*]])
+// CHECK-MSVC: ![[MD2]] = !{!"x18"}
+// CHECK-MSVC: ![[MD3]] = !{!"sp"}




Re: r352055 - Fix failing buildbots

2019-02-01 Thread David Green via cfe-commits
Hello

Sorry for the late reply. I'm not sure this ifdef is quite correct. It will be 
testing the _host_ architecture, presuming the default target is the same. If 
they are different (for example if the default target is aarch64 on an x86 
machine), the test will presumably still fail.

I went looking through the buildbots and I think this hexagon bot builds that 
way:
http://lab.llvm.org:8011/builders/clang-hexagon-elf/builds/22699

Got any good suggestions how to fix it?

Thanks,
Dave





Author: martong
Date: Thu Jan 24 07:42:20 2019
New Revision: 352055

URL: http://llvm.org/viewvc/llvm-project?rev=352055&view=rev
Log:
Fix failing buildbots

Related commit which caused the buildbots to fail:
rL352050

Modified:
    cfe/trunk/unittests/AST/StructuralEquivalenceTest.cpp

Modified: cfe/trunk/unittests/AST/StructuralEquivalenceTest.cpp
URL:  
http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/AST/StructuralEquivalenceTest.cpp?rev=352055&r1=352054&r2=352055&view=diff
==
--- cfe/trunk/unittests/AST/StructuralEquivalenceTest.cpp (original)
+++ cfe/trunk/unittests/AST/StructuralEquivalenceTest.cpp Thu Jan 24 07:42:20 
2019
@@ -378,14 +378,17 @@ TEST_F(StructuralEquivalenceFunctionTest
   EXPECT_TRUE(testStructuralMatch(t));
 }
 
+// These calling conventions may not be available on certain platforms.
+#if defined(__x86_64__) && defined(__linux__)
 TEST_F(StructuralEquivalenceFunctionTest,
 FunctionsWithDifferentCallingConventions) {
   auto t = makeNamedDecls(
-  "__attribute__((fastcall)) void foo();",
+  "__attribute__((preserve_all)) void foo();",
   "__attribute__((ms_abi))   void foo();",
   Lang_C);
   EXPECT_FALSE(testStructuralMatch(t));
 }
+#endif
 
 TEST_F(StructuralEquivalenceFunctionTest, FunctionsWithDifferentSavedRegsAttr) 
{
   auto t = makeNamedDecls(




Re: r352055 - Fix failing buildbots

2019-02-01 Thread David Green via cfe-commits
Hello


I think, because this is a unit-test, the compile will happen for the host 
(x86_64 in this case). So the binary will still be x86_64.


The compile that the test runs will pick up whatever the default target triple 
is (hexagon for the bot, aarch64 for us). I don't know a lot about these tests, 
but I presume that somewhere deep within testStructuralMatch or makeNamedDecls 
it will be picking this up and we can override it?


..


Looking at it now, I think the Args to buildASTFromCodeWithArgs will allow 
specific targets to be used. I'm not sure the best way to get that information 
through to there, but something like this would work:


diff --git a/unittests/AST/StructuralEquivalenceTest.cpp 
b/unittests/AST/StructuralEquivalenceTest.cpp
index e6c289a..52dba5e 100644
--- a/unittests/AST/StructuralEquivalenceTest.cpp
+++ b/unittests/AST/StructuralEquivalenceTest.cpp
@@ -28,6 +28,7 @@ struct StructuralEquivalenceTest : ::testing::Test {
 this->Code0 = SrcCode0;
 this->Code1 = SrcCode1;
 ArgVector Args = getBasicRunOptionsForLanguage(Lang);
+Args.push_back("--target=x86_64-unknown-linux-gnu");

 const char *const InputFileName = "input.cc";


I wouldn't recommend that exactly, as it would needlessly reduce the testing on 
other targets. And I think for the hexagon target the x86 backend will not even 
be registered. Perhaps just something like this, from another ASTMatchersNode 
test, to try and capture the same intent as the ifdefs:


TEST_F(StructuralEquivalenceFunctionTest, FunctionsWithDifferentSavedRegsAttr) {

  if (llvm::Triple(llvm::sys::getDefaultTargetTriple()).getArch() != 
llvm::Triple::x86_64)
return;
  ...


Dave



> Hi,
>
> Thank you for catching this. I thought that the macros like __x86_64__ are 
> defined for the target. I just don't understand: If they are defined for the 
> host, > that would mean we can't cross compile on the same host for different 
> targets, wouldn't it?
>
> I couldn't find out which macros to use to get the target arch, so I see 2 
> possible solutions :
> 1. Create a new test binary for these two small tests and specify explicitly 
> the target. This seems overwhelming.
> 2. Simply remove those two test cases. This seems to be the simplest solution.
>
> Gábor



r352956 - [ASTImporter] Fix up test that only works on X86.

2019-02-02 Thread David Green via cfe-commits
Author: dmgreen
Date: Sat Feb  2 00:31:22 2019
New Revision: 352956

URL: http://llvm.org/viewvc/llvm-project?rev=352956&view=rev
Log:
[ASTImporter] Fix up test that only works on X86.

The test will fail if the default target triple is not X86,
even if the host platform is. So move the check into the
test at runtime.


Modified:
cfe/trunk/unittests/AST/StructuralEquivalenceTest.cpp

Modified: cfe/trunk/unittests/AST/StructuralEquivalenceTest.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/AST/StructuralEquivalenceTest.cpp?rev=352956&r1=352955&r2=352956&view=diff
==
--- cfe/trunk/unittests/AST/StructuralEquivalenceTest.cpp (original)
+++ cfe/trunk/unittests/AST/StructuralEquivalenceTest.cpp Sat Feb  2 00:31:22 
2019
@@ -378,10 +378,12 @@ TEST_F(StructuralEquivalenceFunctionTest
   EXPECT_TRUE(testStructuralMatch(t));
 }
 
-// These attributes may not be available on certain platforms.
-#if defined(__x86_64__) && defined(__linux__)
 TEST_F(StructuralEquivalenceFunctionTest,
 FunctionsWithDifferentCallingConventions) {
+  // These attributes may not be available on certain platforms.
+  if (llvm::Triple(llvm::sys::getDefaultTargetTriple()).getArch() !=
+  llvm::Triple::x86_64)
+return;
   auto t = makeNamedDecls(
   "__attribute__((preserve_all)) void foo();",
   "__attribute__((ms_abi))   void foo();",
@@ -390,13 +392,15 @@ TEST_F(StructuralEquivalenceFunctionTest
 }
 
 TEST_F(StructuralEquivalenceFunctionTest, FunctionsWithDifferentSavedRegsAttr) 
{
+  if (llvm::Triple(llvm::sys::getDefaultTargetTriple()).getArch() !=
+  llvm::Triple::x86_64)
+return;
   auto t = makeNamedDecls(
   "__attribute__((no_caller_saved_registers)) void foo();",
   "   void foo();",
   Lang_C);
   EXPECT_FALSE(testStructuralMatch(t));
 }
-#endif
 
 struct StructuralEquivalenceCXXMethodTest : StructuralEquivalenceTest {
 };




Re: r352055 - Fix failing buildbots

2019-02-02 Thread David Green via cfe-commits
Sounds good to me, easy enough for me to test here. And I'll count that as a 
review.


I've given it a try in rC352956. We can see how that bot feels about it.


Dave

> Dave,
>
> The idea to check explicitly the triple inside the test function is quite 
> convincing. Will you try to fix it that way? Or if it can wait a bit, this 
> will be my first thing to do on Monday.
>
> Gábor


r354201 - Move multiline raw string literal out of macro. NFC

2019-02-16 Thread David Green via cfe-commits
Author: dmgreen
Date: Sat Feb 16 03:19:04 2019
New Revision: 354201

URL: http://llvm.org/viewvc/llvm-project?rev=354201&view=rev
Log:
Move multiline raw string literal out of macro. NFC

Certain combinations of gcc and ccache fail when the raw
string literal is preprocessed. This just moves the string
out as is done elsewhere in the same file.

Modified:
cfe/trunk/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp

Modified: cfe/trunk/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp?rev=354201&r1=354200&r2=354201&view=diff
==
--- cfe/trunk/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp (original)
+++ cfe/trunk/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp Sat Feb 16 
03:19:04 2019
@@ -491,16 +491,15 @@ TEST(MatcherCXXMemberCallExpr, On) {
   EXPECT_TRUE(matches(Snippet2, MatchesX));
 
   // Parens are ignored.
+  auto Snippet3 = R"cc(
+struct Y {
+  void m();
+};
+Y g();
+void z(Y y) { (g()).m(); }
+  )cc";
   auto MatchesCall = cxxMemberCallExpr(on(callExpr()));
-  EXPECT_TRUE(matches(
-  R"cc(
-struct Y {
-  void m();
-};
-Y g();
-void z(Y y) { (g()).m(); }
-  )cc",
-  MatchesCall));
+  EXPECT_TRUE(matches(Snippet3, MatchesCall));
 }
 
 TEST(MatcherCXXMemberCallExpr, OnImplicitObjectArgument) {
@@ -527,16 +526,15 @@ TEST(MatcherCXXMemberCallExpr, OnImplici
   EXPECT_TRUE(notMatches(Snippet2, MatchesX));
 
   // Parens are not ignored.
+  auto Snippet3 = R"cc(
+struct Y {
+  void m();
+};
+Y g();
+void z(Y y) { (g()).m(); }
+  )cc";
   auto MatchesCall = cxxMemberCallExpr(onImplicitObjectArgument(callExpr()));
-  EXPECT_TRUE(notMatches(
-  R"cc(
-struct Y {
-  void m();
-};
-Y g();
-void z(Y y) { (g()).m(); }
-  )cc",
-  MatchesCall));
+  EXPECT_TRUE(notMatches(Snippet3, MatchesCall));
 }
 
 TEST(Matcher, HasObjectExpr) {




r362814 - [ARM] Add ACLE feature macros for MVE.

2019-06-07 Thread David Green via cfe-commits
Author: dmgreen
Date: Fri Jun  7 10:28:12 2019
New Revision: 362814

URL: http://llvm.org/viewvc/llvm-project?rev=362814&view=rev
Log:
[ARM] Add ACLE feature macros for MVE.

Fixup uninitialised variable.

Modified:
cfe/trunk/lib/Basic/Targets/ARM.cpp

Modified: cfe/trunk/lib/Basic/Targets/ARM.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/ARM.cpp?rev=362814&r1=362813&r2=362814&view=diff
==
--- cfe/trunk/lib/Basic/Targets/ARM.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/ARM.cpp Fri Jun  7 10:28:12 2019
@@ -400,6 +400,7 @@ bool ARMTargetInfo::initFeatureMap(
 bool ARMTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
  DiagnosticsEngine &Diags) {
   FPU = 0;
+  MVE = 0;
   CRC = 0;
   Crypto = 0;
   DSP = 0;




r367885 - [AST] Fix RecursiveASTVisitorTest multiline string literal. NFC

2019-08-05 Thread David Green via cfe-commits
Author: dmgreen
Date: Mon Aug  5 09:27:36 2019
New Revision: 367885

URL: http://llvm.org/viewvc/llvm-project?rev=367885&view=rev
Log:
[AST] Fix RecursiveASTVisitorTest multiline string literal. NFC

Some compilers, notably older gccs (< 8), can have trouble with multiline raw
string literals inside macros. This just moves the code outside the macro, to
attempt to appease the bots.

Modified:

cfe/trunk/unittests/Tooling/RecursiveASTVisitorTests/ImplicitCtorInitializer.cpp

Modified: 
cfe/trunk/unittests/Tooling/RecursiveASTVisitorTests/ImplicitCtorInitializer.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Tooling/RecursiveASTVisitorTests/ImplicitCtorInitializer.cpp?rev=367885&r1=367884&r2=367885&view=diff
==
--- 
cfe/trunk/unittests/Tooling/RecursiveASTVisitorTests/ImplicitCtorInitializer.cpp
 (original)
+++ 
cfe/trunk/unittests/Tooling/RecursiveASTVisitorTests/ImplicitCtorInitializer.cpp
 Mon Aug  5 09:27:36 2019
@@ -41,7 +41,7 @@ TEST(RecursiveASTVisitor, CXXCtorInitial
   for (bool VisitImplCode : {true, false}) {
 CXXCtorInitializerVisitor Visitor(VisitImplCode);
 Visitor.ExpectMatch("initializer", 7, 17);
-EXPECT_TRUE(Visitor.runOver(R"cpp(
+auto Code = R"cpp(
 class A {};
 class B : public A {
   B() {};
@@ -49,8 +49,8 @@ TEST(RecursiveASTVisitor, CXXCtorInitial
 class C : public A {
   C() : A() {}
 };
-  )cpp",
-CXXCtorInitializerVisitor::Lang_CXX));
+  )cpp";
+EXPECT_TRUE(Visitor.runOver(Code, CXXCtorInitializerVisitor::Lang_CXX));
 EXPECT_EQ(Visitor.VisitedImplicitInitializer, VisitImplCode);
   }
 }




r349059 - Fix CodeCompleteTest.cpp for older gcc plus ccache builds

2018-12-13 Thread David Green via cfe-commits
Author: dmgreen
Date: Thu Dec 13 09:20:06 2018
New Revision: 349059

URL: http://llvm.org/viewvc/llvm-project?rev=349059&view=rev
Log:
Fix CodeCompleteTest.cpp for older gcc plus ccache builds

Some versions of gcc, especially when invoked through ccache (-E), can have
trouble with raw string literals inside macros. This moves the string out of
the macro.

Modified:
cfe/trunk/unittests/Sema/CodeCompleteTest.cpp

Modified: cfe/trunk/unittests/Sema/CodeCompleteTest.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Sema/CodeCompleteTest.cpp?rev=349059&r1=349058&r2=349059&view=diff
==
--- cfe/trunk/unittests/Sema/CodeCompleteTest.cpp (original)
+++ cfe/trunk/unittests/Sema/CodeCompleteTest.cpp Thu Dec 13 09:20:06 2018
@@ -183,113 +183,113 @@ TEST(SemaCodeCompleteTest, VisitedNSWith
 
 TEST(PreferredTypeTest, BinaryExpr) {
   // Check various operations for arithmetic types.
-  EXPECT_THAT(collectPreferredTypes(R"cpp(
+  StringRef code1 = R"cpp(
 void test(int x) {
   x = ^10;
   x += ^10; x -= ^10; x *= ^10; x /= ^10; x %= ^10;
   x + ^10; x - ^10; x * ^10; x / ^10; x % ^10;
-})cpp"),
-  Each("int"));
-  EXPECT_THAT(collectPreferredTypes(R"cpp(
+})cpp";
+  EXPECT_THAT(collectPreferredTypes(code1), Each("int"));
+  StringRef code2 = R"cpp(
 void test(float x) {
   x = ^10;
   x += ^10; x -= ^10; x *= ^10; x /= ^10; x %= ^10;
   x + ^10; x - ^10; x * ^10; x / ^10; x % ^10;
-})cpp"),
-  Each("float"));
+})cpp";
+  EXPECT_THAT(collectPreferredTypes(code2), Each("float"));
 
   // Pointer types.
-  EXPECT_THAT(collectPreferredTypes(R"cpp(
+  StringRef code3 = R"cpp(
 void test(int *ptr) {
   ptr - ^ptr;
   ptr = ^ptr;
-})cpp"),
-  Each("int *"));
+})cpp";
+  EXPECT_THAT(collectPreferredTypes(code3), Each("int *"));
 
-  EXPECT_THAT(collectPreferredTypes(R"cpp(
+  StringRef code4 = R"cpp(
 void test(int *ptr) {
   ptr + ^10;
   ptr += ^10;
   ptr -= ^10;
-})cpp"),
-  Each("long")); // long is normalized 'ptrdiff_t'.
+})cpp";
+  EXPECT_THAT(collectPreferredTypes(code4), Each("long")); // long is 
normalized 'ptrdiff_t'.
 
   // Comparison operators.
-  EXPECT_THAT(collectPreferredTypes(R"cpp(
+  StringRef code5 = R"cpp(
 void test(int i) {
   i <= ^1; i < ^1; i >= ^1; i > ^1; i == ^1; i != ^1;
 }
-  )cpp"),
-  Each("int"));
+  )cpp";
+  EXPECT_THAT(collectPreferredTypes(code5), Each("int"));
 
-  EXPECT_THAT(collectPreferredTypes(R"cpp(
+  StringRef code6 = R"cpp(
 void test(int *ptr) {
   ptr <= ^ptr; ptr < ^ptr; ptr >= ^ptr; ptr > ^ptr;
   ptr == ^ptr; ptr != ^ptr;
 }
-  )cpp"),
-  Each("int *"));
+  )cpp";
+  EXPECT_THAT(collectPreferredTypes(code6), Each("int *"));
 
   // Relational operations.
-  EXPECT_THAT(collectPreferredTypes(R"cpp(
+  StringRef code7 = R"cpp(
 void test(int i, int *ptr) {
   i && ^1; i || ^1;
   ptr && ^1; ptr || ^1;
 }
-  )cpp"),
-  Each("_Bool"));
+  )cpp";
+  EXPECT_THAT(collectPreferredTypes(code7), Each("_Bool"));
 
   // Bitwise operations.
-  EXPECT_THAT(collectPreferredTypes(R"cpp(
+  StringRef code8 = R"cpp(
 void test(long long ll) {
   ll | ^1; ll & ^1;
 }
-  )cpp"),
-  Each("long long"));
+  )cpp";
+  EXPECT_THAT(collectPreferredTypes(code8), Each("long long"));
 
-  EXPECT_THAT(collectPreferredTypes(R"cpp(
+  StringRef code9 = R"cpp(
 enum A {};
 void test(A a) {
   a | ^1; a & ^1;
 }
-  )cpp"),
-  Each("enum A"));
+  )cpp";
+  EXPECT_THAT(collectPreferredTypes(code9), Each("enum A"));
 
-  EXPECT_THAT(collectPreferredTypes(R"cpp(
+  StringRef code10 = R"cpp(
 enum class A {};
 void test(A a) {
   // This is technically illegal with the 'enum class' without overloaded
   // operators, but we pretend it's fine.
   a | ^a; a & ^a;
 }
-  )cpp"),
-  Each("enum A"));
+  )cpp";
+  EXPECT_THAT(collectPreferredTypes(code10), Each("enum A"));
 
   // Binary shifts.
-  EXPECT_THAT(collectPreferredTypes(R"cpp(
+  StringRef code11 = R"cpp(
 void test(int i, long long ll) {
   i << ^1; ll << ^1;
   i <<= ^1; i <<= ^1;
   i >> ^1; ll >> ^1;
   i >>= ^1; i >>= ^1;
 }
-  )cpp"),
-  Each("int"));
+  )cpp";
+  EXPECT_THAT(collectPreferredTypes(code11), Each("int"));
 
   // Comma does not provide any useful information.
-  EXPECT_THAT(collectPreferredTypes(R"cpp(
+  StringRef code12 = R"cpp(
 class Cls {};
 void test(int i, int* ptr, Cls x) {
   (i, ^i);
   (ptr, ^ptr);
   (x, ^x);
 }
-  )cpp"),
-  Each("NULL TYPE"));
+  )cpp";
+  EXPECT_THAT(collectPreferredTypes(code12), Each("NULL TYPE"));
 
   // User-defined types do not take operator overloading into account.
   // However, they provide heuristics for some common cases.
-

Re: r349053 - [CodeComplete] Fill preferred type on binary expressions

2018-12-13 Thread David Green via cfe-commits
Hello!

Certain versions of gcc (along with ccache iirc, where they use -E) don't like 
these raw string literals inside macros.

This happens: https://godbolt.org/g/fsXjB7


I've tried to fix it up in https://reviews.llvm.org/rL349059. My choice of 
variable names may not have been very inspired though. Let me know if anything 
looks off.


Cheers

Dave



[clang] 7b3de1e - [ARM] Attempt to fixup MveEmitter warnings

2019-10-24 Thread David Green via cfe-commits

Author: David Green
Date: 2019-10-24T19:43:15+01:00
New Revision: 7b3de1e811972b874d91554642ccb2ef5b32eed6

URL: 
https://github.com/llvm/llvm-project/commit/7b3de1e811972b874d91554642ccb2ef5b32eed6
DIFF: 
https://github.com/llvm/llvm-project/commit/7b3de1e811972b874d91554642ccb2ef5b32eed6.diff

LOG: [ARM] Attempt to fixup MveEmitter warnings

Change-Id: I3fb06de2202c3b7a9ce511a40e758d0971ef9fdb

Added: 


Modified: 
clang/utils/TableGen/MveEmitter.cpp

Removed: 




diff  --git a/clang/utils/TableGen/MveEmitter.cpp 
b/clang/utils/TableGen/MveEmitter.cpp
index 6fac472a7a19..c4270ff586cf 100644
--- a/clang/utils/TableGen/MveEmitter.cpp
+++ b/clang/utils/TableGen/MveEmitter.cpp
@@ -157,8 +157,6 @@ inline std::string toLetter(ScalarTypeKind kind) {
 return "u";
   case ScalarTypeKind::Float:
 return "f";
-  default:
-llvm_unreachable("bad scalar type kind");
   }
 }
 inline std::string toCPrefix(ScalarTypeKind kind) {
@@ -169,8 +167,6 @@ inline std::string toCPrefix(ScalarTypeKind kind) {
 return "uint";
   case ScalarTypeKind::Float:
 return "float";
-  default:
-llvm_unreachable("bad scalar type kind");
   }
 }
 
@@ -538,7 +534,7 @@ class BuiltinArgResult : public Result {
 OS << (AddressType ? "EmitPointerWithAlignment" : "EmitScalarExpr")
<< "(E->getArg(" << ArgNum << "))";
   }
-  virtual std::string typeName() const {
+  std::string typeName() const override {
 return AddressType ? "Address" : Result::typeName();
   }
 };





[clang] 78700ef - [ARM] Fixup MVE intrinsic tests with no assert builds

2019-10-24 Thread David Green via cfe-commits

Author: David Green
Date: 2019-10-24T19:59:15+01:00
New Revision: 78700ef8866db7f5cea113fa81d810a28b5b7438

URL: 
https://github.com/llvm/llvm-project/commit/78700ef8866db7f5cea113fa81d810a28b5b7438
DIFF: 
https://github.com/llvm/llvm-project/commit/78700ef8866db7f5cea113fa81d810a28b5b7438.diff

LOG: [ARM] Fixup MVE intrinsic tests with no assert builds

The labels will be missing, so -fno-discard-value-names is added to the tests.

Added: 


Modified: 
clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c
clang/test/CodeGen/arm-mve-intrinsics/vadc.c
clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
clang/test/CodeGen/arm-mve-intrinsics/vld24.c
clang/test/CodeGen/arm-mve-intrinsics/vldr.c
clang/test/CodeGen/arm-mve-intrinsics/vminvq.c

Removed: 




diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c 
b/clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c
index d7c4d5e85ae4..ec9a47f18eb9 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c
@@ -1,5 +1,5 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp 
-mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o - %s | opt -S 
-mem2reg | FileCheck %s
+// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp 
-mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -fno-discard-value-names -S 
-emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
 
 #include <arm_mve.h>
 

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vadc.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vadc.c
index bd6bdc53d08d..6b77eac9ca54 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vadc.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vadc.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp 
-mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o - %s | opt -S 
-mem2reg | FileCheck %s
-// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp 
-mfloat-abi=hard -DPOLYMORPHIC -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o 
- %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp 
-mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -fno-discard-value-names -S 
-emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp 
-mfloat-abi=hard -DPOLYMORPHIC -O0 -Xclang -disable-O0-optnone 
-fno-discard-value-names -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
 
 #include <arm_mve.h>
 

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
index 30923ee1a2b2..970ac53cefc6 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp 
-mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o - %s | opt -S 
-mem2reg | FileCheck %s
-// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp 
-mfloat-abi=hard -DPOLYMORPHIC -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o 
- %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp 
-mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -fno-discard-value-names -S 
-emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp 
-mfloat-abi=hard -DPOLYMORPHIC -O0 -Xclang -disable-O0-optnone 
-fno-discard-value-names -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
 
 #include <arm_mve.h>
 

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
index ab1b0180eeef..1aae36619dfa 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
@@ -1,5 +1,5 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp 
-mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o - %s | opt -S 
-mem2reg | FileCheck %s
+// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp 
-mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -fno-discard-value-names -S 
-emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
 
 #include <arm_mve.h>
 

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vld24.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vld24.c
index 2adf6db98832..df128b61bcea 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vld24.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vld24.c
@@ -1,6 +1,6 @@
 /

[clang] [AArch64] Add soft-float ABI (PR #74460)

2023-12-05 Thread David Green via cfe-commits


@@ -534,7 +540,8 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, 
QualType Ty,
 BaseTy = ArrTy->getElementType();
 NumRegs = ArrTy->getNumElements();
   }
-  bool IsFPR = BaseTy->isFloatingPointTy() || BaseTy->isVectorTy();
+  bool IsFPR = Kind == AArch64ABIKind::AAPCS &&

davemgreen wrote:

Could this be `Kind != AArch64ABIKind::AAPCS` in case other ABIKinds make their 
way here?

https://github.com/llvm/llvm-project/pull/74460


[libcxx] [clang] [flang] [compiler-rt] [llvm] [clang-tools-extra] [libcxxabi] [lldb] [mlir] [openmp] [MachineCopyPropagation] When the source of PreviousCopy is undef, we cannot replace sub register (

2023-12-07 Thread David Green via cfe-commits

davemgreen wrote:

Hello. I think that if you removed undef from the first instruction the result 
would still be incorrect. With:
```
$x8 = ORRXrs $xzr, $x0, 0, implicit $w0
$w8 = ORRWrs $wzr, $w0, 0, implicit-def $x8
```
The second instruction will zero-extend the w0 register into x8. It would be OK
to remove the first instruction (it is dead), but it is not OK to remove the second
if something is relying on the top bits being zero. I assume that's what goes
wrong in your case? The top bits are not zero on entry to the function?
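
To make that concrete, here is a hypothetical sequence (the register values are
invented for illustration, not taken from the reported case):

```
// On entry (hypothetical): x0 = 0xffffffff00000001, so w0 = 0x00000001.
//   $x8 = ORRXrs $xzr, $x0, 0, implicit $w0     ; x8 = 0xffffffff00000001 (dead)
//   $w8 = ORRWrs $wzr, $w0, 0, implicit-def $x8 ; x8 = 0x0000000000000001 (zero-extended)
// If copy propagation erases the second instruction as a "redundant copy of
// x0 -> x8", a later user of $x8 sees 0xffffffff00000001 rather than the
// zero-extended value, which is exactly the failure described above.
```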

https://github.com/llvm/llvm-project/pull/74682
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [ARM][AArch32] Add support for AArch32 Cortex-M52 CPU (PR #74822)

2023-12-08 Thread David Green via cfe-commits


@@ -899,6 +899,7 @@ Arm and AArch64 Support
   * Arm Cortex-A520 (cortex-a520).
   * Arm Cortex-A720 (cortex-a720).
   * Arm Cortex-X4 (cortex-x4).
+  * Arm Cortex-M52 (cortex-m52).

davemgreen wrote:

Is it worth splitting this list into one for -target=aarch64 and another for 
-target=arm?

https://github.com/llvm/llvm-project/pull/74822
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [ARM][AArch32] Add support for AArch32 Cortex-M52 CPU (PR #74822)

2023-12-08 Thread David Green via cfe-commits

davemgreen wrote:

CDE is enabled per decode block and probably doesn't make a lot of sense to
enable universally. The cde options each pick between two features (CDE vs
co-processor), so they aren't quite the same as on vs off.

https://github.com/llvm/llvm-project/pull/74822
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [ARM][AArch32] Add support for AArch32 Cortex-M52 CPU (PR #74822)

2023-12-08 Thread David Green via cfe-commits


@@ -896,9 +896,13 @@ Arm and AArch64 Support
 
   Support has been added for the following processors (-mcpu identifiers in 
parenthesis):
 
-  * Arm Cortex-A520 (cortex-a520).
-  * Arm Cortex-A720 (cortex-a720).
-  * Arm Cortex-X4 (cortex-x4).
+  --target=arm
+ * Arm Cortex-M52 (cortex-m52).
+
+  --target=aarch64
+ * Arm Cortex-A520 (cortex-a520).
+ * Arm Cortex-A720 (cortex-a720).
+ * Arm Cortex-X4 (cortex-x4).

davemgreen wrote:

Maybe just "For Arm:" and "For AArch64:" if the --target is awkward. Otherwise 
this LGTM if David Agrees

https://github.com/llvm/llvm-project/pull/74822
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[mlir] [lldb] [openmp] [llvm] [clang] [libcxxabi] [compiler-rt] [clang-tools-extra] [flang] [libcxx] [MachineCopyPropagation] When the source of PreviousCopy is undef, we cannot replace sub register (

2023-12-11 Thread David Green via cfe-commits

davemgreen wrote:

I don't believe the undef is the issue - I think the issue is that 
AArch64InstrInfo::isCopyInstrImpl is saying that a W-reg orr is a copy, even if 
it is really a zextend because the entire X output register is depended upon.

Can you try and add something to isCopyInstrImpl instead, that says: if the
register is virtual then it should not be a subreg, and if the register is
physical then there should not be an implicit def of the X reg. Would that
solve your issue, and would it cause other problems in AArch64 codegen? Thanks
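
For illustration, a rough sketch of the kind of guard that suggestion amounts to
(hypothetical: the operand indices and the exact return type of isCopyInstrImpl
are assumptions here, not copied from the patch):

```
// Sketch only: refuse to describe the ORR as a plain copy when it is not one.
const MachineOperand &Dst = MI.getOperand(0);
const MachineOperand &Src = MI.getOperand(2); // assumed source operand index
if (Dst.getReg().isVirtual()) {
  // Virtual registers: a subreg index means this is not a simple full-width copy.
  if (Dst.getSubReg() || Src.getSubReg())
    return std::nullopt;
} else {
  // Physical registers: an extra implicit def (e.g. of the X super-register)
  // means the instruction also zero-extends, so it is not a plain copy either.
  for (const MachineOperand &MO : MI.implicit_operands())
    if (MO.isReg() && MO.isDef() && MO.getReg() != Dst.getReg())
      return std::nullopt;
}
```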

https://github.com/llvm/llvm-project/pull/74682
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [llvm][ARM] Emit MVE .arch_extension after .fpu directive if it does not include MVE features (PR #71545)

2023-11-16 Thread David Green via cfe-commits

https://github.com/davemgreen commented:

Hi. From what I can tell the logic looks OK. We add the arch extension in the
places we expect to now?

It is generally considered best practice not to have clang tests that run the
entire pass pipeline. In this case it looks like it's trying to SLP vectorize
the code to make sure MVE operations are produced? The problem is that
decisions like that can change, and it is better if the tests are more narrowly
focused than relying on the whole pipeline. You do lose end-to-end testing,
but it may be better to have a clang test that checks the IR generated is what
is expected, and an assembly test to make sure the `.arch_extension mve.fp` is
recognized and turns on the MVE instructions it should.

https://github.com/llvm/llvm-project/pull/71545
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [llvm][ARM] Emit MVE .arch_extension after .fpu directive if it does not include MVE features (PR #71545)

2023-11-16 Thread David Green via cfe-commits

https://github.com/davemgreen edited 
https://github.com/llvm/llvm-project/pull/71545
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [llvm][ARM] Emit MVE .arch_extension after .fpu directive if it does not include MVE features (PR #71545)

2023-11-16 Thread David Green via cfe-commits


@@ -0,0 +1,35 @@
+// RUN: %clang --target=arm-none-eabi -mcpu=cortex-m85 -mfloat-abi=hard -O2 -save-temps=obj -S -o - %s | FileCheck %s
+// RUN: %clang --target=arm-none-eabi -mcpu=cortex-m55 -mfloat-abi=hard -O2 -save-temps=obj -S -o - %s | FileCheck %s
+
+// The below tests are to make sure that assembly directives do not lose mve feature so that reassembly works with
+// mve floating point instructions.
+// RUN: %clang --target=arm-none-eabi -mcpu=cortex-m85 -mfloat-abi=hard -O2 -c -mthumb -save-temps=obj %s
+// RUN: %clang --target=arm-none-eabi -mcpu=cortex-m55 -mfloat-abi=hard -O2 -c -mthumb -save-temps=obj %s
+
+// REQUIRES: arm-registered-target
+
+// CHECK: .fpu   fpv5-d16
+// CHECK-NEXT  .arch_extension mve.fp
+
+#define DUMMY_CONST_1 (0.0012345F)
+
+typedef struct
+{
+float a;
+float b;
+float c;
+float d;
+} dummy_t;
+
+// CHECK-LABEL: foo
+// CHECK: vsub.f32
+// CHECK: vfma.f32

davemgreen wrote:

This looks like it is trying to SLP vectorize to vector operations? These 
instructions will match both `vsub.f32 s6, s2, s10` and `vsub.f32 q6, q2, q1` 
operations though.

https://github.com/llvm/llvm-project/pull/71545
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [llvm][ARM] Emit MVE .arch_extension after .fpu directive if it does not include MVE features (PR #71545)

2023-11-21 Thread David Green via cfe-commits

https://github.com/davemgreen approved this pull request.

Thanks. LGTM

https://github.com/llvm/llvm-project/pull/71545
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang-tools-extra] [llvm] [clang] [MachineLICM][AArch64] Hoist COPY instructions with other uses in the loop (PR #71403)

2023-11-21 Thread David Green via cfe-commits


@@ -1262,6 +1262,18 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr 
&MI,
 return false;
   }
 
+  // If we have a COPY with other uses in the loop, hoist to allow the users to

davemgreen wrote:

Hello. From what I've seen in our benchmarks this has been positive, but there 
is often some noise from hoisting/sinking. You are right that this could be 
more conservative, but in our case cross register bank copies will be 
relatively expensive and we would want to hoist them if we could. I'm not sure 
about the subreg extracts, but a lot of COPYs are removed prior to register 
allocation and it would be good if it knew where best to re-add them, if needed.
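
As a concrete illustration of the kind of copy I have in mind here (register
names invented):

```
// A loop-invariant cross register bank copy feeding work inside the loop:
//   loop:
//     %f:fpr = COPY %g:gpr   ; int -> fp transfer, same value every iteration
//     ...                    ; FP users of %f in the loop body
// Re-materialising that transfer on every iteration is relatively expensive,
// so hoisting the COPY lets the in-loop users read a register set up once.
```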

https://github.com/llvm/llvm-project/pull/71403
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [clang-tools-extra] [AArch64] Add an AArch64 pass for loop idiom transformations (PR #72273)

2023-12-13 Thread David Green via cfe-commits


@@ -0,0 +1,839 @@
+//===- AArch64LoopIdiomTransform.cpp - Loop idiom recognition 
-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This pass implements a pass that recognizes certain loop idioms and
+// transforms them into more optimized versions of the same loop. In cases
+// where this happens, it can be a significant performance win.
+//
+// We currently only recognize one loop that finds the first mismatched byte
+// in an array and returns the index, i.e. something like:
+//
+//  while (++i != n) {
+//if (a[i] != b[i])
+//  break;
+//  }
+//
+// In this example we can actually vectorize the loop despite the early exit,
+// although the loop vectorizer does not support it. It requires some extra
+// checks to deal with the possibility of faulting loads when crossing page
+// boundaries. However, even with these checks it is still profitable to do the
+// transformation.
+//
+//===--===//
+//
+// TODO List:
+//
+// * When optimizing for code size we may want to avoid some transformations.
+// * We can also support the inverse case where we scan for a matching element.
+//
+//===--===//
+
+#include "AArch64LoopIdiomTransform.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-loop-idiom-transform"
+
+static cl::opt<bool>
+    DisableAll("disable-aarch64-lit-all", cl::Hidden, cl::init(false),
+               cl::desc("Disable AArch64 Loop Idiom Transform Pass."));
+
+static cl::opt<bool> DisableByteCmp(
+    "disable-aarch64-lit-bytecmp", cl::Hidden, cl::init(false),
+    cl::desc("Proceed with AArch64 Loop Idiom Transform Pass, but do "
+             "not convert byte-compare loop(s)."));
+
+static cl::opt<bool> VerifyLoops(
+    "aarch64-lit-verify", cl::Hidden, cl::init(false),
+    cl::desc("Verify loops generated AArch64 Loop Idiom Transform Pass."));
+
+namespace llvm {
+
+void initializeAArch64LoopIdiomTransformLegacyPassPass(PassRegistry &);
+Pass *createAArch64LoopIdiomTransformPass();
+
+} // end namespace llvm
+
+namespace {
+
+class AArch64LoopIdiomTransform {
+  Loop *CurLoop = nullptr;
+  DominatorTree *DT;
+  LoopInfo *LI;
+  const TargetTransformInfo *TTI;
+  const DataLayout *DL;
+
+public:
+  explicit AArch64LoopIdiomTransform(DominatorTree *DT, LoopInfo *LI,
+ const TargetTransformInfo *TTI,
+ const DataLayout *DL)
+  : DT(DT), LI(LI), TTI(TTI), DL(DL) {}
+
+  bool run(Loop *L);
+
+private:
+  /// \name Countable Loop Idiom Handling
+  /// @{
+
+  bool runOnCountableLoop();
+  bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
+                      SmallVectorImpl<BasicBlock *> &ExitBlocks);
+
+  bool recognizeByteCompare();
+  Value *expandFindMismatch(IRBuilder<> &Builder, GetElementPtrInst *GEPA,
+GetElementPtrInst *GEPB, Value *Start,
+Value *MaxLen);
+  void transformByteCompare(GetElementPtrInst *GEPA, GetElementPtrInst *GEPB,
+Value *MaxLen, Value *Index, Value *Start,
+bool IncIdx, BasicBlock *FoundBB,
+BasicBlock *EndBB);
+  /// @}
+};
+
+class AArch64LoopIdiomTransformLegacyPass : public LoopPass {
+public:
+  static char ID;
+
+  explicit AArch64LoopIdiomTransformLegacyPass() : LoopPass(ID) {
+initializeAArch64LoopIdiomTransformLegacyPassPass(
+*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override {
+return "Transform AArch64-specific loop idioms";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<LoopInfoWrapperPass>();
+    AU.addRequired<TargetTransformInfoWrapperPass>();
+  }
+
+  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+};
+
+bool AArch64LoopIdiomTransformLegacyPass::runOnLoop(Loop *L,
+LPPassManager &LPM) {
+
+  if (skipLoop(L))
+return false;
+
+  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+  *L->getHeader()->getParent());
+  return AArch64LoopIdiomTransform(
+ DT, LI, &TTI, &L->getHeader()->getModule()->getDataLayout())
+  .run(L);
+}
+
+} // end anonymou

[clang] [clang-tools-extra] [llvm] [AArch64] Add an AArch64 pass for loop idiom transformations (PR #72273)

2023-12-13 Thread David Green via cfe-commits


@@ -0,0 +1,839 @@
+//===- AArch64LoopIdiomTransform.cpp - Loop idiom recognition 
-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This pass implements a pass that recognizes certain loop idioms and
+// transforms them into more optimized versions of the same loop. In cases
+// where this happens, it can be a significant performance win.
+//
+// We currently only recognize one loop that finds the first mismatched byte
+// in an array and returns the index, i.e. something like:
+//
+//  while (++i != n) {
+//if (a[i] != b[i])
+//  break;
+//  }
+//
+// In this example we can actually vectorize the loop despite the early exit,
+// although the loop vectorizer does not support it. It requires some extra
+// checks to deal with the possibility of faulting loads when crossing page
+// boundaries. However, even with these checks it is still profitable to do the
+// transformation.
+//
+//===--===//
+//
+// TODO List:
+//
+// * When optimizing for code size we may want to avoid some transformations.

davemgreen wrote:

Can we add a check for this now? It looks like it will be quite a bit bigger?
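
For example (a minimal sketch, assuming the transform reaches the parent
Function through the loop header as elsewhere in this file), the check could be
roughly:

```
// Sketch: skip the transform when the function is optimised for size, since
// the expanded mismatch loop is considerably larger than the original byte loop.
Function *F = CurLoop->getHeader()->getParent();
if (F->hasOptSize() || F->hasMinSize())
  return false;
```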

https://github.com/llvm/llvm-project/pull/72273
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[openmp] [clang-tools-extra] [libcxx] [mlir] [clang] [compiler-rt] [lldb] [llvm] [libcxxabi] [flang] [MachineCopyPropagation] When the source of PreviousCopy is undef, we cannot replace sub register (

2023-12-13 Thread David Green via cfe-commits

davemgreen wrote:

Thanks. It sounds like there are not a lot of code changes, which is a good 
sign. I didn't expect the debug problems though.

I'll try and take a look at the patch. Perhaps you are right that we need a new 
method for the debug info to use.

https://github.com/llvm/llvm-project/pull/74682
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang-tools-extra] [LoopVectorize] Enable hoisting of runtime checks by default (PR #71538)

2023-12-15 Thread David Green via cfe-commits

https://github.com/davemgreen approved this pull request.

With that fixed, and from the perf I've seen, this LGTM. Thanks

https://github.com/llvm/llvm-project/pull/71538
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [TargetParser] Define AEK_FCMA and AEK_JSCVT for tsv110 (PR #75516)

2023-12-18 Thread David Green via cfe-commits


@@ -81,6 +81,15 @@ static bool DecodeAArch64Features(const Driver &D, StringRef 
text,
 else
   return false;
 
+// +jsconv and +complxnum implies +neon and +fp-armv8

davemgreen wrote:

I believe this ideally would not be in the driver, as it does not apply to 
target attributes, only -march options.

https://github.com/llvm/llvm-project/pull/75516
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [ARM] arm_acle.h add Coprocessor Instrinsics (PR #75440)

2023-12-18 Thread David Green via cfe-commits

davemgreen wrote:

It looks like there is a downstream implementation of this that was never 
upstreamed. Perhaps someone can fish it out for you to show how it looked? It 
might be using the wrong predefined macro, but does have some tests.

https://github.com/llvm/llvm-project/pull/75440
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [clang-tools-extra] [AArch64] Add an AArch64 pass for loop idiom transformations (PR #72273)

2023-12-19 Thread David Green via cfe-commits

https://github.com/davemgreen edited 
https://github.com/llvm/llvm-project/pull/72273
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang-tools-extra] [clang] [llvm] [AArch64] Add an AArch64 pass for loop idiom transformations (PR #72273)

2023-12-19 Thread David Green via cfe-commits


@@ -0,0 +1,816 @@
+//===- AArch64LoopIdiomTransform.cpp - Loop idiom recognition 
-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This pass implements a pass that recognizes certain loop idioms and
+// transforms them into more optimized versions of the same loop. In cases
+// where this happens, it can be a significant performance win.
+//
+// We currently only recognize one loop that finds the first mismatched byte
+// in an array and returns the index, i.e. something like:
+//
+//  while (++i != n) {
+//if (a[i] != b[i])
+//  break;
+//  }
+//
+// In this example we can actually vectorize the loop despite the early exit,
+// although the loop vectorizer does not support it. It requires some extra
+// checks to deal with the possibility of faulting loads when crossing page
+// boundaries. However, even with these checks it is still profitable to do the
+// transformation.
+//
+//===--===//
+//
+// TODO List:
+//
+// * When optimizing for code size we may want to avoid some transformations.
+// * We can also support the inverse case where we scan for a matching element.
+//
+//===--===//
+
+#include "AArch64LoopIdiomTransform.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-loop-idiom-transform"
+
+static cl::opt<bool>
+    DisableAll("disable-aarch64-lit-all", cl::Hidden, cl::init(false),
+               cl::desc("Disable AArch64 Loop Idiom Transform Pass."));
+
+static cl::opt<bool> DisableByteCmp(
+    "disable-aarch64-lit-bytecmp", cl::Hidden, cl::init(false),
+    cl::desc("Proceed with AArch64 Loop Idiom Transform Pass, but do "
+             "not convert byte-compare loop(s)."));
+
+static cl::opt<bool> VerifyLoops(
+    "aarch64-lit-verify", cl::Hidden, cl::init(false),
+    cl::desc("Verify loops generated AArch64 Loop Idiom Transform Pass."));
+
+namespace llvm {
+
+void initializeAArch64LoopIdiomTransformLegacyPassPass(PassRegistry &);
+Pass *createAArch64LoopIdiomTransformPass();
+
+} // end namespace llvm
+
+namespace {
+
+class AArch64LoopIdiomTransform {
+  Loop *CurLoop = nullptr;
+  DominatorTree *DT;
+  LoopInfo *LI;
+  const TargetTransformInfo *TTI;
+  const DataLayout *DL;
+
+public:
+  explicit AArch64LoopIdiomTransform(DominatorTree *DT, LoopInfo *LI,
+ const TargetTransformInfo *TTI,
+ const DataLayout *DL)
+  : DT(DT), LI(LI), TTI(TTI), DL(DL) {}
+
+  bool run(Loop *L);
+
+private:
+  /// \name Countable Loop Idiom Handling
+  /// @{
+
+  bool runOnCountableLoop();
+  bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
+                      SmallVectorImpl<BasicBlock *> &ExitBlocks);
+
+  bool recognizeByteCompare();
+  Value *expandFindMismatch(IRBuilder<> &Builder, GetElementPtrInst *GEPA,
+GetElementPtrInst *GEPB, Instruction *Index,
+Value *Start, Value *MaxLen);
+  void transformByteCompare(GetElementPtrInst *GEPA, GetElementPtrInst *GEPB,
+PHINode *IndPhi, Value *MaxLen, Instruction *Index,
+Value *Start, bool IncIdx, BasicBlock *FoundBB,
+BasicBlock *EndBB);
+  /// @}
+};
+
+class AArch64LoopIdiomTransformLegacyPass : public LoopPass {
+public:
+  static char ID;
+
+  explicit AArch64LoopIdiomTransformLegacyPass() : LoopPass(ID) {
+initializeAArch64LoopIdiomTransformLegacyPassPass(
+*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override {
+return "Transform AArch64-specific loop idioms";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<LoopInfoWrapperPass>();
+    AU.addRequired<TargetTransformInfoWrapperPass>();
+  }
+
+  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+};
+
+bool AArch64LoopIdiomTransformLegacyPass::runOnLoop(Loop *L,
+LPPassManager &LPM) {
+
+  if (skipLoop(L))
+return false;
+
+  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+  *L->getHeader()->getParent());
+  return AArch64LoopIdiomTransform(
+ DT, LI, &TTI, &L->getHeader()->getModule()->getDataLayout

[clang] [clang-tools-extra] [llvm] [AArch64] Add an AArch64 pass for loop idiom transformations (PR #72273)

2023-12-19 Thread David Green via cfe-commits

https://github.com/davemgreen commented:

Thanks. I think it is worth trying to get this in. I already see it triggering
in a number of places. It might be worth working on making it a little more
generic in follow-up patches if we can, but there is already quite a bit going
on.

https://github.com/llvm/llvm-project/pull/72273
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang-tools-extra] [clang] [AArch64] Add an AArch64 pass for loop idiom transformations (PR #72273)

2023-12-19 Thread David Green via cfe-commits


@@ -0,0 +1,816 @@
+//===- AArch64LoopIdiomTransform.cpp - Loop idiom recognition 
-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This pass implements a pass that recognizes certain loop idioms and
+// transforms them into more optimized versions of the same loop. In cases
+// where this happens, it can be a significant performance win.
+//
+// We currently only recognize one loop that finds the first mismatched byte
+// in an array and returns the index, i.e. something like:
+//
+//  while (++i != n) {
+//if (a[i] != b[i])
+//  break;
+//  }
+//
+// In this example we can actually vectorize the loop despite the early exit,
+// although the loop vectorizer does not support it. It requires some extra
+// checks to deal with the possibility of faulting loads when crossing page
+// boundaries. However, even with these checks it is still profitable to do the
+// transformation.
+//
+//===--===//
+//
+// TODO List:
+//
+// * When optimizing for code size we may want to avoid some transformations.
+// * We can also support the inverse case where we scan for a matching element.
+//
+//===--===//
+
+#include "AArch64LoopIdiomTransform.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-loop-idiom-transform"
+
+static cl::opt
+DisableAll("disable-aarch64-lit-all", cl::Hidden, cl::init(false),
+   cl::desc("Disable AArch64 Loop Idiom Transform Pass."));
+
+static cl::opt DisableByteCmp(
+"disable-aarch64-lit-bytecmp", cl::Hidden, cl::init(false),
+cl::desc("Proceed with AArch64 Loop Idiom Transform Pass, but do "
+ "not convert byte-compare loop(s)."));
+
+static cl::opt VerifyLoops(
+"aarch64-lit-verify", cl::Hidden, cl::init(false),
+cl::desc("Verify loops generated AArch64 Loop Idiom Transform Pass."));
+
+namespace llvm {
+
+void initializeAArch64LoopIdiomTransformLegacyPassPass(PassRegistry &);
+Pass *createAArch64LoopIdiomTransformPass();
+
+} // end namespace llvm
+
+namespace {
+
+class AArch64LoopIdiomTransform {
+  Loop *CurLoop = nullptr;
+  DominatorTree *DT;
+  LoopInfo *LI;
+  const TargetTransformInfo *TTI;
+  const DataLayout *DL;
+
+public:
+  explicit AArch64LoopIdiomTransform(DominatorTree *DT, LoopInfo *LI,
+ const TargetTransformInfo *TTI,
+ const DataLayout *DL)
+  : DT(DT), LI(LI), TTI(TTI), DL(DL) {}
+
+  bool run(Loop *L);
+
+private:
+  /// \name Countable Loop Idiom Handling
+  /// @{
+
+  bool runOnCountableLoop();
+  bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
+  SmallVectorImpl &ExitBlocks);
+
+  bool recognizeByteCompare();
+  Value *expandFindMismatch(IRBuilder<> &Builder, GetElementPtrInst *GEPA,
+GetElementPtrInst *GEPB, Instruction *Index,
+Value *Start, Value *MaxLen);
+  void transformByteCompare(GetElementPtrInst *GEPA, GetElementPtrInst *GEPB,
+PHINode *IndPhi, Value *MaxLen, Instruction *Index,
+Value *Start, bool IncIdx, BasicBlock *FoundBB,
+BasicBlock *EndBB);
+  /// @}
+};
+
+class AArch64LoopIdiomTransformLegacyPass : public LoopPass {
+public:
+  static char ID;
+
+  explicit AArch64LoopIdiomTransformLegacyPass() : LoopPass(ID) {
+initializeAArch64LoopIdiomTransformLegacyPassPass(
+*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override {
+return "Transform AArch64-specific loop idioms";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+AU.addRequired();
+AU.addRequired();
+AU.addRequired();
+  }
+
+  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+};
+
+bool AArch64LoopIdiomTransformLegacyPass::runOnLoop(Loop *L,
+LPPassManager &LPM) {
+
+  if (skipLoop(L))
+return false;
+
+  auto *DT = &getAnalysis().getDomTree();
+  auto *LI = &getAnalysis().getLoopInfo();
+  auto &TTI = getAnalysis().getTTI(
+  *L->getHeader()->getParent());
+  return AArch64LoopIdiomTransform(
+ DT, LI, &TTI, &L->getHeader()->getModule()->getDataLayout

[clang] [clang-tools-extra] [llvm] [AArch64] Add an AArch64 pass for loop idiom transformations (PR #72273)

2023-12-19 Thread David Green via cfe-commits


@@ -0,0 +1,816 @@
+//===- AArch64LoopIdiomTransform.cpp - Loop idiom recognition 
-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This pass implements a pass that recognizes certain loop idioms and
+// transforms them into more optimized versions of the same loop. In cases
+// where this happens, it can be a significant performance win.
+//
+// We currently only recognize one loop that finds the first mismatched byte
+// in an array and returns the index, i.e. something like:
+//
+//  while (++i != n) {
+//if (a[i] != b[i])
+//  break;
+//  }
+//
+// In this example we can actually vectorize the loop despite the early exit,
+// although the loop vectorizer does not support it. It requires some extra
+// checks to deal with the possibility of faulting loads when crossing page
+// boundaries. However, even with these checks it is still profitable to do the
+// transformation.
+//
+//===--===//
+//
+// TODO List:
+//
+// * When optimizing for code size we may want to avoid some transformations.
+// * We can also support the inverse case where we scan for a matching element.
+//
+//===--===//
+
+#include "AArch64LoopIdiomTransform.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-loop-idiom-transform"
+
+static cl::opt
+DisableAll("disable-aarch64-lit-all", cl::Hidden, cl::init(false),
+   cl::desc("Disable AArch64 Loop Idiom Transform Pass."));
+
+static cl::opt DisableByteCmp(
+"disable-aarch64-lit-bytecmp", cl::Hidden, cl::init(false),
+cl::desc("Proceed with AArch64 Loop Idiom Transform Pass, but do "
+ "not convert byte-compare loop(s)."));
+
+static cl::opt VerifyLoops(
+"aarch64-lit-verify", cl::Hidden, cl::init(false),
+cl::desc("Verify loops generated AArch64 Loop Idiom Transform Pass."));
+
+namespace llvm {
+
+void initializeAArch64LoopIdiomTransformLegacyPassPass(PassRegistry &);
+Pass *createAArch64LoopIdiomTransformPass();
+
+} // end namespace llvm
+
+namespace {
+
+class AArch64LoopIdiomTransform {
+  Loop *CurLoop = nullptr;
+  DominatorTree *DT;
+  LoopInfo *LI;
+  const TargetTransformInfo *TTI;
+  const DataLayout *DL;
+
+public:
+  explicit AArch64LoopIdiomTransform(DominatorTree *DT, LoopInfo *LI,
+ const TargetTransformInfo *TTI,
+ const DataLayout *DL)
+  : DT(DT), LI(LI), TTI(TTI), DL(DL) {}
+
+  bool run(Loop *L);
+
+private:
+  /// \name Countable Loop Idiom Handling
+  /// @{
+
+  bool runOnCountableLoop();
+  bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
+  SmallVectorImpl &ExitBlocks);
+
+  bool recognizeByteCompare();
+  Value *expandFindMismatch(IRBuilder<> &Builder, GetElementPtrInst *GEPA,
+GetElementPtrInst *GEPB, Instruction *Index,
+Value *Start, Value *MaxLen);
+  void transformByteCompare(GetElementPtrInst *GEPA, GetElementPtrInst *GEPB,
+PHINode *IndPhi, Value *MaxLen, Instruction *Index,
+Value *Start, bool IncIdx, BasicBlock *FoundBB,
+BasicBlock *EndBB);
+  /// @}
+};
+
+class AArch64LoopIdiomTransformLegacyPass : public LoopPass {
+public:
+  static char ID;
+
+  explicit AArch64LoopIdiomTransformLegacyPass() : LoopPass(ID) {
+initializeAArch64LoopIdiomTransformLegacyPassPass(
+*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override {
+return "Transform AArch64-specific loop idioms";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+AU.addRequired();
+AU.addRequired();
+AU.addRequired();
+  }
+
+  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+};
+
+bool AArch64LoopIdiomTransformLegacyPass::runOnLoop(Loop *L,
+LPPassManager &LPM) {
+
+  if (skipLoop(L))
+return false;
+
+  auto *DT = &getAnalysis().getDomTree();
+  auto *LI = &getAnalysis().getLoopInfo();
+  auto &TTI = getAnalysis().getTTI(
+  *L->getHeader()->getParent());
+  return AArch64LoopIdiomTransform(
+ DT, LI, &TTI, &L->getHeader()->getModule()->getDataLayout

[clang-tools-extra] [llvm] [clang] [AArch64] Add an AArch64 pass for loop idiom transformations (PR #72273)

2023-12-19 Thread David Green via cfe-commits


@@ -0,0 +1,816 @@
+//===- AArch64LoopIdiomTransform.cpp - Loop idiom recognition 
-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This pass implements a pass that recognizes certain loop idioms and
+// transforms them into more optimized versions of the same loop. In cases
+// where this happens, it can be a significant performance win.
+//
+// We currently only recognize one loop that finds the first mismatched byte
+// in an array and returns the index, i.e. something like:
+//
+//  while (++i != n) {
+//if (a[i] != b[i])
+//  break;
+//  }
+//
+// In this example we can actually vectorize the loop despite the early exit,
+// although the loop vectorizer does not support it. It requires some extra
+// checks to deal with the possibility of faulting loads when crossing page
+// boundaries. However, even with these checks it is still profitable to do the
+// transformation.
+//
+//===--===//
+//
+// TODO List:
+//
+// * When optimizing for code size we may want to avoid some transformations.
+// * We can also support the inverse case where we scan for a matching element.
+//
+//===--===//
+
+#include "AArch64LoopIdiomTransform.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-loop-idiom-transform"
+
+static cl::opt
+DisableAll("disable-aarch64-lit-all", cl::Hidden, cl::init(false),
+   cl::desc("Disable AArch64 Loop Idiom Transform Pass."));
+
+static cl::opt DisableByteCmp(
+"disable-aarch64-lit-bytecmp", cl::Hidden, cl::init(false),
+cl::desc("Proceed with AArch64 Loop Idiom Transform Pass, but do "
+ "not convert byte-compare loop(s)."));
+
+static cl::opt VerifyLoops(
+"aarch64-lit-verify", cl::Hidden, cl::init(false),
+cl::desc("Verify loops generated AArch64 Loop Idiom Transform Pass."));
+
+namespace llvm {
+
+void initializeAArch64LoopIdiomTransformLegacyPassPass(PassRegistry &);
+Pass *createAArch64LoopIdiomTransformPass();
+
+} // end namespace llvm
+
+namespace {
+
+class AArch64LoopIdiomTransform {
+  Loop *CurLoop = nullptr;
+  DominatorTree *DT;
+  LoopInfo *LI;
+  const TargetTransformInfo *TTI;
+  const DataLayout *DL;
+
+public:
+  explicit AArch64LoopIdiomTransform(DominatorTree *DT, LoopInfo *LI,
+ const TargetTransformInfo *TTI,
+ const DataLayout *DL)
+  : DT(DT), LI(LI), TTI(TTI), DL(DL) {}
+
+  bool run(Loop *L);
+
+private:
+  /// \name Countable Loop Idiom Handling
+  /// @{
+
+  bool runOnCountableLoop();
+  bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
+  SmallVectorImpl &ExitBlocks);
+
+  bool recognizeByteCompare();
+  Value *expandFindMismatch(IRBuilder<> &Builder, GetElementPtrInst *GEPA,
+GetElementPtrInst *GEPB, Instruction *Index,
+Value *Start, Value *MaxLen);
+  void transformByteCompare(GetElementPtrInst *GEPA, GetElementPtrInst *GEPB,
+PHINode *IndPhi, Value *MaxLen, Instruction *Index,
+Value *Start, bool IncIdx, BasicBlock *FoundBB,
+BasicBlock *EndBB);
+  /// @}
+};
+
+class AArch64LoopIdiomTransformLegacyPass : public LoopPass {
+public:
+  static char ID;
+
+  explicit AArch64LoopIdiomTransformLegacyPass() : LoopPass(ID) {
+initializeAArch64LoopIdiomTransformLegacyPassPass(
+*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override {
+return "Transform AArch64-specific loop idioms";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<LoopInfoWrapperPass>();
+    AU.addRequired<TargetTransformInfoWrapperPass>();
+  }
+
+  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+};
+
+bool AArch64LoopIdiomTransformLegacyPass::runOnLoop(Loop *L,
+LPPassManager &LPM) {
+
+  if (skipLoop(L))
+return false;
+
+  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+  *L->getHeader()->getParent());
+  return AArch64LoopIdiomTransform(
+ DT, LI, &TTI, &L->getHeader()->getModule()->getDataLayout
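
For readers following the thread, here is a rough C++ sketch of the idiom being
recognized and of the block-wise shape it gets turned into. The 16-byte block
and the 4096-byte minimum page size are illustrative assumptions, and the pass
itself emits IR using vector compares rather than this scalar inner loop:

#include <cstddef>
#include <cstdint>

// The scalar idiom: advance i, stop at the first mismatching byte or at n.
size_t mismatch_scalar(const uint8_t *a, const uint8_t *b, size_t i, size_t n) {
  while (++i != n)
    if (a[i] != b[i])
      break;
  return i;
}

// Conceptual transformed shape: compare a whole block per step whenever the
// block cannot cross a page boundary (so a wide access cannot fault),
// otherwise fall back to a single scalar comparison.
size_t mismatch_blockwise(const uint8_t *a, const uint8_t *b, size_t i,
                          size_t n) {
  const size_t Block = 16, Page = 4096; // assumed sizes, see note above
  for (++i; i < n;) {
    bool Safe = n - i >= Block &&
                (uintptr_t)(a + i) % Page <= Page - Block &&
                (uintptr_t)(b + i) % Page <= Page - Block;
    if (!Safe) {
      if (a[i] != b[i])
        return i;
      ++i;
      continue;
    }
    for (size_t j = 0; j < Block; ++j) // stands in for one vector compare
      if (a[i + j] != b[i + j])
        return i + j;
    i += Block;
  }
  return n;
}

The page-boundary guard is what makes the wide accesses safe even when the
scalar loop would have exited before reading the whole block.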

[compiler-rt] [clang] [libc] [llvm] [lldb] [clang-tools-extra] [flang] [mlir] [DAGCombiner] Combine frem into fdiv+ftrunc+fma (PR #67642)

2023-11-30 Thread David Green via cfe-commits

davemgreen wrote:

Do you have any analysis on the expected magnitude of the inaccuracy we might 
expect from performing the fdiv/trunc/fma vs the call to fmod?
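
For reference, the rewrite amounts to fmod(a, b) ~= a - trunc(a / b) * b; a
minimal sketch of that identity (not the PR's exact DAG patterns):

#include <cmath>

// fmod(a, b) computed as fma(-trunc(a / b), b, a). The rounded fdiv, and the
// off-by-one it can cause in the truncated quotient near integer boundaries,
// is where this can diverge from a correctly rounded fmod() call.
double frem_via_fma(double a, double b) {
  double q = std::trunc(a / b);
  return std::fma(-q, b, a);
}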

https://github.com/llvm/llvm-project/pull/67642
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [ARM] arm_acle.h add Coprocessor Instrinsics (PR #75440)

2023-12-21 Thread David Green via cfe-commits

davemgreen wrote:

Let me try and get the downstream version; you might be able to pick up some
things from it. A test, at least, should probably be present.

https://github.com/llvm/llvm-project/pull/75440
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [ARM] arm_acle.h add Coprocessor Instrinsics (PR #75440)

2023-12-21 Thread David Green via cfe-commits

davemgreen wrote:

This is the downstream code we have: 
https://gist.github.com/davemgreen/e7ade833274a60e975e67a66eda7cb44
Note that the __ARM_TARGET_COPROC_XYZ macros are probably wrong. They should be 
__ARM_FEATURE_COPROC bitfield macros according to the ACLE.

Can you make use of some of that? It would be good to add the macro definition 
at the same time as the intrinsics (they can be used to control when the 
intrinsics are available), and the test should be useful for checking they are 
available at the right times.

https://github.com/llvm/llvm-project/pull/75440
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [ARM] arm_acle.h add Coprocessor Instrinsics (PR #75440)

2023-12-22 Thread David Green via cfe-commits

davemgreen wrote:

Thanks for doing this.
I think that __ARM_FEATURE_COPROC should be a bitfield, as defined in 
https://arm-software.github.io/acle/main/acle.html#coprocessor-intrinsics. That 
would remove the need for the other macros.
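
For reference, a sketch of how the bitfield would gate things once it is in
place. The bit meanings are my reading of that ACLE section (bit 0 the basic
CDP/LDC/STC/MCR/MRC forms, bit 1 the *2 forms, bit 2 MCRR/MRRC, bit 3
MCRR2/MRRC2); the exact macro values are for the patch to define:

#include <arm_acle.h>

#if defined(__ARM_FEATURE_COPROC) && (__ARM_FEATURE_COPROC & 0x1)
// Only compiled when bit 0 advertises the basic coprocessor intrinsics.
unsigned read_cp15_midr(void) {
  // MRC p15, 0, Rt, c0, c0, 0 reads MIDR on AArch32.
  return __arm_mrc(15, 0, 0, 0, 0);
}
#endif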

https://github.com/llvm/llvm-project/pull/75440
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [ARM] arm_acle.h add Coprocessor Instrinsics (PR #75440)

2024-01-02 Thread David Green via cfe-commits

https://github.com/davemgreen commented:

Thanks. This is looking good to me. I just have a few comments about different 
architecture revisions.

https://github.com/llvm/llvm-project/pull/75440
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [ARM] arm_acle.h add Coprocessor Instrinsics (PR #75440)

2024-01-02 Thread David Green via cfe-commits


@@ -836,6 +837,70 @@ void ARMTargetInfo::getTargetDefines(const LangOptions 
&Opts,
   if (Opts.RWPI)
 Builder.defineMacro("__ARM_RWPI", "1");
 
+  // Macros for enabling co-proc intrinsics
+  uint64_t FeatureCoprocBF = 0;
+  switch (ArchKind) {
+  default:
+break;
+  case llvm::ARM::ArchKind::ARMV4:
+// Filter __arm_ldcl and __arm_stcl in acle.h
+FeatureCoprocBF = FEATURE_COPROC_B1;
+break;
+  case llvm::ARM::ArchKind::ARM5T:
+FeatureCoprocBF = isThumb() ? 0 : FEATURE_COPROC_B1;
+break;
+  case llvm::ARM::ArchKind::ARMV5TE:
+  case llvm::ARM::ArchKind::ARMV5TEJ:
+if (!isThumb())
+  FeatureCoprocBF =
+  FEATURE_COPROC_B1 | FEATURE_COPROC_B2 | FEATURE_COPROC_B3;
+break;
+  case llvm::ARM::ArchKind::ARMV6:
+  case llvm::ARM::ArchKind::ARMV6K:
+  case llvm::ARM::ArchKind::ARMV6KZ:
+  case llvm::ARM::ArchKind::ARMV6T2:
+if (!isThumb() || ArchKind == llvm::ARM::ArchKind::ARMV6T2)
+  FeatureCoprocBF = FEATURE_COPROC_B1 | FEATURE_COPROC_B2 |
+FEATURE_COPROC_B3 | FEATURE_COPROC_B4;
+break;
+  case llvm::ARM::ArchKind::ARMV7A:
+  case llvm::ARM::ArchKind::ARMV7R:
+  case llvm::ARM::ArchKind::ARMV7M:
+  case llvm::ARM::ArchKind::ARMV7S:
+  case llvm::ARM::ArchKind::ARMV7EM:
+FeatureCoprocBF = FEATURE_COPROC_B1 | FEATURE_COPROC_B2 |
+  FEATURE_COPROC_B3 | FEATURE_COPROC_B4;
+break;
+  case llvm::ARM::ArchKind::ARMV8A:
+  case llvm::ARM::ArchKind::ARMV8R:
+  case llvm::ARM::ArchKind::ARMV8_1A:
+  case llvm::ARM::ArchKind::ARMV8_2A:
+  case llvm::ARM::ArchKind::ARMV8_3A:
+  case llvm::ARM::ArchKind::ARMV8_4A:
+  case llvm::ARM::ArchKind::ARMV8_5A:
+  case llvm::ARM::ArchKind::ARMV8_6A:
+  case llvm::ARM::ArchKind::ARMV8_7A:
+  case llvm::ARM::ArchKind::ARMV8_8A:
+  case llvm::ARM::ArchKind::ARMV8_9A:
+// Filter __arm_cdp, __arm_ldcl, __arm_stcl in arm_acle.h
+FeatureCoprocBF = FEATURE_COPROC_B1 | FEATURE_COPROC_B3;
+break;
+  case llvm::ARM::ArchKind::ARMV8MMainline:

davemgreen wrote:

Add ARMV8_1MMainline too.

https://github.com/llvm/llvm-project/pull/75440
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [ARM] arm_acle.h add Coprocessor Instrinsics (PR #75440)

2024-01-02 Thread David Green via cfe-commits


@@ -836,6 +837,70 @@ void ARMTargetInfo::getTargetDefines(const LangOptions 
&Opts,
   if (Opts.RWPI)
 Builder.defineMacro("__ARM_RWPI", "1");
 
+  // Macros for enabling co-proc intrinsics
+  uint64_t FeatureCoprocBF = 0;
+  switch (ArchKind) {
+  default:
+break;
+  case llvm::ARM::ArchKind::ARMV4:
+// Filter __arm_ldcl and __arm_stcl in acle.h
+FeatureCoprocBF = FEATURE_COPROC_B1;
+break;
+  case llvm::ARM::ArchKind::ARM5T:
+FeatureCoprocBF = isThumb() ? 0 : FEATURE_COPROC_B1;
+break;
+  case llvm::ARM::ArchKind::ARMV5TE:
+  case llvm::ARM::ArchKind::ARMV5TEJ:
+if (!isThumb())
+  FeatureCoprocBF =
+  FEATURE_COPROC_B1 | FEATURE_COPROC_B2 | FEATURE_COPROC_B3;
+break;
+  case llvm::ARM::ArchKind::ARMV6:
+  case llvm::ARM::ArchKind::ARMV6K:
+  case llvm::ARM::ArchKind::ARMV6KZ:
+  case llvm::ARM::ArchKind::ARMV6T2:
+if (!isThumb() || ArchKind == llvm::ARM::ArchKind::ARMV6T2)
+  FeatureCoprocBF = FEATURE_COPROC_B1 | FEATURE_COPROC_B2 |
+FEATURE_COPROC_B3 | FEATURE_COPROC_B4;
+break;
+  case llvm::ARM::ArchKind::ARMV7A:
+  case llvm::ARM::ArchKind::ARMV7R:
+  case llvm::ARM::ArchKind::ARMV7M:
+  case llvm::ARM::ArchKind::ARMV7S:
+  case llvm::ARM::ArchKind::ARMV7EM:
+FeatureCoprocBF = FEATURE_COPROC_B1 | FEATURE_COPROC_B2 |
+  FEATURE_COPROC_B3 | FEATURE_COPROC_B4;
+break;
+  case llvm::ARM::ArchKind::ARMV8A:
+  case llvm::ARM::ArchKind::ARMV8R:
+  case llvm::ARM::ArchKind::ARMV8_1A:
+  case llvm::ARM::ArchKind::ARMV8_2A:
+  case llvm::ARM::ArchKind::ARMV8_3A:
+  case llvm::ARM::ArchKind::ARMV8_4A:
+  case llvm::ARM::ArchKind::ARMV8_5A:
+  case llvm::ARM::ArchKind::ARMV8_6A:
+  case llvm::ARM::ArchKind::ARMV8_7A:
+  case llvm::ARM::ArchKind::ARMV8_8A:
+  case llvm::ARM::ArchKind::ARMV8_9A:
+// Filter __arm_cdp, __arm_ldcl, __arm_stcl in arm_acle.h
+FeatureCoprocBF = FEATURE_COPROC_B1 | FEATURE_COPROC_B3;
+break;
+  case llvm::ARM::ArchKind::ARMV8MMainline:
+FeatureCoprocBF = FEATURE_COPROC_B1 | FEATURE_COPROC_B2 |
+  FEATURE_COPROC_B3 | FEATURE_COPROC_B4;
+break;
+  case llvm::ARM::ArchKind::ARMV9A:
+  case llvm::ARM::ArchKind::ARMV9_1A:
+  case llvm::ARM::ArchKind::ARMV9_2A:
+  case llvm::ARM::ArchKind::ARMV9_3A:
+  case llvm::ARM::ArchKind::ARMV9_4A:

davemgreen wrote:

There is an ARMV9_5A now too. I think I would expect these to be the same as
ARMV8.
Is this switch statement exhaustive? Could the default case be made the same as 
ARMV8 so we don't need to extend it every time an architecture is added?
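
Roughly the shape I have in mind, as a stand-in mock (the enum and constants
below are simplified placeholders, not the real llvm::ARM::ArchKind or the
patch's FEATURE_COPROC_* values):

#include <cstdint>

enum class MockArchKind { ARMV4, ARMV5TE, ARMV7A, ARMV8A, ARMV9A, ARMV9_4A };
constexpr uint64_t B1 = 1u << 0, B2 = 1u << 1, B3 = 1u << 2, B4 = 1u << 3;

uint64_t coprocBits(MockArchKind K) {
  switch (K) {
  case MockArchKind::ARMV4: // the oldest cores keep their special cases
    return B1;
  case MockArchKind::ARMV5TE:
    return B1 | B2 | B3;
  case MockArchKind::ARMV7A:
    return B1 | B2 | B3 | B4;
  default: // v8-A, v9-A and anything newer share one bit pattern
    return B1 | B3;
  }
}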

https://github.com/llvm/llvm-project/pull/75440
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [ARM] arm_acle.h add Coprocessor Instrinsics (PR #75440)

2024-01-02 Thread David Green via cfe-commits


@@ -756,6 +756,58 @@ __arm_st64bv0(void *__addr, data512_t __value) {
   __builtin_arm_mops_memset_tag(__tagged_address, __value, __size)
 #endif
 
+/* Coprocessor Intrinsics */
+#if defined(__ARM_FEATURE_COPROC)
+
+#if (__ARM_FEATURE_COPROC & 0x1)
+
+#if (__ARM_ARCH != 8)

davemgreen wrote:

Could this be < 8?
This doesn't apply to 8-m.main, right? The test looks OK.

https://github.com/llvm/llvm-project/pull/75440
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [ARM] arm_acle.h add Coprocessor Instrinsics (PR #75440)

2024-01-02 Thread David Green via cfe-commits

https://github.com/davemgreen edited 
https://github.com/llvm/llvm-project/pull/75440
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [ARM] arm_acle.h add Coprocessor Instrinsics (PR #75440)

2024-01-07 Thread David Green via cfe-commits


@@ -836,6 +837,70 @@ void ARMTargetInfo::getTargetDefines(const LangOptions 
&Opts,
   if (Opts.RWPI)
 Builder.defineMacro("__ARM_RWPI", "1");
 
+  // Macros for enabling co-proc intrinsics
+  uint64_t FeatureCoprocBF = 0;
+  switch (ArchKind) {
+  default:
+break;
+  case llvm::ARM::ArchKind::ARMV4:
+// Filter __arm_ldcl and __arm_stcl in acle.h
+FeatureCoprocBF = FEATURE_COPROC_B1;
+break;
+  case llvm::ARM::ArchKind::ARM5T:
+FeatureCoprocBF = isThumb() ? 0 : FEATURE_COPROC_B1;
+break;
+  case llvm::ARM::ArchKind::ARMV5TE:
+  case llvm::ARM::ArchKind::ARMV5TEJ:
+if (!isThumb())
+  FeatureCoprocBF =
+  FEATURE_COPROC_B1 | FEATURE_COPROC_B2 | FEATURE_COPROC_B3;
+break;
+  case llvm::ARM::ArchKind::ARMV6:
+  case llvm::ARM::ArchKind::ARMV6K:
+  case llvm::ARM::ArchKind::ARMV6KZ:
+  case llvm::ARM::ArchKind::ARMV6T2:
+if (!isThumb() || ArchKind == llvm::ARM::ArchKind::ARMV6T2)
+  FeatureCoprocBF = FEATURE_COPROC_B1 | FEATURE_COPROC_B2 |
+FEATURE_COPROC_B3 | FEATURE_COPROC_B4;
+break;
+  case llvm::ARM::ArchKind::ARMV7A:
+  case llvm::ARM::ArchKind::ARMV7R:
+  case llvm::ARM::ArchKind::ARMV7M:
+  case llvm::ARM::ArchKind::ARMV7S:
+  case llvm::ARM::ArchKind::ARMV7EM:
+FeatureCoprocBF = FEATURE_COPROC_B1 | FEATURE_COPROC_B2 |
+  FEATURE_COPROC_B3 | FEATURE_COPROC_B4;
+break;
+  case llvm::ARM::ArchKind::ARMV8A:
+  case llvm::ARM::ArchKind::ARMV8R:
+  case llvm::ARM::ArchKind::ARMV8_1A:
+  case llvm::ARM::ArchKind::ARMV8_2A:
+  case llvm::ARM::ArchKind::ARMV8_3A:
+  case llvm::ARM::ArchKind::ARMV8_4A:
+  case llvm::ARM::ArchKind::ARMV8_5A:
+  case llvm::ARM::ArchKind::ARMV8_6A:
+  case llvm::ARM::ArchKind::ARMV8_7A:
+  case llvm::ARM::ArchKind::ARMV8_8A:
+  case llvm::ARM::ArchKind::ARMV8_9A:
+// Filter __arm_cdp, __arm_ldcl, __arm_stcl in arm_acle.h
+FeatureCoprocBF = FEATURE_COPROC_B1 | FEATURE_COPROC_B3;
+break;
+  case llvm::ARM::ArchKind::ARMV8MMainline:
+FeatureCoprocBF = FEATURE_COPROC_B1 | FEATURE_COPROC_B2 |
+  FEATURE_COPROC_B3 | FEATURE_COPROC_B4;
+break;
+  case llvm::ARM::ArchKind::ARMV9A:
+  case llvm::ARM::ArchKind::ARMV9_1A:
+  case llvm::ARM::ArchKind::ARMV9_2A:
+  case llvm::ARM::ArchKind::ARMV9_3A:
+  case llvm::ARM::ArchKind::ARMV9_4A:

davemgreen wrote:

Oh right, ARMV9_5A is AArch64 only. That's OK then.
I would expect the other ArmV9-A cases to be the same as ArmV8-A for AArch32, 
and wouldn't have expected a change in coprocessor instructions.
The reference manual is at 
https://developer.arm.com/documentation/ddi0487/ja/?lang=en and doesn't seem to 
mention cdp.

https://github.com/llvm/llvm-project/pull/75440
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [ARM] arm_acle.h add Coprocessor Instrinsics (PR #75440)

2024-01-07 Thread David Green via cfe-commits

davemgreen wrote:

If you can make armv9-a work the same as armv8-a and add some tests for it then 
this LGTM

https://github.com/llvm/llvm-project/pull/75440
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang-tools-extra] [clang] [llvm] [AArch64] Add an AArch64 pass for loop idiom transformations (PR #72273)

2024-01-08 Thread David Green via cfe-commits

https://github.com/davemgreen edited 
https://github.com/llvm/llvm-project/pull/72273
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [clang-tools-extra] [AArch64] Add an AArch64 pass for loop idiom transformations (PR #72273)

2024-01-08 Thread David Green via cfe-commits

https://github.com/davemgreen approved this pull request.

Thanks for the updates. From what I can tell this LGTM, but it will need a 
rebase.

You might want to commit it with the option disabled, and then flip the switch 
in a followup to avoid the commit-revert cycles in case there are any issues.

https://github.com/llvm/llvm-project/pull/72273
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [ARM] arm_acle.h add Coprocessor Instrinsics (PR #75440)

2024-01-08 Thread David Green via cfe-commits

https://github.com/davemgreen approved this pull request.

Thanks. LGTM

https://github.com/llvm/llvm-project/pull/75440
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 3b09e53 - [ARM] Remove duplicate fp16 intrinsics

2022-07-28 Thread David Green via cfe-commits

Author: David Green
Date: 2022-07-28T14:26:17+01:00
New Revision: 3b09e532ee396bb07820ecadb29e1ed88f6e6c25

URL: 
https://github.com/llvm/llvm-project/commit/3b09e532ee396bb07820ecadb29e1ed88f6e6c25
DIFF: 
https://github.com/llvm/llvm-project/commit/3b09e532ee396bb07820ecadb29e1ed88f6e6c25.diff

LOG: [ARM] Remove duplicate fp16 intrinsics

These vdup and vmov float16 intrinsics are being defined in both the
general section and then again in fp16 under a !aarch64 flag. The
vdup_lane intrinsics were being defined in both aarch64 and !aarch64
sections, so have been commoned.  They are defined as macros, so do not
give duplicate warnings, but removing the duplicates shouldn't alter the
available intrinsics.

Added: 


Modified: 
clang/include/clang/Basic/arm_neon.td
clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c

Removed: 




diff  --git a/clang/include/clang/Basic/arm_neon.td 
b/clang/include/clang/Basic/arm_neon.td
index 2e9798129fdfb..93f9961931370 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -530,7 +530,7 @@ def VMOV_N   : WOpInst<"vmov_n", ".1",
 }
 let InstName = "" in
 def VDUP_LANE: WOpInst<"vdup_lane", ".qI",
-   "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl",
+   "UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUl",
OP_DUP_LN>;
 
 

@@ -980,7 +980,7 @@ def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "..I.I",
 
 

 // Set all lanes to same value
-def VDUP_LANE1: WOpInst<"vdup_lane", ".qI", "hdQhQdPlQPl", OP_DUP_LN>;
+def VDUP_LANE1: WOpInst<"vdup_lane", ".qI", "dQdPlQPl", OP_DUP_LN>;
 def VDUP_LANE2: WOpInst<"vdup_laneq", ".QI",
   "csilUcUsUiUlPcPshfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl",
 OP_DUP_LN> {
@@ -1644,7 +1644,8 @@ def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", 
"ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs
 def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", 
"ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs"> {
   let isLaneQ = 1;
 }
-}
+
+} // ArchGuard = "defined(__aarch64__)"
 
 // ARMv8.2-A FP16 vector intrinsics for A32/A64.
 let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in {
@@ -1763,15 +1764,6 @@ let ArchGuard = 
"defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in {
   def VUZPH: WInst<"vuzp", "2..", "hQh">;
   def VTRNH: WInst<"vtrn", "2..", "hQh">;
 
-
-  let ArchGuard = "!defined(__aarch64__)" in {
-// Set all lanes to same value.
-// Already implemented prior to ARMv8.2-A.
-def VMOV_NH  : WOpInst<"vmov_n", ".1", "hQh", OP_DUP>;
-def VDUP_NH  : WOpInst<"vdup_n", ".1", "hQh", OP_DUP>;
-def VDUP_LANE1H : WOpInst<"vdup_lane", ".qI", "hQh", OP_DUP_LN>;
-  }
-
   // Vector Extract
   def VEXTH  : WInst<"vext", "...I", "hQh">;
 

diff  --git a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c 
b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
index 08e7fecd1330f..3dc3a49a9bfd5 100644
--- a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
+++ b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
@@ -1754,15 +1754,15 @@ float16x8_t test_vmulq_n_f16(float16x8_t a, float16_t 
b) {
 // CHECK-LABEL: define {{[^@]+}}@test_vmulh_lane_f16
 // CHECK-SAME: (half noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) 
#[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:[[__REINT_851:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT:[[__REINT1_851:%.*]] = alloca i16, align 2
+// CHECK-NEXT:[[__REINT_847:%.*]] = alloca <4 x half>, align 8
+// CHECK-NEXT:[[__REINT1_847:%.*]] = alloca i16, align 2
 // CHECK-NEXT:[[CONV:%.*]] = fpext half [[A]] to float
-// CHECK-NEXT:store <4 x half> [[B]], <4 x half>* [[__REINT_851]], align 8
-// CHECK-NEXT:[[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_851]] to <4 x 
i16>*
+// CHECK-NEXT:store <4 x half> [[B]], <4 x half>* [[__REINT_847]], align 8
+// CHECK-NEXT:[[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_847]] to <4 x 
i16>*
 // CHECK-NEXT:[[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8
 // CHECK-NEXT:[[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
-// CHECK-NEXT:store i16 [[VGET_LANE]], i16* [[__REINT1_851]], align 2
-// CHECK-NEXT:[[TMP2:%.*]] = bitcast i16* [[__REINT1_851]] to half*
+// CHECK-NEXT:store i16 [[VGET_LANE]], i16* [[__REINT1_847]], align 2
+// CHECK-NEXT:[[TMP2:%.*]] = bitcast i16* [[__REINT1_847]] to half*
 // CHECK-NEXT:[[TMP3:%.*]] = load half, half* [[TMP2]], align 2
 // CHECK-NEXT:[[CONV2:%.*]] = fpext half [[TMP3]] to float
 // CHECK-NEXT:[[MUL:%.*]] = fmul float [[CONV]], [[CONV2]]
@@ -1776,15 +1776,15 @@ float16_t test_vmulh_lane_f16(float16_t a, float16x4_t 
b) {
 // CHECK-LABEL: define {{[^@]+}}@test_vmulh_laneq_f16
 // CHEC

[clang] ef9df0d - [ARM] Simplify ArchGuard predicates in arm_neon.h.

2022-08-01 Thread David Green via cfe-commits

Author: David Green
Date: 2022-08-01T08:20:23+01:00
New Revision: ef9df0dc000c2c294398ab6fe13533d967dc29cd

URL: 
https://github.com/llvm/llvm-project/commit/ef9df0dc000c2c294398ab6fe13533d967dc29cd
DIFF: 
https://github.com/llvm/llvm-project/commit/ef9df0dc000c2c294398ab6fe13533d967dc29cd.diff

LOG: [ARM] Simplify ArchGuard predicates in arm_neon.h.

__ARM_ARCH >= 8 is implied by defined(__aarch64__), so we don't need to
guard against both together.

Added: 


Modified: 
clang/include/clang/Basic/arm_neon.td

Removed: 




diff  --git a/clang/include/clang/Basic/arm_neon.td 
b/clang/include/clang/Basic/arm_neon.td
index 93f9961931370..69371113c1c8f 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -1141,7 +1141,7 @@ def SHA256H2 : SInst<"vsha256h2", "....", "QUi">;
 def SHA256SU1 : SInst<"vsha256su1", "....", "QUi">;
 }
 
-let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_SHA3) && 
defined(__aarch64__)" in {
+let ArchGuard = "defined(__ARM_FEATURE_SHA3) && defined(__aarch64__)" in {
 def BCAX : SInst<"vbcax", "....", "QUcQUsQUiQUlQcQsQiQl">;
 def EOR3 : SInst<"veor3", "....", "QUcQUsQUiQUlQcQsQiQl">;
 def RAX1 : SInst<"vrax1", "...", "QUl">;
@@ -1151,7 +1151,7 @@ def XAR :  SInst<"vxar", "...I", "QUl">;
 }
 }
 
-let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_SHA512) && 
defined(__aarch64__)" in {
+let ArchGuard = "defined(__ARM_FEATURE_SHA512) && defined(__aarch64__)" in {
 
 def SHA512SU0 : SInst<"vsha512su0", "...", "QUl">;
 def SHA512su1 : SInst<"vsha512su1", "....", "QUl">;
 def SHA512H : SInst<"vsha512h", "....", "QUl">;
 def SHA512H2 : SInst<"vsha512h2", "....", "QUl">;
 }
 
-let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_SM3) && 
defined(__aarch64__)" in {
+let ArchGuard = "defined(__ARM_FEATURE_SM3) && defined(__aarch64__)" in {
 def SM3SS1 : SInst<"vsm3ss1", "....", "QUi">;
 def SM3TT1A : SInst<"vsm3tt1a", "....I", "QUi">;
 def SM3TT1B : SInst<"vsm3tt1b", "....I", "QUi">;
@@ -1169,7 +1169,7 @@ def SM3PARTW1 : SInst<"vsm3partw1", "....", "QUi">;
 def SM3PARTW2 : SInst<"vsm3partw2", "....", "QUi">;
 }
 
-let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_SM4) && 
defined(__aarch64__)" in {
+let ArchGuard = "defined(__ARM_FEATURE_SM4) && defined(__aarch64__)" in {
 def SM4E : SInst<"vsm4e", "...", "QUi">;
 def SM4EKEY : SInst<"vsm4ekey", "...", "QUi">;
 }
@@ -1192,7 +1192,7 @@ def FCVTAS_S32 : SInst<"vcvta_s32", "S.", "fQf">;
 def FCVTAU_S32 : SInst<"vcvta_u32", "U.", "fQf">;
 }
 
-let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__)" in {
+let ArchGuard = "defined(__aarch64__)" in {
 def FCVTNS_S64 : SInst<"vcvtn_s64", "S.", "dQd">;
 def FCVTNU_S64 : SInst<"vcvtn_u64", "U.", "dQd">;
 def FCVTPS_S64 : SInst<"vcvtp_s64", "S.", "dQd">;
@@ -1216,7 +1216,7 @@ def FRINTZ_S32 : SInst<"vrnd", "..", "fQf">;
 def FRINTI_S32 : SInst<"vrndi", "..", "fQf">;
 }
 
-let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__) && 
defined(__ARM_FEATURE_DIRECTED_ROUNDING)" in {
+let ArchGuard = "defined(__aarch64__) && 
defined(__ARM_FEATURE_DIRECTED_ROUNDING)" in {
 def FRINTN_S64 : SInst<"vrndn", "..", "dQd">;
 def FRINTA_S64 : SInst<"vrnda", "..", "dQd">;
 def FRINTP_S64 : SInst<"vrndp", "..", "dQd">;
@@ -1226,7 +1226,7 @@ def FRINTZ_S64 : SInst<"vrnd", "..", "dQd">;
 def FRINTI_S64 : SInst<"vrndi", "..", "dQd">;
 }
 
-let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__) && 
defined(__ARM_FEATURE_FRINT)" in {
+let ArchGuard = "defined(__aarch64__) && defined(__ARM_FEATURE_FRINT)" in {
 def FRINT32X_S32 : SInst<"vrnd32x", "..", "fQf">;
 def FRINT32Z_S32 : SInst<"vrnd32z", "..", "fQf">;
 def FRINT64X_S32 : SInst<"vrnd64x", "..", "fQf">;
@@ -1241,7 +1241,7 @@ def FMAXNM_S32 : SInst<"vmaxnm", "...", "fQf">;
 def FMINNM_S32 : SInst<"vminnm", "...", "fQf">;
 }
 
-let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__) && 
defined(__ARM_FEATURE_NUMERIC_MAXMIN)" in {
+let ArchGuard = "defined(__aarch64__) && 
defined(__ARM_FEATURE_NUMERIC_MAXMIN)" in {
 def FMAXNM_S64 : SInst<"vmaxnm", "...", "dQd">;
 def FMINNM_S64 : SInst<"vminnm", "...", "dQd">;
 }
@@ -1283,7 +1283,7 @@ def VQTBX4_A64 : WInst<"vqtbx4", "..(4Q)U", 
"UccPcQUcQcQPc">;
 // itself during generation so, unlike all other intrinsics, this one should
 // include *all* types, not just additional ones.
 def VVREINTERPRET : 
REINTERPRET_CROSS_SELF<"csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPlQPk">
 {
-  let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__)";
+  let ArchGuard = "defined(__aarch64__)";
   let BigEndianSafe = 1;
 }
 



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 8c30f4a - [AArch64] Always allow the __bf16 type

2022-08-04 Thread David Green via cfe-commits

Author: David Green
Date: 2022-08-04T18:35:27+01:00
New Revision: 8c30f4a5ab3e8dc4a75669d497723f9a2d8d39c8

URL: 
https://github.com/llvm/llvm-project/commit/8c30f4a5ab3e8dc4a75669d497723f9a2d8d39c8
DIFF: 
https://github.com/llvm/llvm-project/commit/8c30f4a5ab3e8dc4a75669d497723f9a2d8d39c8.diff

LOG: [AArch64] Always allow the __bf16 type

We would like to make the ACLE NEON and SVE intrinsics more usable by
gating them on the target, not by ifdef preprocessor macros. In order to
do this the types they use need to be available. This patch makes
__bf16 always available under AArch64, not just when the bf16
architecture feature is present. This brings it in line with GCC. In
subsequent patches the NEON bfloat16x8_t and SVE svbfloat16_t types
(along with bfloat16_t used in arm_sve.h) will be made unconditional
too.

The operations valid on the types are still very limited. They can be
used as a storage type, but the intrinsics used for conversions are
still behind an ifdef guard in arm_neon.h/arm_bf16.h.
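
As a small illustration of the storage-only usage that becomes unconditionally
available (mirroring the test below; arithmetic and the conversion intrinsics
still need the bf16 feature):

__bf16 buffer[4];

__bf16 copy_bf16(__bf16 v) {
  buffer[0] = v;    // plain store
  return buffer[0]; // plain load; no promotion or arithmetic is done
}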

Differential Revision: https://reviews.llvm.org/D130973

Added: 


Modified: 
clang/docs/LanguageExtensions.rst
clang/lib/Basic/Targets/AArch64.cpp
clang/lib/Basic/Targets/AArch64.h
clang/test/CodeGen/arm-bf16-params-returns.c
clang/test/CodeGen/arm-mangle-bf16.cpp
clang/test/Sema/arm-bf16-forbidden-ops.c
clang/test/Sema/arm-bf16-forbidden-ops.cpp
clang/test/Sema/arm-bfloat.cpp

Removed: 




diff  --git a/clang/docs/LanguageExtensions.rst 
b/clang/docs/LanguageExtensions.rst
index 1b823638e6a63..52931cc9232ce 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -757,8 +757,6 @@ performing the operation, and then truncating to 
``_Float16``.
 * 32-bit ARM
 * 64-bit ARM (AArch64)
 
-The ``__bf16`` type is only available when supported in hardware.
-
 ``__fp16`` is a storage and interchange format only.  This means that values of
 ``__fp16`` are immediately promoted to (at least) ``float`` when used in 
arithmetic
 operations, so that e.g. the result of adding two ``__fp16`` values has type 
``float``.

diff  --git a/clang/lib/Basic/Targets/AArch64.cpp 
b/clang/lib/Basic/Targets/AArch64.cpp
index 60ef52ac3f0dd..8612138c3194f 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -670,6 +670,10 @@ bool 
AArch64TargetInfo::handleTargetFeatures(std::vector &Features,
   return true;
 }
 
+bool AArch64TargetInfo::hasBFloat16Type() const {
+  return true;
+}
+
 TargetInfo::CallingConvCheckResult
 AArch64TargetInfo::checkCallingConvention(CallingConv CC) const {
   switch (CC) {

diff  --git a/clang/lib/Basic/Targets/AArch64.h 
b/clang/lib/Basic/Targets/AArch64.h
index bd6812d1257cf..e28a4c5b63905 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -116,6 +116,8 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public 
TargetInfo {
   bool handleTargetFeatures(std::vector &Features,
 DiagnosticsEngine &Diags) override;
 
+  bool hasBFloat16Type() const override;
+
   CallingConvCheckResult checkCallingConvention(CallingConv CC) const override;
 
   bool isCLZForZeroUndef() const override;

diff  --git a/clang/test/CodeGen/arm-bf16-params-returns.c 
b/clang/test/CodeGen/arm-bf16-params-returns.c
index c9e1efb0c2a01..d4d0e6cfd2571 100644
--- a/clang/test/CodeGen/arm-bf16-params-returns.c
+++ b/clang/test/CodeGen/arm-bf16-params-returns.c
@@ -1,6 +1,7 @@
 // RUN: %clang_cc1 -triple armv8.6a-arm-none-eabi -target-abi aapcs 
-mfloat-abi hard -target-feature +bf16 -target-feature +neon -emit-llvm -O2 -o 
- %s | opt -S -mem2reg -sroa | FileCheck %s --check-prefix=CHECK32-HARD
 // RUN: %clang_cc1 -triple armv8.6a-arm-none-eabi -target-abi aapcs 
-mfloat-abi softfp -target-feature +bf16 -target-feature +neon -emit-llvm -O2 
-o - %s | opt -S -mem2reg -sroa | FileCheck %s --check-prefix=CHECK32-SOFTFP
-// RUN: %clang_cc1 -triple aarch64-arm-none-eabi -target-abi aapcs 
-target-feature +bf16 -target-feature +neon -emit-llvm -O2 -o - %s | opt -S 
-mem2reg -sroa | FileCheck %s --check-prefix=CHECK64
+// RUN: %clang_cc1 -triple aarch64-arm-none-eabi -target-abi aapcs 
-target-feature +bf16 -target-feature +neon -emit-llvm -O2 -o - %s | opt -S 
-mem2reg -sroa | FileCheck %s --check-prefixes=CHECK64,CHECK64NEON
+// RUN: %clang_cc1 -triple aarch64-arm-none-eabi -target-abi aapcs 
-target-feature -bf16 -target-feature +neon -DNONEON -emit-llvm -O2 -o - %s | 
opt -S -mem2reg -sroa | FileCheck %s --check-prefix=CHECK64
 
 // REQUIRES: aarch64-registered-target || arm-registered-target
 
@@ -17,6 +18,8 @@ __bf16 test_ret_bf16(__bf16 v) {
 // CHECK64: define{{.*}} bfloat @test_ret_bf16(bfloat noundef returned %v) 
{{.*}} {
 // CHECK64: ret bfloat %v
 
+#ifndef NONEON
+
 bfloat16x4_t test_ret_bf16x4_t(bfloat16x4_t v) {
   return v;
 }
@@ -24,5 +27,7 @@ bfloat16x4_t test_ret_

[clang] 9727c77 - [NFC] Rename Instrinsic to Intrinsic

2022-04-25 Thread David Green via cfe-commits

Author: David Green
Date: 2022-04-25T18:13:23+01:00
New Revision: 9727c77d58ac920a4158d08c15659470e52ddda4

URL: 
https://github.com/llvm/llvm-project/commit/9727c77d58ac920a4158d08c15659470e52ddda4
DIFF: 
https://github.com/llvm/llvm-project/commit/9727c77d58ac920a4158d08c15659470e52ddda4.diff

LOG: [NFC] Rename Instrinsic to Intrinsic

Added: 


Modified: 
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGen/builtins-nvptx-mma.py
clang/test/CodeGenCUDA/fp-contract.cu
clang/test/Profile/c-avoid-direct-call.c
clang/test/Profile/c-indirect-call.c
clang/test/Profile/cxx-indirect-call.cpp
llvm/include/llvm/Analysis/VectorUtils.h
llvm/include/llvm/CodeGen/MachineInstr.h
llvm/include/llvm/CodeGen/ReplaceWithVeclib.h
llvm/include/llvm/IR/InstVisitor.h
llvm/include/llvm/IR/IntrinsicsARM.td
llvm/include/llvm/IR/Metadata.h
llvm/include/llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h
llvm/include/llvm/Transforms/Utils/Local.h
llvm/lib/Analysis/CallGraphSCCPass.cpp
llvm/lib/Analysis/ConstantFolding.cpp
llvm/lib/Analysis/IVDescriptors.cpp
llvm/lib/Analysis/VectorUtils.cpp
llvm/lib/CodeGen/ReplaceWithVeclib.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/CodeGen/SjLjEHPrepare.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/lib/Target/AArch64/AArch64StackTagging.cpp
llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
llvm/lib/Target/Mips/MipsISelLowering.h
llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
llvm/lib/Target/PowerPC/README_P9.txt
llvm/lib/Target/X86/X86LowerAMXType.cpp
llvm/lib/Transforms/IPO/GlobalOpt.cpp
llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
llvm/lib/Transforms/Scalar/Scalarizer.cpp
llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
llvm/lib/Transforms/Utils/Local.cpp
llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
llvm/lib/Transforms/Utils/SimplifyCFG.cpp
llvm/lib/Transforms/Utils/StripGCRelocates.cpp
llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/CodeGen/Hexagon/hvx-byte-store-double.ll
llvm/test/DebugInfo/WebAssembly/dbg-declare.ll
llvm/test/Instrumentation/DataFlowSanitizer/unordered_atomic_mem_intrins.ll
llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll

llvm/test/Transforms/FunctionSpecialization/function-specialization-nodup2.ll
llvm/test/Transforms/Inline/inline_constprop.ll
llvm/test/Transforms/InstCombine/stacksave-debuginfo.ll
llvm/test/Transforms/SROA/basictest-opaque-ptrs.ll
llvm/test/Transforms/SROA/basictest.ll
llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp

Removed: 




diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index fc2d32f3e26fe..f9966c1fd777c 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18200,7 +18200,7 @@ RValue CodeGenFunction::EmitBuiltinIsAligned(const 
CallExpr *E) {
 
 /// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
 /// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
-/// llvm.ptrmask instrinsic (with a GEP before in the align_up case).
+/// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
 /// TODO: actually use ptrmask once most optimization passes know about it.
 RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
   BuiltinAlignArgs Args(E, *this);

diff  --git a/clang/test/CodeGen/builtins-nvptx-mma.py 
b/clang/test/CodeGen/builtins-nvptx-mma.py
index dc40f04c11ce6..6c09910020278 100644
--- a/clang/test/CodeGen/builtins-nvptx-mma.py
+++ b/clang/test/CodeGen/builtins-nvptx-mma.py
@@ -1,5 +1,5 @@
 # This script generates all variants of wmma builtins, verifies that clang 
calls
-# correct LLVM instrinsics, and checks that availability of specific builtins 
is
+# correct LLVM intrinsics, and checks that availability of specific builtins is
 # constrained by the correct PTX version and the target GPU variant.
 
 # Dummy test run to avoid lit warnings.

diff  --git a/clang/test/CodeGenCUDA/fp-contract.cu 
b/clang/test/CodeGenCUDA/fp-contract.cu
index d466affded132..60824ba59ddfb 100644
--- a/clang/test/CodeGenCUDA/fp-contract.cu
+++ b/clang/test/CodeGenCUDA/fp-contract.cu
@@ -105,7 +105,7 @@
 
 // Explicit -ffp-contract=on -- fusing by front-end.
 // In IR,
-//mult/add in the same statement - llvm.fmuladd instrinsic emitted
+//mult/add in the same statement - llvm.fmuladd intrinsic emitt

[clang] 64816e6 - [AArch64] Support for Ampere1 core

2022-05-03 Thread David Green via cfe-commits

Author: Philipp Tomsich
Date: 2022-05-03T15:54:02+01:00
New Revision: 64816e68f4419a9e14c23be8aa96fa412bed7e12

URL: 
https://github.com/llvm/llvm-project/commit/64816e68f4419a9e14c23be8aa96fa412bed7e12
DIFF: 
https://github.com/llvm/llvm-project/commit/64816e68f4419a9e14c23be8aa96fa412bed7e12.diff

LOG: [AArch64] Support for Ampere1 core

Add support for the Ampere Computing Ampere1 core.
Ampere1 implements the AArch64 state and is compatible with ARMv8.6-A.

Differential Revision: https://reviews.llvm.org/D117112

Added: 
llvm/lib/Target/AArch64/AArch64SchedAmpere1.td
llvm/lib/Target/AArch64/AArch64SchedPredAmpere.td

Modified: 
clang/test/Misc/target-invalid-cpu-note.c
llvm/include/llvm/Support/AArch64TargetParser.def
llvm/lib/Target/AArch64/AArch64.td
llvm/lib/Target/AArch64/AArch64SchedPredicates.td
llvm/lib/Target/AArch64/AArch64Subtarget.cpp
llvm/lib/Target/AArch64/AArch64Subtarget.h
llvm/test/CodeGen/AArch64/cpus.ll
llvm/test/CodeGen/AArch64/neon-dot-product.ll
llvm/test/CodeGen/AArch64/remat.ll
llvm/test/MC/AArch64/armv8.2a-dotprod.s
llvm/test/MC/AArch64/armv8.3a-rcpc.s
llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
llvm/unittests/Support/TargetParserTest.cpp

Removed: 




diff  --git a/clang/test/Misc/target-invalid-cpu-note.c 
b/clang/test/Misc/target-invalid-cpu-note.c
index b240eb1428f78..7c7e7e326cf15 100644
--- a/clang/test/Misc/target-invalid-cpu-note.c
+++ b/clang/test/Misc/target-invalid-cpu-note.c
@@ -5,11 +5,11 @@
 
 // RUN: not %clang_cc1 -triple arm64--- -target-cpu not-a-cpu -fsyntax-only %s 
2>&1 | FileCheck %s --check-prefix AARCH64
 // AARCH64: error: unknown target CPU 'not-a-cpu'
-// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, 
cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, 
cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, 
cortex-a78, cortex-a78c, cortex-a710, cortex-r82, cortex-x1, cortex-x1c, 
cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, 
cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, 
apple-a13, apple-a14, apple-m1, apple-s4, apple-s5, exynos-m3, exynos-m4, 
exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, 
thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel{{$}}
+// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, 
cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, 
cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, 
cortex-a78, cortex-a78c, cortex-a710, cortex-r82, cortex-x1, cortex-x1c, 
cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, 
cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, 
apple-a13, apple-a14, apple-m1, apple-s4, apple-s5, exynos-m3, exynos-m4, 
exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, 
thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1{{$}}
 
 // RUN: not %clang_cc1 -triple arm64--- -tune-cpu not-a-cpu -fsyntax-only %s 
2>&1 | FileCheck %s --check-prefix TUNE_AARCH64
 // TUNE_AARCH64: error: unknown target CPU 'not-a-cpu'
-// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, 
cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, 
cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, 
cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-r82, cortex-x1, 
cortex-x1c, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, 
neoverse-v1, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, 
apple-a12, apple-a13, apple-a14, apple-m1, apple-s4, apple-s5, exynos-m3, 
exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, 
thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel{{$}}
+// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, 
cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, 
cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, 
cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-r82, cortex-x1, 
cortex-x1c, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, 
neoverse-v1, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, 
apple-a12, apple-a13, apple-a14, apple-m1, apple-s4, apple-s5, exynos-m3, 
exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, 
thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, 
ampere1{{$}}
 
 // RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 
2>&1 | FileCheck %s --check-prefix X86
 // X86: error: unknown target CPU 'not-a-cpu'

diff  --git a/llvm/include/llvm/Support/AArch64TargetParser.def 
b/llvm/include/llvm/Support/AArch64TargetParser.def
index 44b73fa2f0b64..bae6

[clang] [clang][AArch64] Enable fp128 for aarch64 linux target (PR #85070)

2024-03-14 Thread David Green via cfe-commits

https://github.com/davemgreen commented:

Hi - I think this looks sensible, considering that long double == fp128. Should
we be doing the same for the other OSes in this file too?
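
For context, a minimal sketch of what "long double == fp128" means for the
AArch64 Linux ABI (illustrative only, and assuming the behaviour this PR
enables for __float128):

    /* aarch64-linux: long double is already IEEE binary128, so once fp128
       is enabled, __float128 shares its size and representation. */
    _Static_assert(sizeof(long double) == 16, "128-bit long double");
    _Static_assert(sizeof(__float128) == sizeof(long double),
                   "same representation as long double");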

https://github.com/llvm/llvm-project/pull/85070
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Add support for Cortex-A520AE and Cortex-A720AE CPUs (PR #85401)

2024-03-17 Thread David Green via cfe-commits


@@ -67,6 +67,8 @@ Changes to Interprocedural Optimizations
 Changes to the AArch64 Backend
 --
 
+* Added support for Cortex-A520AE and Cortex-A720AE CPUs.

davemgreen wrote:

Could this have Cortex-A78AE too?

https://github.com/llvm/llvm-project/pull/85401
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Add support for Cortex-A520AE and Cortex-A720AE CPUs (PR #85401)

2024-03-17 Thread David Green via cfe-commits


@@ -58,6 +58,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo 
{
 CortexA55,
 CortexA510,
 CortexA520,
+CortexA520AE,

davemgreen wrote:

These might not be worth adding, considering they should be the same as 
CortexA520, and could reuse the same enum.

https://github.com/llvm/llvm-project/pull/85401
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Add support for Cortex-A520AE and Cortex-A720AE CPUs (PR #85401)

2024-03-19 Thread David Green via cfe-commits

https://github.com/davemgreen approved this pull request.

Thanks. LGTM

https://github.com/llvm/llvm-project/pull/85401
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [ARM] __ARM_ARCH macro definition fix (PR #81493)

2024-02-13 Thread David Green via cfe-commits

davemgreen wrote:

I'm a little worried people might be relying on the existing behaviour, with
both clang and GCC having had this wrong for a while. If we are going to do it,
can you add a release note to clang explaining the new behaviour?

https://github.com/llvm/llvm-project/pull/81493
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[flang] [compiler-rt] [clang] [llvm] [mlir] [clang-tools-extra] [lldb] [libc] [libcxx] [AArch64] add intrinsic to generate a bfi instruction (PR #79672)

2024-01-31 Thread David Green via cfe-commits

davemgreen wrote:

I see. The issue is that the opposite is often true as well - if we add a 
target-specific intrinsic for this then, whilst we get a single instruction
being emitted, we don't see all the other optimizations that the compiler can 
and should be performing.

Things like constant folding, combining into other instructions, known-bits 
analysis or any form of vectorization will all be blocked by the intrinsic. It 
can take quite some work to add all those features in (if they are possible), 
and without them can potentially lead to worse results. Plus more things to 
maintain.

BFI isn't a trivial instruction to match, as it involves certain masks and
shifts. There might certainly be advantages to having an intrinsic. I would
like to try and see what the problems would be with generated code using normal
operations first though, if we can. If there are optimizations we can make
based on the existing code then that would help in all cases (C, MLIR, Rust,
etc.), not just frontends that are producing the intrinsics.
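
To make that concrete, here is a hedged sketch of the kind of portable
shift/and/or pattern being recommended (the function name and field layout
are made up for illustration). On AArch64 the backend can usually fold this
into a single BFI, while the generic IR stays visible to constant folding,
known-bits analysis and vectorization:

    #include <stdint.h>

    /* Insert the low 8 bits of 'val' into bits [15:8] of 'dst' using plain
       shift/and/or operations rather than a target-specific intrinsic. */
    static inline uint32_t insert_byte1(uint32_t dst, uint32_t val) {
      const uint32_t mask = 0xFFu << 8;            /* field being replaced */
      return (dst & ~mask) | ((val << 8) & mask);  /* clear, then insert */
    }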

https://github.com/llvm/llvm-project/pull/79672
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang-tools-extra] [compiler-rt] [llvm] [flang] [TTI]Fallback to SingleSrcPermute shuffle kind, if no direct estimation for (PR #79837)

2024-02-01 Thread David Green via cfe-commits

davemgreen wrote:

I think this is probably OK for Arm & AArch64. In the long run we should 
ideally be adding better extract subvector costs, but this patch moves the cost 
in that direction.

https://github.com/llvm/llvm-project/pull/79837
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang-tools-extra] [clang] [AArch64] Implement -fno-plt for SelectionDAG/GlobalISel (PR #78890)

2024-02-01 Thread David Green via cfe-commits


@@ -201,17 +201,27 @@ define dso_local void @rv_marker_3() personality ptr 
@__gxx_personality_v0 {
 ; GISEL-NEXT:bl _objc_object
 ; GISEL-NEXT:  Ltmp1:
 ; GISEL-NEXT:  ; %bb.1: ; %invoke.cont
-; GISEL-NEXT:ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; GISEL-NEXT:  Lloh0:
+; GISEL-NEXT:adrp x1, _objc_release@GOTPAGE
 ; GISEL-NEXT:mov x0, x19
+; GISEL-NEXT:  Lloh1:
+; GISEL-NEXT:ldr x1, [x1, _objc_release@GOTPAGEOFF]
+; GISEL-NEXT:ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; GISEL-NEXT:ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
-; GISEL-NEXT:b _objc_release
+; GISEL-NEXT:br x1

davemgreen wrote:

@fhahn, @TNorthover do these sound OK to you?

https://github.com/llvm/llvm-project/pull/78890
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [AArch64][TargetParser] Add mcpu alias for Microsoft Azure Cobalt 100. (PR #79614)

2024-01-26 Thread David Green via cfe-commits

davemgreen wrote:

It looks like this needs to update testAArch64CPUArchList too. Otherwise it LGTM

https://github.com/llvm/llvm-project/pull/79614
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[libc] [clang-tools-extra] [lldb] [flang] [mlir] [llvm] [clang] [compiler-rt] [libcxx] [AArch64] add intrinsic to generate a bfi instruction (PR #79672)

2024-01-28 Thread David Green via cfe-commits

davemgreen wrote:

Hello. Can you explain why this is needed, as opposed to using the equivalent 
shift/and/ors?

https://github.com/llvm/llvm-project/pull/79672
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang-tools-extra] [compiler-rt] [libc] [flang] [mlir] [libcxx] [lldb] [llvm] [clang] [AArch64] add intrinsic to generate a bfi instruction (PR #79672)

2024-01-29 Thread David Green via cfe-commits

davemgreen wrote:

OK. We would not usually add intrinsics like this without a strong motivating
case that could not be optimized in some other way. It is better to use
target-independent options when available, and inline assembly is available as
a fallback if it is really needed. But I would recommend that they use normal
and/or/shift operations and let us know about places the compiler isn't 
optimizing them as well as it could be.

https://github.com/llvm/llvm-project/pull/79672
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64][TargetParser] Add mcpu alias for Microsoft Azure Cobalt 100. (PR #79614)

2024-01-29 Thread David Green via cfe-commits

https://github.com/davemgreen approved this pull request.

Thanks. LGTM too.

https://github.com/llvm/llvm-project/pull/79614
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang-tools-extra] [llvm] [clang] [AArch64][SVE2] Lower OR to SLI/SRI (PR #77555)

2024-01-11 Thread David Green via cfe-commits

https://github.com/davemgreen approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/77555
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [TargetParser] Define AEK_FCMA and AEK_JSCVT for tsv110 (PR #75516)

2024-01-14 Thread David Green via cfe-commits

https://github.com/davemgreen approved this pull request.

https://github.com/ARM-software/acle/pull/279 was committed recently, and I
think this lines up with the final version of it. I think this LGTM in that
case.

https://github.com/llvm/llvm-project/pull/75516
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 170de3d - [ParserTest] Move raw string literal out of macro

2020-01-05 Thread David Green via cfe-commits

Author: David Green
Date: 2020-01-05T11:24:04Z
New Revision: 170de3de2eea8eb7f514dfa64d3f845ef10d8425

URL: 
https://github.com/llvm/llvm-project/commit/170de3de2eea8eb7f514dfa64d3f845ef10d8425
DIFF: 
https://github.com/llvm/llvm-project/commit/170de3de2eea8eb7f514dfa64d3f845ef10d8425.diff

LOG: [ParserTest] Move raw string literal out of macro

Some combinations of gcc and ccache do not deal well with raw strings in
macros. Moving the string out to attempt to fix the bots.

Added: 


Modified: 
clang/unittests/ASTMatchers/Dynamic/ParserTest.cpp

Removed: 




diff  --git a/clang/unittests/ASTMatchers/Dynamic/ParserTest.cpp 
b/clang/unittests/ASTMatchers/Dynamic/ParserTest.cpp
index 67fc70790296..6c07c8390256 100644
--- a/clang/unittests/ASTMatchers/Dynamic/ParserTest.cpp
+++ b/clang/unittests/ASTMatchers/Dynamic/ParserTest.cpp
@@ -437,9 +437,9 @@ decl()))matcher";
 )matcher";
 M = Parser::parseMatcherExpression(Code, nullptr, nullptr, &Error);
 EXPECT_FALSE(M.hasValue());
-EXPECT_EQ(R"error(1:1: Error parsing argument 1 for matcher varDecl.
-2:3: Matcher not found: doesNotExist)error",
-  Error.toStringFull());
+StringRef Expected = R"error(1:1: Error parsing argument 1 for matcher 
varDecl.
+2:3: Matcher not found: doesNotExist)error";
+EXPECT_EQ(Expected, Error.toStringFull());
   }
 }
 



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] ab2ed8b - [SVE] Regenerate sve vector bits tests. NFC

2020-09-11 Thread David Green via cfe-commits

Author: David Green
Date: 2020-09-11T18:51:57+01:00
New Revision: ab2ed8bce9e924a2fc734ca4369419c18d124043

URL: 
https://github.com/llvm/llvm-project/commit/ab2ed8bce9e924a2fc734ca4369419c18d124043
DIFF: 
https://github.com/llvm/llvm-project/commit/ab2ed8bce9e924a2fc734ca4369419c18d124043.diff

LOG: [SVE] Regenerate sve vector bits tests. NFC

Added: 


Modified: 
clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c

Removed: 




diff  --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c 
b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
index cab424c3dbe1..84559e9edb9a 100644
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
@@ -31,21 +31,21 @@ DEFINE_STRUCT(bool)
 // CHECK-128-NEXT:  entry:
 // CHECK-128-NEXT:[[ARRAYIDX:%.*]] = getelementptr inbounds 
[[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0
 // CHECK-128-NEXT:[[TMP0:%.*]] = bitcast <2 x i64>* [[ARRAYIDX]] to 
*
-// CHECK-128-NEXT:[[TMP1:%.*]] = load , * [[TMP0]], align 16, !tbaa !2
+// CHECK-128-NEXT:[[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA2:!tbaa !.*]]
 // CHECK-128-NEXT:ret  [[TMP1]]
 //
 // CHECK-256-LABEL: @read_int64(
 // CHECK-256-NEXT:  entry:
 // CHECK-256-NEXT:[[ARRAYIDX:%.*]] = getelementptr inbounds 
[[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0
 // CHECK-256-NEXT:[[TMP0:%.*]] = bitcast <4 x i64>* [[ARRAYIDX]] to 
*
-// CHECK-256-NEXT:[[TMP1:%.*]] = load , * [[TMP0]], align 16, !tbaa !2
+// CHECK-256-NEXT:[[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA2:!tbaa !.*]]
 // CHECK-256-NEXT:ret  [[TMP1]]
 //
 // CHECK-512-LABEL: @read_int64(
 // CHECK-512-NEXT:  entry:
 // CHECK-512-NEXT:[[ARRAYIDX:%.*]] = getelementptr inbounds 
[[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0
 // CHECK-512-NEXT:[[TMP0:%.*]] = bitcast <8 x i64>* [[ARRAYIDX]] to 
*
-// CHECK-512-NEXT:[[TMP1:%.*]] = load , * [[TMP0]], align 16, !tbaa !2
+// CHECK-512-NEXT:[[TMP1:%.*]] = load , * [[TMP0]], align 16, [[TBAA2:!tbaa !.*]]
 // CHECK-512-NEXT:ret  [[TMP1]]
 //
 svint64_t read_int64(struct struct_int64 *s) {
@@ -55,31 +55,31 @@ svint64_t read_int64(struct struct_int64 *s) {
 // CHECK-128-LABEL: @write_int64(
 // CHECK-128-NEXT:  entry:
 // CHECK-128-NEXT:[[X_ADDR:%.*]] = alloca , align 16
-// CHECK-128-NEXT:store  [[X:%.*]], * 
[[X_ADDR]], align 16, !tbaa !5
+// CHECK-128-NEXT:store  [[X:%.*]], * 
[[X_ADDR]], align 16, [[TBAA5:!tbaa !.*]]
 // CHECK-128-NEXT:[[TMP0:%.*]] = bitcast * [[X_ADDR]] to 
<2 x i64>*
-// CHECK-128-NEXT:[[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], 
align 16, !tbaa !2
+// CHECK-128-NEXT:[[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], 
align 16, [[TBAA2]]
 // CHECK-128-NEXT:[[ARRAYIDX:%.*]] = getelementptr inbounds 
[[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-128-NEXT:store <2 x i64> [[TMP1]], <2 x i64>* [[ARRAYIDX]], align 
16, !tbaa !2
+// CHECK-128-NEXT:store <2 x i64> [[TMP1]], <2 x i64>* [[ARRAYIDX]], align 
16, [[TBAA2]]
 // CHECK-128-NEXT:ret void
 //
 // CHECK-256-LABEL: @write_int64(
 // CHECK-256-NEXT:  entry:
 // CHECK-256-NEXT:[[X_ADDR:%.*]] = alloca , align 16
-// CHECK-256-NEXT:store  [[X:%.*]], * 
[[X_ADDR]], align 16, !tbaa !5
+// CHECK-256-NEXT:store  [[X:%.*]], * 
[[X_ADDR]], align 16, [[TBAA5:!tbaa !.*]]
 // CHECK-256-NEXT:[[TMP0:%.*]] = bitcast * [[X_ADDR]] to 
<4 x i64>*
-// CHECK-256-NEXT:[[TMP1:%.*]] = load <4 x i64>, <4 x i64>* [[TMP0]], 
align 16, !tbaa !2
+// CHECK-256-NEXT:[[TMP1:%.*]] = load <4 x i64>, <4 x i64>* [[TMP0]], 
align 16, [[TBAA2]]
 // CHECK-256-NEXT:[[ARRAYIDX:%.*]] = getelementptr inbounds 
[[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0
-// CHECK-256-NEXT:store <4 x i64> [[TMP1]], <4 x i64>* [[ARRAYIDX]], align 
16, !tbaa !2
+// CHECK-256-NEXT:store <4 x i64> [[TMP1]], <4 x i64>* [[ARRAYIDX]], align 
16, [[TBAA2]]
 // CHECK-256-NEXT:ret void
 //
 // CHECK-512-LABEL: @write_int64(
 // CHECK-512-NEXT:  entry:
 // CHECK-512-NEXT:[[X_ADDR:%.*]] = alloca , align 16
-// CHECK-512-NEXT:store  [[X:%.*]], * 
[[X_ADDR]], align 16, !tbaa !5
+// CHECK-512-NEXT:store  [[X:%.*]], * 
[[X_ADDR]], align 16, [[TBAA5:!tbaa !.*]]
 // CHECK-512-NEXT:[[TMP0:%.*]] = bitcast * [[X_ADDR]] to 
<8 x i64>*
-// CHECK-512-NEXT:[[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[TMP0]], 
align 16, !tbaa !2
+// CHECK-512-NEXT:[[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[TMP0]], 
align 16, [[TBAA2]]
 // CHECK-512-NEXT:[[ARRAYIDX:%.*]

[clang] 667e800 - [ARM] Remove -O3 from mve intrinsic tests. NFC

2020-09-06 Thread David Green via cfe-commits

Author: David Green
Date: 2020-09-06T13:19:55+01:00
New Revision: 667e800bb3a8c1bdda0cabad7549c766b3424064

URL: 
https://github.com/llvm/llvm-project/commit/667e800bb3a8c1bdda0cabad7549c766b3424064
DIFF: 
https://github.com/llvm/llvm-project/commit/667e800bb3a8c1bdda0cabad7549c766b3424064.diff

LOG: [ARM] Remove -O3 from mve intrinsic tests. NFC

Added: 


Modified: 
clang/test/CodeGen/arm-mve-intrinsics/vmaxaq.c
clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c
clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c
clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c
clang/test/CodeGen/arm-mve-intrinsics/vminaq.c
clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c
clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c
clang/test/CodeGen/arm-mve-intrinsics/vminq.c

Removed: 




diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxaq.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vmaxaq.c
index 03ab37474ba0..a656657b6619 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vmaxaq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxaq.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 
-disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 
-disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | 
FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone 
-S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone 
-DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
 
 #include 
 
@@ -9,8 +9,8 @@
 // CHECK-NEXT:[[TMP0:%.*]] = icmp slt <16 x i8> [[B:%.*]], zeroinitializer
 // CHECK-NEXT:[[TMP1:%.*]] = sub <16 x i8> zeroinitializer, [[B]]
 // CHECK-NEXT:[[TMP2:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> 
[[TMP1]], <16 x i8> [[B]]
-// CHECK-NEXT:[[TMP3:%.*]] = icmp ugt <16 x i8> [[TMP2]], [[A:%.*]]
-// CHECK-NEXT:[[TMP4:%.*]] = select <16 x i1> [[TMP3]], <16 x i8> 
[[TMP2]], <16 x i8> [[A]]
+// CHECK-NEXT:[[TMP3:%.*]] = icmp uge <16 x i8> [[A:%.*]], [[TMP2]]
+// CHECK-NEXT:[[TMP4:%.*]] = select <16 x i1> [[TMP3]], <16 x i8> [[A]], 
<16 x i8> [[TMP2]]
 // CHECK-NEXT:ret <16 x i8> [[TMP4]]
 //
 uint8x16_t test_vmaxaq_s8(uint8x16_t a, int8x16_t b)
@@ -27,8 +27,8 @@ uint8x16_t test_vmaxaq_s8(uint8x16_t a, int8x16_t b)
 // CHECK-NEXT:[[TMP0:%.*]] = icmp slt <8 x i16> [[B:%.*]], zeroinitializer
 // CHECK-NEXT:[[TMP1:%.*]] = sub <8 x i16> zeroinitializer, [[B]]
 // CHECK-NEXT:[[TMP2:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[TMP1]], 
<8 x i16> [[B]]
-// CHECK-NEXT:[[TMP3:%.*]] = icmp ugt <8 x i16> [[TMP2]], [[A:%.*]]
-// CHECK-NEXT:[[TMP4:%.*]] = select <8 x i1> [[TMP3]], <8 x i16> [[TMP2]], 
<8 x i16> [[A]]
+// CHECK-NEXT:[[TMP3:%.*]] = icmp uge <8 x i16> [[A:%.*]], [[TMP2]]
+// CHECK-NEXT:[[TMP4:%.*]] = select <8 x i1> [[TMP3]], <8 x i16> [[A]], <8 
x i16> [[TMP2]]
 // CHECK-NEXT:ret <8 x i16> [[TMP4]]
 //
 uint16x8_t test_vmaxaq_s16(uint16x8_t a, int16x8_t b)
@@ -45,8 +45,8 @@ uint16x8_t test_vmaxaq_s16(uint16x8_t a, int16x8_t b)
 // CHECK-NEXT:[[TMP0:%.*]] = icmp slt <4 x i32> [[B:%.*]], zeroinitializer
 // CHECK-NEXT:[[TMP1:%.*]] = sub <4 x i32> zeroinitializer, [[B]]
 // CHECK-NEXT:[[TMP2:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP1]], 
<4 x i32> [[B]]
-// CHECK-NEXT:[[TMP3:%.*]] = icmp ugt <4 x i32> [[TMP2]], [[A:%.*]]
-// CHECK-NEXT:[[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP2]], 
<4 x i32> [[A]]
+// CHECK-NEXT:[[TMP3:%.*]] = icmp uge <4 x i32> [[A:%.*]], [[TMP2]]
+// CHECK-NEXT:[[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[A]], <4 
x i32> [[TMP2]]
 // CHECK-NEXT:ret <4 x i32> [[TMP4]]
 //
 uint32x4_t test_vmaxaq_s32(uint32x4_t a, int32x4_t b)
@@ -61,8 +61,8 @@ uint32x4_t test_vmaxaq_s32(uint32x4_t a, int32x4_t b)
 // CHECK-LABEL: @test_vmaxaq_m_s8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:[[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:[[TMP1:%.*]] = tail call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT:[[TMP2:%.*]] = tail call <16 x i8> 
@llvm.arm.mve.vmaxa.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> 
[[B:%.*]], <16 x i1> [[TMP1]])
+// CHECK-NEXT:[[TMP1:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:[[TMP2:%.*]] = call <16 x i8> 
@llvm.arm.mve.vmaxa.predicated.v16i8.v

[clang] a15bd0b - [AIX] Add REQUIRES for powerpc test. NFC

2020-10-08 Thread David Green via cfe-commits

Author: David Green
Date: 2020-10-08T18:40:09+01:00
New Revision: a15bd0bfc20c2b2955c59450a67b6e8efe89c708

URL: 
https://github.com/llvm/llvm-project/commit/a15bd0bfc20c2b2955c59450a67b6e8efe89c708
DIFF: 
https://github.com/llvm/llvm-project/commit/a15bd0bfc20c2b2955c59450a67b6e8efe89c708.diff

LOG: [AIX] Add REQUIRES for powerpc test. NFC

Added: 


Modified: 
clang/test/CodeGen/aix-ignore-xcoff-visibility.cpp

Removed: 




diff  --git a/clang/test/CodeGen/aix-ignore-xcoff-visibility.cpp 
b/clang/test/CodeGen/aix-ignore-xcoff-visibility.cpp
index 0fccce650094..ef424cdb7d41 100644
--- a/clang/test/CodeGen/aix-ignore-xcoff-visibility.cpp
+++ b/clang/test/CodeGen/aix-ignore-xcoff-visibility.cpp
@@ -1,3 +1,4 @@
+// REQUIRES: powerpc-registered-target
 // RUN: %clang_cc1 -triple powerpc-unknown-aix -o - -x c++ -S  %s  |\
 // RUN:   FileCheck --check-prefix=IGNOREVISIBILITY-ASM %s
 



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 40ce58d - Revert "[clang] Refactor AST printing tests to share more infrastructure"

2021-07-13 Thread David Green via cfe-commits

Author: David Green
Date: 2021-07-14T04:40:47+01:00
New Revision: 40ce58d0ca10a1195da82895749b67f30f000243

URL: 
https://github.com/llvm/llvm-project/commit/40ce58d0ca10a1195da82895749b67f30f000243
DIFF: 
https://github.com/llvm/llvm-project/commit/40ce58d0ca10a1195da82895749b67f30f000243.diff

LOG: Revert "[clang] Refactor AST printing tests to share more infrastructure"

This reverts commit 20176bc7dd3f431db4c3d59b51a9f53d52190c82 as some
versions of GCC do not seem to handle the new code very well. They
complain about:

/tmp/ccqUQZyw.s: Assembler messages:
/tmp/ccqUQZyw.s:1151: Error: symbol 
`_ZNSt14_Function_base13_Base_managerIN5clangUlPKNS1_4StmtEE2_EE10_M_managerERSt9_Any_dataRKS7_St18_Manager_operation'
 is already defined
/tmp/ccqUQZyw.s:11963: Error: symbol 
`_ZNSt17_Function_handlerIFbPKN5clang4StmtEENS0_UlS3_E2_EE9_M_invokeERKSt9_Any_dataOS3_'
 is already defined

This seems like it is some GCC issue, but multiple buildbots (and my
local machine) are all failing because of it.

Added: 


Modified: 
clang/unittests/AST/ASTPrint.h
clang/unittests/AST/DeclPrinterTest.cpp
clang/unittests/AST/NamedDeclPrinterTest.cpp
clang/unittests/AST/StmtPrinterTest.cpp

Removed: 




diff  --git a/clang/unittests/AST/ASTPrint.h b/clang/unittests/AST/ASTPrint.h
index 0e35846c86f47..c3b6b842316d9 100644
--- a/clang/unittests/AST/ASTPrint.h
+++ b/clang/unittests/AST/ASTPrint.h
@@ -19,88 +19,72 @@
 
 namespace clang {
 
-using PrintingPolicyAdjuster = llvm::function_ref;
-
-template 
-using NodePrinter =
-std::function;
-
-template 
-using NodeFilter = std::function;
+using PolicyAdjusterType =
+Optional>;
+
+static void PrintStmt(raw_ostream &Out, const ASTContext *Context,
+  const Stmt *S, PolicyAdjusterType PolicyAdjuster) {
+  assert(S != nullptr && "Expected non-null Stmt");
+  PrintingPolicy Policy = Context->getPrintingPolicy();
+  if (PolicyAdjuster)
+(*PolicyAdjuster)(Policy);
+  S->printPretty(Out, /*Helper*/ nullptr, Policy);
+}
 
-template 
 class PrintMatch : public ast_matchers::MatchFinder::MatchCallback {
-  using PrinterT = NodePrinter;
-  using FilterT = NodeFilter;
-
   SmallString<1024> Printed;
-  unsigned NumFoundNodes;
-  PrinterT Printer;
-  FilterT Filter;
-  PrintingPolicyAdjuster PolicyAdjuster;
+  unsigned NumFoundStmts;
+  PolicyAdjusterType PolicyAdjuster;
 
 public:
-  PrintMatch(PrinterT Printer, PrintingPolicyAdjuster PolicyAdjuster,
- FilterT Filter)
-  : NumFoundNodes(0), Printer(std::move(Printer)),
-Filter(std::move(Filter)), PolicyAdjuster(PolicyAdjuster) {}
+  PrintMatch(PolicyAdjusterType PolicyAdjuster)
+  : NumFoundStmts(0), PolicyAdjuster(PolicyAdjuster) {}
 
   void run(const ast_matchers::MatchFinder::MatchResult &Result) override {
-const NodeType *N = Result.Nodes.getNodeAs("id");
-if (!N || !Filter(N))
+const Stmt *S = Result.Nodes.getNodeAs("id");
+if (!S)
   return;
-NumFoundNodes++;
-if (NumFoundNodes > 1)
+NumFoundStmts++;
+if (NumFoundStmts > 1)
   return;
 
 llvm::raw_svector_ostream Out(Printed);
-Printer(Out, Result.Context, N, PolicyAdjuster);
+PrintStmt(Out, Result.Context, S, PolicyAdjuster);
   }
 
   StringRef getPrinted() const { return Printed; }
 
-  unsigned getNumFoundNodes() const { return NumFoundNodes; }
+  unsigned getNumFoundStmts() const { return NumFoundStmts; }
 };
 
-template 
-::testing::AssertionResult PrintedNodeMatches(
-StringRef Code, const std::vector &Args,
-const Matcher &NodeMatch, StringRef ExpectedPrinted, StringRef FileName,
-NodePrinter Printer,
-PrintingPolicyAdjuster PolicyAdjuster = nullptr, bool AllowError = false,
-NodeFilter Filter = [](const NodeType *) { return true; }) {
+template 
+::testing::AssertionResult
+PrintedStmtMatches(StringRef Code, const std::vector &Args,
+   const T &NodeMatch, StringRef ExpectedPrinted,
+   PolicyAdjusterType PolicyAdjuster = None) {
 
-  PrintMatch Callback(Printer, PolicyAdjuster, Filter);
+  PrintMatch Printer(PolicyAdjuster);
   ast_matchers::MatchFinder Finder;
-  Finder.addMatcher(NodeMatch, &Callback);
+  Finder.addMatcher(NodeMatch, &Printer);
   std::unique_ptr Factory(
   tooling::newFrontendActionFactory(&Finder));
 
-  bool ToolResult;
-  if (FileName.empty()) {
-ToolResult = tooling::runToolOnCodeWithArgs(Factory->create(), Code, Args);
-  } else {
-ToolResult =
-tooling::runToolOnCodeWithArgs(Factory->create(), Code, Args, 
FileName);
-  }
-  if (!ToolResult && !AllowError)
+  if (!tooling::runToolOnCodeWithArgs(Factory->create(), Code, Args))
 return testing::AssertionFailure()
<< "Parsing error in \"" << Code.str() << "\"";
 
-  if (Callback.getNumFoundNodes() == 0)
-return testing::AssertionFailure() << "Matcher didn't find any nodes";
+  if (Printer.getNumFoundStm

[clang] 0b83a18 - [AArch64] Enablement of Cortex-X2

2021-11-01 Thread David Green via cfe-commits

Author: Mubashar Ahmad
Date: 2021-11-01T11:55:24Z
New Revision: 0b83a18a2b9db9c23082e8751c3a66ef37fc626f

URL: 
https://github.com/llvm/llvm-project/commit/0b83a18a2b9db9c23082e8751c3a66ef37fc626f
DIFF: 
https://github.com/llvm/llvm-project/commit/0b83a18a2b9db9c23082e8751c3a66ef37fc626f.diff

LOG: [AArch64] Enablement of Cortex-X2

Enables support for Cortex-X2 cores.

Differential Revision: https://reviews.llvm.org/D112459

Added: 


Modified: 
clang/docs/ReleaseNotes.rst
clang/test/Driver/aarch64-cpus.c
clang/test/Misc/target-invalid-cpu-note.c
llvm/include/llvm/Support/AArch64TargetParser.def
llvm/lib/Target/AArch64/AArch64.td
llvm/lib/Target/AArch64/AArch64Subtarget.cpp
llvm/lib/Target/AArch64/AArch64Subtarget.h
llvm/unittests/Support/TargetParserTest.cpp

Removed: 




diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index b4595b20def53..ba15803e6f482 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -193,6 +193,8 @@ Arm and AArch64 Support in Clang
 
 - Support has been added for the following processors (command-line 
identifiers in parentheses):
   - Arm Cortex-A510 (``cortex-a510``)
+  - Arm Cortex-X2 (``cortex-x2``)
+
 - The -mtune flag is no longer ignored for AArch64. It is now possible to
   tune code generation for a particular CPU with -mtune without setting any
   architectural features. For example, compiling with
@@ -200,7 +202,6 @@ Arm and AArch64 Support in Clang
   architecture features, but will enable certain optimizations specific to
   Cortex-A57 CPUs and enable the use of a more accurate scheduling model.
 
-
 Internal API Changes
 
 

diff  --git a/clang/test/Driver/aarch64-cpus.c 
b/clang/test/Driver/aarch64-cpus.c
index 4f049c79dac1b..1c64e34608377 100644
--- a/clang/test/Driver/aarch64-cpus.c
+++ b/clang/test/Driver/aarch64-cpus.c
@@ -404,6 +404,15 @@
 // RUN: %clang -target aarch64 -mcpu=cortex-a510+crypto -### -c %s 2>&1 | 
FileCheck -check-prefix=CORTEX-A510-CRYPTO %s
 // CORTEX-A510-CRYPTO: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" 
"-target-feature" "+sm4" "-target-feature" "+sha3" "-target-feature" "+sha2" 
"-target-feature" "+aes"
 
+// RUN: %clang -target aarch64 -mcpu=cortex-x2 -### -c %s 2>&1 | FileCheck 
-check-prefix=CORTEX-X2 %s
+// CORTEX-X2: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-x2"
+// CORTEX-X2-NOT: "-target-feature" "{{[+-]}}sm4"
+// CORTEX-X2-NOT: "-target-feature" "{{[+-]}}sha3"
+// CORTEX-X2-NOT: "-target-feature" "{{[+-]}}aes"
+// CORTEX-X2-SAME: {{$}}
+// RUN: %clang -target aarch64 -mcpu=cortex-x2+crypto -### -c %s 2>&1 | 
FileCheck -check-prefix=CORTEX-X2-CRYPTO %s
+// CORTEX-X2-CRYPTO: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" 
"+sm4" "-target-feature" "+sha3" "-target-feature" "+sha2" "-target-feature" 
"+aes"
+
 // RUN: %clang -target aarch64_be -mcpu=cortex-a57 -### -c %s 2>&1 | FileCheck 
-check-prefix=CA57-BE %s
 // RUN: %clang -target aarch64 -mbig-endian -mcpu=cortex-a57 -### -c %s 2>&1 | 
FileCheck -check-prefix=CA57-BE %s
 // RUN: %clang -target aarch64_be -mbig-endian -mcpu=cortex-a57 -### -c %s 
2>&1 | FileCheck -check-prefix=CA57-BE %s

diff  --git a/clang/test/Misc/target-invalid-cpu-note.c 
b/clang/test/Misc/target-invalid-cpu-note.c
index efdc92263e7e9..62aabab678172 100644
--- a/clang/test/Misc/target-invalid-cpu-note.c
+++ b/clang/test/Misc/target-invalid-cpu-note.c
@@ -5,11 +5,11 @@
 
 // RUN: not %clang_cc1 -triple arm64--- -target-cpu not-a-cpu -fsyntax-only %s 
2>&1 | FileCheck %s --check-prefix AARCH64
 // AARCH64: error: unknown target CPU 'not-a-cpu'
-// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, 
cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, 
cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, 
cortex-a78, cortex-a78c, cortex-r82, cortex-x1, neoverse-e1, neoverse-n1, 
neoverse-n2, neoverse-512tvb, neoverse-v1, cyclone, apple-a7, apple-a8, 
apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-m1, 
apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, 
thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, 
tsv110, a64fx, carmel{{$}}
+// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, 
cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, 
cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, 
cortex-a78, cortex-a78c, cortex-r82, cortex-x1, cortex-x2, neoverse-e1, 
neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, cyclone, apple-a7, 
apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, 
apple-m1, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, 
kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, 
thunderxt83, tsv110, a64fx, carmel{

[clang] 5ae949a - [Clang][ARM] Reenable arm_acle.c test.

2021-03-12 Thread David Green via cfe-commits

Author: David Green
Date: 2021-03-12T19:21:21Z
New Revision: 5ae949a9276542b46f41374fbe7aee01e480d9d6

URL: 
https://github.com/llvm/llvm-project/commit/5ae949a9276542b46f41374fbe7aee01e480d9d6
DIFF: 
https://github.com/llvm/llvm-project/commit/5ae949a9276542b46f41374fbe7aee01e480d9d6.diff

LOG: [Clang][ARM] Reenable arm_acle.c test.

This test was apparently disabled in 6fcd4e080f09c9765d6, without any
sign of how it was going to be reenabled. This patch rewrites the test
to use update_cc_test_checks, with midend optimizations other than
mem2reg disabled.

Added: 


Modified: 
clang/test/CodeGen/arm_acle.c

Removed: 




diff  --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
index 9f0ad22bda4f..7e85c767c301 100644
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -1,125 +1,229 @@
-// RUN: %clang_cc1 -ffreestanding -triple armv8-eabi -target-cpu cortex-a57 
-O2  -fno-experimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s 
-check-prefix=ARM -check-prefix=AArch32 -check-prefix=ARM-LEGACY 
-check-prefix=AArch32-LEGACY
-// RUN: %clang_cc1 -ffreestanding -triple armv8-eabi -target-cpu cortex-a57 
-O2  -fexperimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s 
-check-prefix=ARM -check-prefix=AArch32 -check-prefix=ARM-NEWPM 
-check-prefix=AArch32-NEWPM
-// RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 
-target-feature +neon -target-feature +crc -target-feature +crypto -O2 
-fno-experimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s 
-check-prefix=ARM -check-prefix=AArch64 -check-prefix=ARM-LEGACY 
-check-prefix=AArch64-LEGACY
-// RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 
-target-feature +neon -target-feature +crc -target-feature +crypto -O2 
-fexperimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s 
-check-prefix=ARM -check-prefix=AArch64 -check-prefix=ARM-NEWPM 
-check-prefix=AArch64-NEWPM
-// RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 
-target-feature +v8.3a -O2 -fexperimental-new-pass-manager -S -emit-llvm -o - 
%s | FileCheck %s -check-prefix=AArch64-v8_3
-// RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 
-target-feature +v8.4a -O2 -fexperimental-new-pass-manager -S -emit-llvm -o - 
%s | FileCheck %s -check-prefix=AArch64-v8_3
-// RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 
-target-feature +v8.5a -O2 -fexperimental-new-pass-manager -S -emit-llvm -o - 
%s | FileCheck %s -check-prefix=AArch64-v8_3
-
-// REQUIRES: rewrite
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -ffreestanding -triple armv8-eabi -target-cpu cortex-a57 
-O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s 
-check-prefixes=ARM,AArch32
+// RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 
-target-feature +neon -target-feature +crc -target-feature +crypto -O0 
-disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s 
-check-prefixes=ARM,AArch64
+// RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 
-target-feature +v8.3a -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S 
-mem2reg | FileCheck %s -check-prefixes=ARM,AArch64,AArch6483
+// RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 
-target-feature +v8.5a -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S 
-mem2reg | FileCheck %s -check-prefixes=ARM,AArch64,AArch6483
 
 #include 
 
 /* 8 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */
 /* 8.3 Memory Barriers */
-// ARM-LABEL: test_dmb
-// AArch32: call void @llvm.arm.dmb(i32 1)
-// AArch64: call void @llvm.aarch64.dmb(i32 1)
+
+// AArch32-LABEL: @test_dmb(
+// AArch32-NEXT:  entry:
+// AArch32-NEXT:call void @llvm.arm.dmb(i32 1)
+// AArch32-NEXT:ret void
+//
+// AArch64-LABEL: @test_dmb(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:call void @llvm.aarch64.dmb(i32 1)
+// AArch64-NEXT:ret void
+//
 void test_dmb(void) {
   __dmb(1);
 }
 
-// ARM-LABEL: test_dsb
-// AArch32: call void @llvm.arm.dsb(i32 2)
-// AArch64: call void @llvm.aarch64.dsb(i32 2)
+// AArch32-LABEL: @test_dsb(
+// AArch32-NEXT:  entry:
+// AArch32-NEXT:call void @llvm.arm.dsb(i32 2)
+// AArch32-NEXT:ret void
+//
+// AArch64-LABEL: @test_dsb(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:call void @llvm.aarch64.dsb(i32 2)
+// AArch64-NEXT:ret void
+//
 void test_dsb(void) {
   __dsb(2);
 }
 
-// ARM-LABEL: test_isb
-// AArch32: call void @llvm.arm.isb(i32 3)
-// AArch64: call void @llvm.aarch64.isb(i32 3)
+// AArch32-LABEL: @test_isb(
+// AArch32-NEXT:  entry:
+// AArch32-NEXT:call void @llvm.arm.isb(i32 3)
+// AArch32-NEXT:ret void
+//
+// AArch64-LABEL: @test_isb(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:call void @llv

[clang] 2b3c813 - [Clang][ARM] Reenable arm_acle.c test.

2021-03-14 Thread David Green via cfe-commits

Author: David Green
Date: 2021-03-14T10:59:24Z
New Revision: 2b3c8131434374caf3825c2bd4e012c85f8128de

URL: 
https://github.com/llvm/llvm-project/commit/2b3c8131434374caf3825c2bd4e012c85f8128de
DIFF: 
https://github.com/llvm/llvm-project/commit/2b3c8131434374caf3825c2bd4e012c85f8128de.diff

LOG: [Clang][ARM] Reenable arm_acle.c test.

This test was apparently disabled in 6fcd4e080f09c9765d6, without any
sign of how it was going to be reenabled. This patch rewrites the test
to use update_cc_test_checks, with midend optimizations other than
mem2reg disabled.

The first attempt of this patch in 5ae949a9276542b46 failed on bots even
though it worked locally.  I've attempted to adjust the RUN lines and
made the test AArch64/ARM specific.

Differential Revision: https://reviews.llvm.org/D98510

Added: 


Modified: 
clang/test/CodeGen/arm_acle.c

Removed: 




diff  --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
index 9f0ad22bda4f..dead8a085f86 100644
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -1,125 +1,231 @@
-// RUN: %clang_cc1 -ffreestanding -triple armv8-eabi -target-cpu cortex-a57 
-O2  -fno-experimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s 
-check-prefix=ARM -check-prefix=AArch32 -check-prefix=ARM-LEGACY 
-check-prefix=AArch32-LEGACY
-// RUN: %clang_cc1 -ffreestanding -triple armv8-eabi -target-cpu cortex-a57 
-O2  -fexperimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s 
-check-prefix=ARM -check-prefix=AArch32 -check-prefix=ARM-NEWPM 
-check-prefix=AArch32-NEWPM
-// RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 
-target-feature +neon -target-feature +crc -target-feature +crypto -O2 
-fno-experimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s 
-check-prefix=ARM -check-prefix=AArch64 -check-prefix=ARM-LEGACY 
-check-prefix=AArch64-LEGACY
-// RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 
-target-feature +neon -target-feature +crc -target-feature +crypto -O2 
-fexperimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s 
-check-prefix=ARM -check-prefix=AArch64 -check-prefix=ARM-NEWPM 
-check-prefix=AArch64-NEWPM
-// RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 
-target-feature +v8.3a -O2 -fexperimental-new-pass-manager -S -emit-llvm -o - 
%s | FileCheck %s -check-prefix=AArch64-v8_3
-// RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 
-target-feature +v8.4a -O2 -fexperimental-new-pass-manager -S -emit-llvm -o - 
%s | FileCheck %s -check-prefix=AArch64-v8_3
-// RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 
-target-feature +v8.5a -O2 -fexperimental-new-pass-manager -S -emit-llvm -o - 
%s | FileCheck %s -check-prefix=AArch64-v8_3
-
-// REQUIRES: rewrite
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -ffreestanding -triple armv8a-none-eabi -target-feature 
+crc -target-feature +dsp -O0 -disable-O0-optnone 
-fexperimental-new-pass-manager -S -emit-llvm -o - %s | opt -S -mem2reg | 
FileCheck %s -check-prefixes=ARM,AArch32
+// RUN: %clang_cc1 -ffreestanding -triple aarch64-none-eabi -target-feature 
+neon -target-feature +crc -target-feature +crypto -O0 -disable-O0-optnone 
-fexperimental-new-pass-manager -S -emit-llvm -o - %s | opt -S -mem2reg | 
FileCheck %s -check-prefixes=ARM,AArch64
+// RUN: %clang_cc1 -ffreestanding -triple aarch64-none-eabi -target-feature 
+v8.3a -O0 -disable-O0-optnone -fexperimental-new-pass-manager -S -emit-llvm -o 
- %s | opt -S -mem2reg | FileCheck %s -check-prefixes=ARM,AArch64,AArch6483
+// RUN: %clang_cc1 -ffreestanding -triple aarch64-none-eabi -target-feature 
+v8.5a -O0 -disable-O0-optnone -fexperimental-new-pass-manager -S -emit-llvm -o 
- %s | opt -S -mem2reg | FileCheck %s -check-prefixes=ARM,AArch64,AArch6483
 
 #include 
 
+// REQUIRES: arm-registered-target,aarch64-registered-target
+
 /* 8 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */
 /* 8.3 Memory Barriers */
-// ARM-LABEL: test_dmb
-// AArch32: call void @llvm.arm.dmb(i32 1)
-// AArch64: call void @llvm.aarch64.dmb(i32 1)
+
+// AArch32-LABEL: @test_dmb(
+// AArch32-NEXT:  entry:
+// AArch32-NEXT:call void @llvm.arm.dmb(i32 1)
+// AArch32-NEXT:ret void
+//
+// AArch64-LABEL: @test_dmb(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:call void @llvm.aarch64.dmb(i32 1)
+// AArch64-NEXT:ret void
+//
 void test_dmb(void) {
   __dmb(1);
 }
 
-// ARM-LABEL: test_dsb
-// AArch32: call void @llvm.arm.dsb(i32 2)
-// AArch64: call void @llvm.aarch64.dsb(i32 2)
+// AArch32-LABEL: @test_dsb(
+// AArch32-NEXT:  entry:
+// AArch32-NEXT:call void @llvm.arm.dsb(i32 2)
+// AArch32-NEXT:ret void
+//
+// AArch64-LABEL: @test_dsb(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:call void @llvm.aarch64.dsb(i32 2)
+// AArch64-NEXT:ret

[clang] 6f1e430 - [AArch64] Alter v8.5a FRINT neon intrinsics to be target-based, not preprocessor based

2022-10-24 Thread David Green via cfe-commits

Author: David Green
Date: 2022-10-24T11:22:06+01:00
New Revision: 6f1e430360591e22fb163ec77b78efd2de4c1d95

URL: 
https://github.com/llvm/llvm-project/commit/6f1e430360591e22fb163ec77b78efd2de4c1d95
DIFF: 
https://github.com/llvm/llvm-project/commit/6f1e430360591e22fb163ec77b78efd2de4c1d95.diff

LOG: [AArch64] Alter v8.5a FRINT neon intrinsics to be target-based, not 
preprocessor based

This switches the v8.5-a FRINT intrinsics over to be target-gated rather
than guarded behind preprocessor defines. This one is pretty simple, being
AArch64 only.
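
As a rough usage sketch (not taken from the patch, and assuming the compiler
accepts the arch=armv8.5-a spelling in the target attribute), target-gating
lets a single function opt in to the v8.5-a intrinsics while the rest of the
file is built for baseline AArch64, with no __ARM_FEATURE_FRINT check:

    #include <arm_neon.h>

    /* Hypothetical example: only this function requires v8.5-a. */
    __attribute__((target("arch=armv8.5-a")))
    float32x4_t round_within_int32(float32x4_t v) {
      return vrnd32xq_f32(v);  /* FRINT32X */
    }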

Differential Revision: https://reviews.llvm.org/D135646

Added: 


Modified: 
clang/include/clang/Basic/arm_neon.td
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/Sema/aarch64-neon-target.c

Removed: 




diff  --git a/clang/include/clang/Basic/arm_neon.td 
b/clang/include/clang/Basic/arm_neon.td
index a5e9dc2f16390..a7737a5f81e06 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -1225,7 +1225,7 @@ def FRINTZ_S64 : SInst<"vrnd", "..", "dQd">;
 def FRINTI_S64 : SInst<"vrndi", "..", "dQd">;
 }
 
-let ArchGuard = "defined(__aarch64__) && defined(__ARM_FEATURE_FRINT)" in {
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.5a" in {
 def FRINT32X_S32 : SInst<"vrnd32x", "..", "fQf">;
 def FRINT32Z_S32 : SInst<"vrnd32z", "..", "fQf">;
 def FRINT64X_S32 : SInst<"vrnd64x", "..", "fQf">;

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 2a3da01febe2b..fbb6e85e37d6e 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6122,14 +6122,14 @@ static const ARMVectorIntrinsicInfo 
AArch64SIMDIntrinsicMap[] = {
   NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
   NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | 
UnsignedAlts),
   NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | 
UnsignedAlts),
-  NEONMAP1(vrnd32x_v, aarch64_neon_frint32x, Add1ArgType),
-  NEONMAP1(vrnd32xq_v, aarch64_neon_frint32x, Add1ArgType),
-  NEONMAP1(vrnd32z_v, aarch64_neon_frint32z, Add1ArgType),
-  NEONMAP1(vrnd32zq_v, aarch64_neon_frint32z, Add1ArgType),
-  NEONMAP1(vrnd64x_v, aarch64_neon_frint64x, Add1ArgType),
-  NEONMAP1(vrnd64xq_v, aarch64_neon_frint64x, Add1ArgType),
-  NEONMAP1(vrnd64z_v, aarch64_neon_frint64z, Add1ArgType),
-  NEONMAP1(vrnd64zq_v, aarch64_neon_frint64z, Add1ArgType),
+  NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
+  NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
+  NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
+  NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
+  NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
+  NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
+  NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
+  NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
   NEONMAP0(vrndi_v),
   NEONMAP0(vrndiq_v),
   NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
@@ -11313,26 +11313,26 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
   : Intrinsic::trunc;
 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
   }
-  case NEON::BI__builtin_neon_vrnd32x_v:
-  case NEON::BI__builtin_neon_vrnd32xq_v: {
+  case NEON::BI__builtin_neon_vrnd32x_f32:
+  case NEON::BI__builtin_neon_vrnd32xq_f32: {
 Ops.push_back(EmitScalarExpr(E->getArg(0)));
 Int = Intrinsic::aarch64_neon_frint32x;
 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
   }
-  case NEON::BI__builtin_neon_vrnd32z_v:
-  case NEON::BI__builtin_neon_vrnd32zq_v: {
+  case NEON::BI__builtin_neon_vrnd32z_f32:
+  case NEON::BI__builtin_neon_vrnd32zq_f32: {
 Ops.push_back(EmitScalarExpr(E->getArg(0)));
 Int = Intrinsic::aarch64_neon_frint32z;
 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
   }
-  case NEON::BI__builtin_neon_vrnd64x_v:
-  case NEON::BI__builtin_neon_vrnd64xq_v: {
+  case NEON::BI__builtin_neon_vrnd64x_f32:
+  case NEON::BI__builtin_neon_vrnd64xq_f32: {
 Ops.push_back(EmitScalarExpr(E->getArg(0)));
 Int = Intrinsic::aarch64_neon_frint64x;
 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
   }
-  case NEON::BI__builtin_neon_vrnd64z_v:
-  case NEON::BI__builtin_neon_vrnd64zq_v: {
+  case NEON::BI__builtin_neon_vrnd64z_f32:
+  case NEON::BI__builtin_neon_vrnd64zq_f32: {
 Ops.push_back(EmitScalarExpr(E->getArg(0)));
 Int = Intrinsic::aarch64_neon_frint64z;
 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");

diff --git a/clang/test/Sema/aarch64-neon-target.c b/clang/test/Sema/aarch64-neon-target.c
index 5007f8f7dfae9..8b0f7f6006502 100644
--- a/clang/test/Sema/aarch64-neon-target.c
+++ b/clang/test/Sema/aarch64-neon-target.c
@@ -41,6 +41,11 @@ void bf16(uint32x2_t v2i32, uint32

[clang] 9c48b7f - [AArch64][ARM] Alter v8.1a neon intrinsics to be target-based, not preprocessor based

2022-10-25 Thread David Green via cfe-commits

Author: David Green
Date: 2022-10-25T09:02:52+01:00
New Revision: 9c48b7f0e7a96e126a369d6036eff1bb259116c8

URL: 
https://github.com/llvm/llvm-project/commit/9c48b7f0e7a96e126a369d6036eff1bb259116c8
DIFF: 
https://github.com/llvm/llvm-project/commit/9c48b7f0e7a96e126a369d6036eff1bb259116c8.diff

LOG: [AArch64][ARM] Alter v8.1a neon intrinsics to be target-based, not 
preprocessor based

As a continuation of D132034, this switches the QRDMX v8.1a neon
intrinsics over from preprocessor defines to target-feature gating. As
there is no "rdma" or "qrdmx" target feature, they use the "v8.1a"
architecture feature directly.

This works well for AArch64, but something needs to be done for Arm at
the same time, as both targets share the same header and tablegen
emitter. This patch opts for adding "v8.1a" and all dependent target
features to the Arm TargetParser, similar to what was recently done for
AArch64, but via initFeatureMap when the architecture is parsed. I
attempted to keep the code similar to the AArch64 backend.

Otherwise this is similar to the changes made in D132034.

Differential Revision: https://reviews.llvm.org/D135615
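
As a rough usage sketch (not taken from the patch): the user-visible effect is
that the QRDMX intrinsics no longer require __ARM_FEATURE_QRDMX to be
predefined for the whole file; on AArch64 they can be enabled per-function
instead. The wrapper name here is hypothetical.

  #include <arm_neon.h>

  // Sketch: an Armv8.1-A code path inside a TU compiled for a lower baseline.
  __attribute__((target("arch=armv8.1-a")))
  int32x4_t qrdmlah_path(int32x4_t acc, int32x4_t a, int32x4_t b) {
    // Signed saturating rounding doubling multiply accumulate returning
    // the high half.
    return vqrdmlahq_s32(acc, a, b);
  }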

Added: 


Modified: 
clang/include/clang/Basic/arm_neon.td
clang/lib/Basic/Targets/ARM.cpp
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGen/arm-target-features.c
clang/test/Sema/aarch64-neon-target.c
clang/test/Sema/arm-neon-target.c
llvm/include/llvm/Support/ARMTargetParser.h
llvm/lib/Support/ARMTargetParser.cpp

Removed: 




diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index a7737a5f81e06..d6b6c429a21b2 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -323,7 +323,7 @@ def VMLSL: SOpInst<"vmlsl", "(>Q)(>Q)..", "csiUcUsUi", OP_MLSL>;
 def VQDMULH  : SInst<"vqdmulh", "...", "siQsQi">;
 def VQRDMULH : SInst<"vqrdmulh", "...", "siQsQi">;
 
-let ArchGuard = "defined(__ARM_FEATURE_QRDMX)" in {
+let TargetGuard = "v8.1a" in {
 def VQRDMLAH : SInst<"vqrdmlah", "", "siQsQi">;
 def VQRDMLSH : SInst<"vqrdmlsh", "", "siQsQi">;
 }
@@ -614,7 +614,7 @@ def A64_VQDMULH_LANE  : SInst<"vqdmulh_lane", "..(!q)I", "siQsQi">;
 def A64_VQRDMULH_LANE : SInst<"vqrdmulh_lane", "..(!q)I", "siQsQi">;
 }
 
-let ArchGuard = "defined(__ARM_FEATURE_QRDMX)" in {
+let TargetGuard = "v8.1a" in {
 def VQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "...qI", "siQsQi", OP_QRDMLAH_LN>;
 def VQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "...qI", "siQsQi", OP_QRDMLSH_LN>;
 }
@@ -1089,14 +1089,14 @@ let isLaneQ = 1 in {
 def VQDMULH_LANEQ  : SInst<"vqdmulh_laneq", "..QI", "siQsQi">;
 def VQRDMULH_LANEQ : SInst<"vqrdmulh_laneq", "..QI", "siQsQi">;
 }
-let ArchGuard = "defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in {
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.1a" in {
 def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "...QI", "siQsQi", OP_QRDMLAH_LN> {
   let isLaneQ = 1;
 }
 def VQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "...QI", "siQsQi", OP_QRDMLSH_LN> {
   let isLaneQ = 1;
 }
-}
+} // ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.1a"
 
 // Note: d type implemented by SCALAR_VMULX_LANE
 def VMULX_LANE : IOpInst<"vmulx_lane", "..qI", "fQfQd", OP_MULX_LN>;
@@ -1394,7 +1394,7 @@ def SCALAR_SQDMULH : SInst<"vqdmulh", "111", "SsSi">;
 // Scalar Integer Saturating Rounding Doubling Multiply Half High
 def SCALAR_SQRDMULH : SInst<"vqrdmulh", "111", "SsSi">;
 
-let ArchGuard = "defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in {
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.1a" in {
 

 // Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half
 def SCALAR_SQRDMLAH : SInst<"vqrdmlah", "", "SsSi">;
@@ -1402,7 +1402,7 @@ def SCALAR_SQRDMLAH : SInst<"vqrdmlah", "", "SsSi">;
 

 // Signed Saturating Rounding Doubling Multiply Subtract Returning High Half
 def SCALAR_SQRDMLSH : SInst<"vqrdmlsh", "", "SsSi">;
-}
+} // ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.1a"
 
 

 // Scalar Floating-point Multiply Extended
@@ -1625,7 +1625,7 @@ def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "11QI", "SsSi", OP_SCALAR_
   let isLaneQ = 1;
 }
 
-let ArchGuard = "defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in {
+let TargetGuard = "v8.1a" in {
 // Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half
 def SCALAR_SQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "111.I", "SsSi", OP_SCALAR_QRDMLAH_LN>;
 def SCALAR_SQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLAH_LN> {
@@ -1637,7 +1637,7 @@ def SCALAR_SQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "111.I",

[clang] af1bb28 - [AArch64][ARM] Alter v8.3a complex neon intrinsics to be target-based, not preprocessor based

2022-10-25 Thread David Green via cfe-commits

Author: David Green
Date: 2022-10-25T14:35:11+01:00
New Revision: af1bb287b4de3c5a5d82679ceb001c7d70f09c82

URL: 
https://github.com/llvm/llvm-project/commit/af1bb287b4de3c5a5d82679ceb001c7d70f09c82
DIFF: 
https://github.com/llvm/llvm-project/commit/af1bb287b4de3c5a5d82679ceb001c7d70f09c82.diff

LOG: [AArch64][ARM] Alter v8.3a complex neon intrinsics to be target-based, not 
preprocessor based

This alters the v8.3-a complex intrinsics to be target-gated, as opposed
to being hidden behind preprocessor macros. This is the last set of
intrinsics in arm_neon.h to be converted, and it follows the same formula
as before.

Differential Revision: https://reviews.llvm.org/D135647
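
A minimal sketch of the caller side (illustrative, not from this patch),
assuming an AArch64 compilation and a made-up function name:

  #include <arm_neon.h>

  // Sketch: use a v8.3-a complex-arithmetic intrinsic from a function that
  // enables the feature via a target attribute rather than relying on
  // __ARM_FEATURE_COMPLEX being predefined.
  __attribute__((target("arch=armv8.3-a")))
  float32x4_t cadd_rot90(float32x4_t a, float32x4_t b) {
    // Complex addition with the second operand rotated by 90 degrees.
    return vcaddq_rot90_f32(a, b);
  }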

Added: 


Modified: 
clang/include/clang/Basic/arm_neon.td
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/Sema/aarch64-neon-target.c
clang/test/Sema/arm-neon-target.c

Removed: 




diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index d6b6c429a21b2..4288e9eb69d07 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -1964,7 +1964,7 @@ multiclass VCMLA_ROTS {
 }
 
 // v8.3-A Vector complex addition intrinsics
-let ArchGuard = "defined(__ARM_FEATURE_COMPLEX)", TargetGuard = "fullfp16" in {
+let TargetGuard = "v8.3a,fullfp16" in {
   def VCADD_ROT90_FP16   : SInst<"vcadd_rot90", "...", "h">;
   def VCADD_ROT270_FP16  : SInst<"vcadd_rot270", "...", "h">;
   def VCADDQ_ROT90_FP16  : SInst<"vcaddq_rot90", "QQQ", "h">;
@@ -1972,7 +1972,7 @@ let ArchGuard = "defined(__ARM_FEATURE_COMPLEX)", TargetGuard = "fullfp16" in {
 
   defm VCMLA_FP16  : VCMLA_ROTS<"h", "uint32x2_t", "uint32x4_t">;
 }
-let ArchGuard = "defined(__ARM_FEATURE_COMPLEX)" in {
+let TargetGuard = "v8.3a" in {
   def VCADD_ROT90   : SInst<"vcadd_rot90", "...", "f">;
   def VCADD_ROT270  : SInst<"vcadd_rot270", "...", "f">;
   def VCADDQ_ROT90  : SInst<"vcaddq_rot90", "QQQ", "f">;
@@ -1980,7 +1980,7 @@ let ArchGuard = "defined(__ARM_FEATURE_COMPLEX)" in {
 
   defm VCMLA_F32: VCMLA_ROTS<"f", "uint64x1_t", "uint64x2_t">;
 }
-let ArchGuard = "defined(__ARM_FEATURE_COMPLEX) && defined(__aarch64__)" in {
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.3a" in {
   def VCADDQ_ROT90_FP64  : SInst<"vcaddq_rot90", "QQQ", "d">;
   def VCADDQ_ROT270_FP64 : SInst<"vcaddq_rot270", "QQQ", "d">;
 

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index f67798000444c..0ca664bfed9e9 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5667,10 +5667,16 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[] = {
   NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
   NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
   NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
-  NEONMAP1(vcadd_rot270_v, arm_neon_vcadd_rot270, Add1ArgType),
-  NEONMAP1(vcadd_rot90_v, arm_neon_vcadd_rot90, Add1ArgType),
-  NEONMAP1(vcaddq_rot270_v, arm_neon_vcadd_rot270, Add1ArgType),
-  NEONMAP1(vcaddq_rot90_v, arm_neon_vcadd_rot90, Add1ArgType),
+  NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
+  NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
+  NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
+  NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
+  NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
+  NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
+  NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
+  NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
+  NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
+  NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
   NEONMAP1(vcage_v, arm_neon_vacge, 0),
   NEONMAP1(vcageq_v, arm_neon_vacge, 0),
   NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
@@ -5985,10 +5991,16 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
   NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
   NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
   NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
-  NEONMAP1(vcadd_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType),
-  NEONMAP1(vcadd_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType),
-  NEONMAP1(vcaddq_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType),
-  NEONMAP1(vcaddq_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType),
+  NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
+  NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
+  NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
+  NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
+  NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
+  NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
+  NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
+  NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
+  NEONMAP1(vcaddq_
