https://github.com/MacDue updated 
https://github.com/llvm/llvm-project/pull/174608

>From 2538bd01f629e252573e5c470eb83dfcb140570a Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <[email protected]>
Date: Tue, 13 Jan 2026 14:48:28 +0000
Subject: [PATCH 1/3] [clang][SME] Rework streaming mode always_inline
 errors/warnings

Previously, we would emit a warning if we encountered an
`always_inline` function with incompatible streaming attributes within
a `streaming[_compatible]` function.

This has two major issues.

1. It's prone to false-positives/non-issue warnings
  * Existing library code may be marked `always_inline` without the
    appropriate `__arm_streaming_compatible` attribute, but the body of
    the function could still be safe to inline
  * In these cases, the warning can be an annoyance as the issue may not
    be in the user's code
2. It does not catch transitive errors with builtins that are not
   streaming-safe
  * If a builtin that is not streaming-safe is wrapped in an
    `always_inline` function, calling that function from a streaming
    function is simply a warning in the frontend, but can result in a
    backend crash

This patch improves this by adding a set to `ASTContext` to track whether
a function contains an expression that is not safe in streaming mode.
A function is inserted into this set when its body contains a
non-streaming builtin, or an `always_inline` call to a function that is
already in the set.

With this, we can emit an error if a builtin that is not streaming-safe
occurs within a function directly or transitively via `always_inline`
callees.

This allows us to downgrade the existing warning to only occur with
`-Waarch64-sme-attributes`, as it is unlikely to be an issue if the
error was not emitted.
---
 clang/include/clang/AST/ASTContext.h          |  5 ++
 .../clang/Basic/DiagnosticFrontendKinds.td    |  4 +-
 clang/include/clang/Sema/SemaARM.h            |  4 ++
 clang/lib/CodeGen/CGCall.cpp                  |  6 +-
 clang/lib/CodeGen/TargetInfo.h                | 12 ++--
 clang/lib/CodeGen/Targets/AArch64.cpp         | 66 +++++++++++++++----
 clang/lib/CodeGen/Targets/X86.cpp             |  5 +-
 clang/lib/Sema/SemaARM.cpp                    |  6 +-
 .../CodeGen/AArch64/sme-always-inline.cpp     | 56 ++++++++++++++++
 .../AArch64/sme-inline-streaming-attrs.c      |  8 +--
 10 files changed, 141 insertions(+), 31 deletions(-)
 create mode 100644 clang/test/CodeGen/AArch64/sme-always-inline.cpp

diff --git a/clang/include/clang/AST/ASTContext.h 
b/clang/include/clang/AST/ASTContext.h
index 68205dd1c1fd9..421b3715dc9a4 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -1370,6 +1370,11 @@ class ASTContext : public RefCountedBase<ASTContext> {
   /// are stored here.
   llvm::DenseMap<const CXXMethodDecl *, CXXCastPath> LambdaCastPaths;
 
+  /// Keep track of functions that contain expressions that are not valid in
+  /// streaming mode on AArch64. This is used to check inlining validity.
+  llvm::DenseSet<const FunctionDecl *>
+      AArch64ContansExprNotSafeForStreamingFunctions;
+
   ASTContext(LangOptions &LOpts, SourceManager &SM, IdentifierTable &idents,
              SelectorTable &sels, Builtin::Context &builtins,
              TranslationUnitKind TUKind);
diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td 
b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
index e2b257ceae80d..880a50461b8ab 100644
--- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td
+++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
@@ -296,7 +296,9 @@ def err_function_always_inline_attribute_mismatch : Error<
   "always_inline function %1 and its caller %0 have mismatching %2 
attributes">;
 def warn_function_always_inline_attribute_mismatch : Warning<
   "always_inline function %1 and its caller %0 have mismatching %2 attributes, 
"
-  "inlining may change runtime behaviour">, InGroup<AArch64SMEAttributes>;
+  "inlining may change runtime behaviour">, InGroup<AArch64SMEAttributes>, 
DefaultIgnore;
+def err_function_always_inline_non_streaming_builtins : Error<
+  "always_inline function %0 cannot be inlined into streaming caller as it 
contains calls to non-streaming builtins">;
 def err_function_always_inline_new_za : Error<
   "always_inline function %0 has new za state">;
 def err_function_always_inline_new_zt0
diff --git a/clang/include/clang/Sema/SemaARM.h 
b/clang/include/clang/Sema/SemaARM.h
index af8e0e9047171..a98a5036b592e 100644
--- a/clang/include/clang/Sema/SemaARM.h
+++ b/clang/include/clang/Sema/SemaARM.h
@@ -83,6 +83,10 @@ class SemaARM : public SemaBase {
 
   void CheckSMEFunctionDefAttributes(const FunctionDecl *FD);
 
+  void setFunctionContainsExprNotSafeForStreamingMode(const FunctionDecl *FD) {
+    getASTContext().AArch64ContansExprNotSafeForStreamingFunctions.insert(FD);
+  }
+
   /// Return true if the given types are an SVE builtin and a VectorType that
   /// is a fixed-length representation of the SVE builtin for a specific
   /// vector-length.
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index d7bdeb3981cf8..06e5f0bd8261c 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5284,7 +5284,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo 
&CallInfo,
   // attribute-target/features. Give them a chance to diagnose.
   const FunctionDecl *CallerDecl = dyn_cast_or_null<FunctionDecl>(CurCodeDecl);
   const FunctionDecl *CalleeDecl = dyn_cast_or_null<FunctionDecl>(TargetDecl);
-  CGM.getTargetCodeGenInfo().checkFunctionCallABI(CGM, Loc, CallerDecl,
+  CGM.getTargetCodeGenInfo().checkFunctionCallABI(*this, Loc, CallerDecl,
                                                   CalleeDecl, CallArgs, RetTy);
 
   // 1. Set up the arguments.
@@ -5860,7 +5860,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo 
&CallInfo,
   // Note: This corresponds to the [[clang::always_inline]] statement 
attribute.
   if (InAlwaysInlineAttributedStmt &&
       !CGM.getTargetCodeGenInfo().wouldInliningViolateFunctionCallABI(
-          CallerDecl, CalleeDecl))
+          *this, CallerDecl, CalleeDecl))
     Attrs =
         Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline);
 
@@ -5878,7 +5878,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo 
&CallInfo,
       !InNoInlineAttributedStmt &&
       !(TargetDecl && TargetDecl->hasAttr<NoInlineAttr>()) &&
       !CGM.getTargetCodeGenInfo().wouldInliningViolateFunctionCallABI(
-          CallerDecl, CalleeDecl)) {
+          *this, CallerDecl, CalleeDecl)) {
     Attrs =
         Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline);
   }
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index db06584d766bf..496eaeb0439d6 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -98,11 +98,10 @@ class TargetCodeGenInfo {
 
   /// Any further codegen related checks that need to be done on a function 
call
   /// in a target specific manner.
-  virtual void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
-                                    const FunctionDecl *Caller,
-                                    const FunctionDecl *Callee,
-                                    const CallArgList &Args,
-                                    QualType ReturnType) const {}
+  virtual void
+  checkFunctionCallABI(CodeGenFunction &CGF, SourceLocation CallLoc,
+                       const FunctionDecl *Caller, const FunctionDecl *Callee,
+                       const CallArgList &Args, QualType ReturnType) const {}
 
   /// Returns true if inlining the function call would produce incorrect code
   /// for the current target and should be ignored (even with the always_inline
@@ -117,7 +116,8 @@ class TargetCodeGenInfo {
   /// See previous discussion here:
   /// 
https://discourse.llvm.org/t/rfc-avoid-inlining-alwaysinline-functions-when-they-cannot-be-inlined/79528
   virtual bool
-  wouldInliningViolateFunctionCallABI(const FunctionDecl *Caller,
+  wouldInliningViolateFunctionCallABI(CodeGenFunction &CGF,
+                                      const FunctionDecl *Caller,
                                       const FunctionDecl *Callee) const {
     return false;
   }
diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp 
b/clang/lib/CodeGen/Targets/AArch64.cpp
index 963b74927036a..5522bdaa0694e 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -182,18 +182,20 @@ class AArch64TargetCodeGenInfo : public TargetCodeGenInfo 
{
   void checkFunctionABI(CodeGenModule &CGM,
                         const FunctionDecl *Decl) const override;
 
-  void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
+  void checkFunctionCallABI(CodeGenFunction &CGF, SourceLocation CallLoc,
                             const FunctionDecl *Caller,
                             const FunctionDecl *Callee, const CallArgList 
&Args,
                             QualType ReturnType) const override;
 
   bool wouldInliningViolateFunctionCallABI(
-      const FunctionDecl *Caller, const FunctionDecl *Callee) const override;
+      CodeGenFunction &CGF, const FunctionDecl *Caller,
+      const FunctionDecl *Callee) const override;
 
 private:
   // Diagnose calls between functions with incompatible Streaming SVE
   // attributes.
-  void checkFunctionCallABIStreaming(CodeGenModule &CGM, SourceLocation 
CallLoc,
+  void checkFunctionCallABIStreaming(CodeGenFunction &CGF,
+                                     SourceLocation CallLoc,
                                      const FunctionDecl *Caller,
                                      const FunctionDecl *Callee) const;
   // Diagnose calls which must pass arguments in floating-point registers when
@@ -1231,16 +1233,31 @@ enum class ArmSMEInlinability : uint8_t {
   ErrorCalleeRequiresNewZT0 = 1 << 1,
   WarnIncompatibleStreamingModes = 1 << 2,
   ErrorIncompatibleStreamingModes = 1 << 3,
+  ErrorContainsNonStreamingBuiltin = 1 << 4,
 
   IncompatibleStreamingModes =
       WarnIncompatibleStreamingModes | ErrorIncompatibleStreamingModes,
 
-  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/ErrorIncompatibleStreamingModes),
+  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/ErrorContainsNonStreamingBuiltin),
 };
 
+static bool
+functionContainsExprNotSafeForStreamingMode(CodeGenModule &CGM,
+                                            const FunctionDecl *FD) {
+  return CGM.getContext()
+      .AArch64ContansExprNotSafeForStreamingFunctions.contains(FD);
+}
+
+static void
+setFunctionContainsExprNotSafeForStreamingMode(CodeGenModule &CGM,
+                                               const FunctionDecl *FD) {
+  CGM.getContext().AArch64ContansExprNotSafeForStreamingFunctions.insert(FD);
+}
+
 /// Determines if there are any Arm SME ABI issues with inlining \p Callee into
 /// \p Caller. Returns the issue (if any) in the ArmSMEInlinability bit enum.
-static ArmSMEInlinability GetArmSMEInlinability(const FunctionDecl *Caller,
+static ArmSMEInlinability GetArmSMEInlinability(CodeGenModule &CGM,
+                                                const FunctionDecl *Caller,
                                                 const FunctionDecl *Callee) {
   bool CallerIsStreaming =
       IsArmStreamingFunction(Caller, /*IncludeLocallyStreaming=*/true);
@@ -1248,9 +1265,15 @@ static ArmSMEInlinability GetArmSMEInlinability(const 
FunctionDecl *Caller,
       IsArmStreamingFunction(Callee, /*IncludeLocallyStreaming=*/true);
   bool CallerIsStreamingCompatible = isStreamingCompatible(Caller);
   bool CalleeIsStreamingCompatible = isStreamingCompatible(Callee);
+  bool CalleeContainsNonStreamingBuiltinCall =
+      functionContainsExprNotSafeForStreamingMode(CGM, Callee);
 
   ArmSMEInlinability Inlinability = ArmSMEInlinability::Ok;
 
+  if ((CallerIsStreamingCompatible || CallerIsStreaming) &&
+      CalleeContainsNonStreamingBuiltinCall)
+    Inlinability |= ArmSMEInlinability::ErrorContainsNonStreamingBuiltin;
+
   if (!CalleeIsStreamingCompatible &&
       (CallerIsStreaming != CalleeIsStreaming || CallerIsStreamingCompatible)) 
{
     if (CalleeIsStreaming)
@@ -1258,6 +1281,7 @@ static ArmSMEInlinability GetArmSMEInlinability(const 
FunctionDecl *Caller,
     else
       Inlinability |= ArmSMEInlinability::WarnIncompatibleStreamingModes;
   }
+
   if (auto *NewAttr = Callee->getAttr<ArmNewAttr>()) {
     if (NewAttr->isNewZA())
       Inlinability |= ArmSMEInlinability::ErrorCalleeRequiresNewZA;
@@ -1269,12 +1293,20 @@ static ArmSMEInlinability GetArmSMEInlinability(const 
FunctionDecl *Caller,
 }
 
 void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming(
-    CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
+    CodeGenFunction &CGF, SourceLocation CallLoc, const FunctionDecl *Caller,
     const FunctionDecl *Callee) const {
-  if (!Caller || !Callee || !Callee->hasAttr<AlwaysInlineAttr>())
+  if (!Caller || !Callee || CGF.InNoInlineAttributedStmt ||
+      !Callee->hasAttr<AlwaysInlineAttr>())
     return;
 
-  ArmSMEInlinability Inlinability = GetArmSMEInlinability(Caller, Callee);
+  CodeGenModule &CGM = CGF.CGM;
+  ArmSMEInlinability Inlinability = GetArmSMEInlinability(CGM, Caller, Callee);
+
+  if ((Inlinability & ArmSMEInlinability::ErrorContainsNonStreamingBuiltin) !=
+      ArmSMEInlinability::Ok)
+    CGM.getDiags().Report(
+        CallLoc, diag::err_function_always_inline_non_streaming_builtins)
+        << Callee->getDeclName();
 
   if ((Inlinability & ArmSMEInlinability::IncompatibleStreamingModes) !=
       ArmSMEInlinability::Ok)
@@ -1318,20 +1350,28 @@ void 
AArch64TargetCodeGenInfo::checkFunctionCallABISoftFloat(
                          Callee ? Callee : Caller, CallLoc);
 }
 
-void AArch64TargetCodeGenInfo::checkFunctionCallABI(CodeGenModule &CGM,
+void AArch64TargetCodeGenInfo::checkFunctionCallABI(CodeGenFunction &CGF,
                                                     SourceLocation CallLoc,
                                                     const FunctionDecl *Caller,
                                                     const FunctionDecl *Callee,
                                                     const CallArgList &Args,
                                                     QualType ReturnType) const 
{
-  checkFunctionCallABIStreaming(CGM, CallLoc, Caller, Callee);
-  checkFunctionCallABISoftFloat(CGM, CallLoc, Caller, Callee, Args, 
ReturnType);
+  if (!CGF.InNoInlineAttributedStmt && Caller && Callee &&
+      Callee->hasAttr<AlwaysInlineAttr>() &&
+      functionContainsExprNotSafeForStreamingMode(CGF.CGM, Callee))
+    setFunctionContainsExprNotSafeForStreamingMode(CGF.CGM, Caller);
+
+  checkFunctionCallABIStreaming(CGF, CallLoc, Caller, Callee);
+  checkFunctionCallABISoftFloat(CGF.CGM, CallLoc, Caller, Callee, Args,
+                                ReturnType);
 }
 
 bool AArch64TargetCodeGenInfo::wouldInliningViolateFunctionCallABI(
-    const FunctionDecl *Caller, const FunctionDecl *Callee) const {
+    CodeGenFunction &CGF, const FunctionDecl *Caller,
+    const FunctionDecl *Callee) const {
   return Caller && Callee &&
-         GetArmSMEInlinability(Caller, Callee) != ArmSMEInlinability::Ok;
+         GetArmSMEInlinability(CGF.CGM, Caller, Callee) !=
+             ArmSMEInlinability::Ok;
 }
 
 void AArch64ABIInfo::appendAttributeMangling(TargetClonesAttr *Attr,
diff --git a/clang/lib/CodeGen/Targets/X86.cpp 
b/clang/lib/CodeGen/Targets/X86.cpp
index e6203db8bc245..a36523a7c4a1a 100644
--- a/clang/lib/CodeGen/Targets/X86.cpp
+++ b/clang/lib/CodeGen/Targets/X86.cpp
@@ -1504,7 +1504,7 @@ class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
     }
   }
 
-  void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
+  void checkFunctionCallABI(CodeGenFunction &CGF, SourceLocation CallLoc,
                             const FunctionDecl *Caller,
                             const FunctionDecl *Callee, const CallArgList 
&Args,
                             QualType ReturnType) const override;
@@ -1570,7 +1570,7 @@ static bool checkAVXParam(DiagnosticsEngine &Diag, 
ASTContext &Ctx,
   return false;
 }
 
-void X86_64TargetCodeGenInfo::checkFunctionCallABI(CodeGenModule &CGM,
+void X86_64TargetCodeGenInfo::checkFunctionCallABI(CodeGenFunction &CGF,
                                                    SourceLocation CallLoc,
                                                    const FunctionDecl *Caller,
                                                    const FunctionDecl *Callee,
@@ -1579,6 +1579,7 @@ void 
X86_64TargetCodeGenInfo::checkFunctionCallABI(CodeGenModule &CGM,
   if (!Callee)
     return;
 
+  CodeGenModule &CGM = CGF.CGM;
   llvm::StringMap<bool> CallerMap;
   llvm::StringMap<bool> CalleeMap;
   unsigned ArgIndex = 0;
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 53e8c002a1962..330e78593e3a9 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -623,9 +623,11 @@ static bool checkArmStreamingBuiltin(Sema &S, CallExpr 
*TheCall,
            BuiltinType == SemaARM::ArmStreaming)
     S.Diag(TheCall->getBeginLoc(), diag::err_attribute_arm_sm_incompat_builtin)
         << TheCall->getSourceRange() << "streaming";
-  else
+  else {
+    if (BuiltinType == SemaARM::ArmNonStreaming)
+      S.ARM().setFunctionContainsExprNotSafeForStreamingMode(FD);
     return false;
-
+  }
   return true;
 }
 
diff --git a/clang/test/CodeGen/AArch64/sme-always-inline.cpp 
b/clang/test/CodeGen/AArch64/sme-always-inline.cpp
new file mode 100644
index 0000000000000..f678d9d77edfe
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sme-always-inline.cpp
@@ -0,0 +1,56 @@
+// RUN: %clang_cc1 -Waarch64-sme-attributes -triple aarch64-none-linux-gnu -S 
-o /dev/null -target-feature +sme -target-feature +neon -verify=expected-attr 
%s -DTEST_STREAMING
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -o /dev/null 
-target-feature +sme -target-feature +neon -verify %s -DTEST_STREAMING
+// RUN: %clang_cc1 -Waarch64-sme-attributes -triple aarch64-none-linux-gnu -S 
-o /dev/null -target-feature +sme -target-feature +neon -verify=expected-attr 
%s -DTEST_COMPATIBLE
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -o /dev/null 
-target-feature +sme -target-feature +neon -verify %s -DTEST_COMPATIBLE
+
+#if defined(TEST_STREAMING)
+#define SM_ATTR __arm_streaming
+#elif defined(TEST_COMPATIBLE)
+#define SM_ATTR __arm_streaming_compatible
+#else
+#error "Expected TEST_STREAMING or TEST_COMPATIBLE"
+#endif
+
+__attribute__((always_inline)) void incompatible_neon() {
+  __attribute((vector_size(16))) char vec = { 0 };
+  vec = __builtin_neon_vqaddq_v(vec, vec, 33);
+}
+
+__attribute__((always_inline)) void compatible_missing_attrs() {
+    // <Empty>
+}
+
+void foo() {
+    incompatible_neon();
+}
+
+// expected-note@+2 {{conflicting attribute is here}}
+// expected-attr-note@+1 {{conflicting attribute is here}}
+__attribute__((always_inline)) void bar() {
+    incompatible_neon();
+}
+
+__attribute__((always_inline)) void baz() {
+    compatible_missing_attrs();
+}
+
+void streaming_error() SM_ATTR {
+    // expected-error@+3 {{always_inline function 'bar' cannot be inlined into 
streaming caller as it contains calls to non-streaming builtins}}
+    // expected-attr-warning@+2 {{always_inline function 'bar' and its caller 
'streaming_error' have mismatching streaming attributes, inlining may change 
runtime behaviour}}
+    // expected-attr-error@+1 {{always_inline function 'bar' cannot be inlined 
into streaming caller as it contains calls to non-streaming builtins}}
+    bar(); // -> incompatible_neon -> __builtin_neon_vqaddq_v (error)
+}
+
+void streaming_warning() SM_ATTR {
+    // expected-attr-warning@+1 {{always_inline function 'baz' and its caller 
'streaming_warning' have mismatching streaming attributes, inlining may change 
runtime behaviour}}
+    baz(); // -> compatible_missing_attrs (no error)
+
+    /// `noinline` has higher precedence than always_inline (so this is not an 
error)
+    // expected-warning@+2 {{statement attribute 'clang::noinline' has higher 
precedence than function attribute 'always_inline'}}
+    // expected-attr-warning@+1 {{statement attribute 'clang::noinline' has 
higher precedence than function attribute 'always_inline'}}
+    [[clang::noinline]] bar();
+}
+
+void streaming_no_warning() SM_ATTR {
+    foo(); // `foo` is not always_inline (no error/warning)
+}
diff --git a/clang/test/CodeGen/AArch64/sme-inline-streaming-attrs.c 
b/clang/test/CodeGen/AArch64/sme-inline-streaming-attrs.c
index 68102c9ded40c..374e29ab13036 100644
--- a/clang/test/CodeGen/AArch64/sme-inline-streaming-attrs.c
+++ b/clang/test/CodeGen/AArch64/sme-inline-streaming-attrs.c
@@ -1,7 +1,7 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -o /dev/null 
-target-feature +sme -target-feature +sme2 -verify -DTEST_NONE %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -o /dev/null 
-target-feature +sme -target-feature +sme2 -verify -DTEST_COMPATIBLE %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -o /dev/null 
-target-feature +sme -target-feature +sme2 -verify -DTEST_STREAMING %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -o /dev/null 
-target-feature +sme -target-feature +sme2 -verify -DTEST_LOCALLY %s
+// RUN: %clang_cc1 -Waarch64-sme-attributes -triple aarch64-none-linux-gnu -S 
-o /dev/null -target-feature +sme -target-feature +sme2 -verify -DTEST_NONE %s
+// RUN: %clang_cc1 -Waarch64-sme-attributes -triple aarch64-none-linux-gnu -S 
-o /dev/null -target-feature +sme -target-feature +sme2 -verify 
-DTEST_COMPATIBLE %s
+// RUN: %clang_cc1 -Waarch64-sme-attributes -triple aarch64-none-linux-gnu -S 
-o /dev/null -target-feature +sme -target-feature +sme2 -verify 
-DTEST_STREAMING %s
+// RUN: %clang_cc1 -Waarch64-sme-attributes -triple aarch64-none-linux-gnu -S 
-o /dev/null -target-feature +sme -target-feature +sme2 -verify -DTEST_LOCALLY 
%s
 
 // REQUIRES: aarch64-registered-target
 

>From afaecf34b38f3d39ccb2e0416b8539378b8c63e5 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <[email protected]>
Date: Wed, 14 Jan 2026 14:19:22 +0000
Subject: [PATCH 2/3] Add working intrinsics

---
 llvm/include/llvm/IR/IntrinsicsAArch64.td     | 27 ++++++++++++++++---
 .../Target/AArch64/AArch64ISelDAGToDAG.cpp    | 12 +++++++++
 2 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td 
b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index fd56e0e3f9e7b..2c27c377f90a9 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1049,6 +1049,22 @@ def llvm_nxv2f64_ty : LLVMType<nxv2f64>;
 
 let TargetPrefix = "aarch64" in {  // All intrinsics start with 
"llvm.aarch64.".
 
+  class AdvSIMD_2Vec_PredLoadQ_Intrinsic
+    : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+                [llvm_nxv1i1_ty, llvm_ptr_ty],
+                [IntrReadMem, IntrArgMemOnly]>;
+
+  class AdvSIMD_3Vec_PredLoadQ_Intrinsic
+    : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, 
LLVMMatchType<0>],
+                [llvm_nxv1i1_ty, llvm_ptr_ty],
+                [IntrReadMem, IntrArgMemOnly]>;
+
+  class AdvSIMD_4Vec_PredLoadQ_Intrinsic
+    : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, 
LLVMMatchType<0>,
+                 LLVMMatchType<0>],
+                [llvm_nxv1i1_ty, llvm_ptr_ty],
+                [IntrReadMem, IntrArgMemOnly]>;
+
   class AdvSIMD_1Vec_PredLoad_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_ptr_ty],
@@ -2888,6 +2904,10 @@ def int_aarch64_sve_ld2q_sret : 
AdvSIMD_2Vec_PredLoad_Intrinsic;
 def int_aarch64_sve_ld3q_sret : AdvSIMD_3Vec_PredLoad_Intrinsic;
 def int_aarch64_sve_ld4q_sret : AdvSIMD_4Vec_PredLoad_Intrinsic;
 
+def int_aarch64_sve_ld2q : AdvSIMD_2Vec_PredLoadQ_Intrinsic;
+def int_aarch64_sve_ld3q : AdvSIMD_3Vec_PredLoadQ_Intrinsic;
+def int_aarch64_sve_ld4q : AdvSIMD_4Vec_PredLoadQ_Intrinsic;
+
 def int_aarch64_sve_st2q : AdvSIMD_2Vec_PredStore_Intrinsic;
 def int_aarch64_sve_st3q : AdvSIMD_3Vec_PredStore_Intrinsic;
 def int_aarch64_sve_st4q : AdvSIMD_4Vec_PredStore_Intrinsic;
@@ -3696,7 +3716,7 @@ let TargetPrefix = "aarch64" in {
 
   def int_aarch64_sve_fscale_x2 : SVE2_VG2_Multi_Multi_Scale_Intrinsic;
   def int_aarch64_sve_fscale_x4 : SVE2_VG4_Multi_Multi_Scale_Intrinsic;
-  
+
   //
   // Multi-vector floating point absolute min/max number
   //
@@ -4255,7 +4275,7 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sme_fp8_fvdot_lane_za16_vg1x2  : 
SME_FP8_ZA_LANE_VGx2_Intrinsic;
   def int_aarch64_sme_fp8_fvdotb_lane_za32_vg1x4 : 
SME_FP8_ZA_LANE_VGx2_Intrinsic;
   def int_aarch64_sme_fp8_fvdott_lane_za32_vg1x4 : 
SME_FP8_ZA_LANE_VGx2_Intrinsic;
-  
+
   // AES2
   class SVE2_Crypto_LANE_X2_Intrinsic
   : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty, llvm_nxv16i8_ty],
@@ -4263,7 +4283,7 @@ let TargetPrefix = "aarch64" in {
       [ImmArg<ArgIndex<3>>, IntrNoMem]>;
   class SVE2_Crypto_LANE_X4_Intrinsic
   : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, 
llvm_nxv16i8_ty],
-      [llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, 
+      [llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty,
        llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty],
       [ImmArg<ArgIndex<5>>, IntrNoMem]>;
 
@@ -4282,4 +4302,3 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sve_pmlal_pair_x2 : DefaultAttrsIntrinsic<[llvm_nxv2i64_ty, 
llvm_nxv2i64_ty],
       [llvm_nxv2i64_ty, llvm_nxv2i64_ty, llvm_nxv2i64_ty, llvm_nxv2i64_ty], 
[IntrNoMem]>;
 }
-
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp 
b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 7b566e432b6bc..08bb8154a5cb6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -5250,14 +5250,17 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     case Intrinsic::aarch64_ld64b:
       SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
       return;
+    case Intrinsic::aarch64_sve_ld2q:
     case Intrinsic::aarch64_sve_ld2q_sret: {
       SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
       return;
     }
+    case Intrinsic::aarch64_sve_ld3q:
     case Intrinsic::aarch64_sve_ld3q_sret: {
       SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
       return;
     }
+    case Intrinsic::aarch64_sve_ld4q:
     case Intrinsic::aarch64_sve_ld4q_sret: {
       SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
       return;
@@ -7546,6 +7549,15 @@ static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode 
*Root) {
   case Intrinsic::aarch64_sve_ld1uwq:
   case Intrinsic::aarch64_sve_st1wq:
     return EVT(MVT::nxv1i32);
+  case Intrinsic::aarch64_sve_ld2q:
+    return EVT::getVectorVT(Ctx, Root->getValueType(0).getVectorElementType(),
+                            Root->getValueType(0).getVectorElementCount() * 2);
+  case Intrinsic::aarch64_sve_ld3q:
+    return EVT::getVectorVT(Ctx, Root->getValueType(0).getVectorElementType(),
+                            Root->getValueType(0).getVectorElementCount() * 3);
+  case Intrinsic::aarch64_sve_ld4q:
+    return EVT::getVectorVT(Ctx, Root->getValueType(0).getVectorElementType(),
+                            Root->getValueType(0).getVectorElementCount() * 4);
   }
 }
 

>From 9cef614dabb02a52c9d196415a376a50b09e55ed Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <[email protected]>
Date: Wed, 14 Jan 2026 17:57:22 +0000
Subject: [PATCH 3/3] Fix typo

---
 clang/include/clang/AST/ASTContext.h  | 2 +-
 clang/include/clang/Sema/SemaARM.h    | 2 +-
 clang/lib/CodeGen/Targets/AArch64.cpp | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/AST/ASTContext.h 
b/clang/include/clang/AST/ASTContext.h
index 421b3715dc9a4..c7da72f776212 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -1373,7 +1373,7 @@ class ASTContext : public RefCountedBase<ASTContext> {
   /// Keep track of functions that contain expressions that are not valid in
   /// streaming mode on AArch64. This is used to check inlining validity.
   llvm::DenseSet<const FunctionDecl *>
-      AArch64ContansExprNotSafeForStreamingFunctions;
+      AArch64ContainsExprNotSafeForStreamingFunctions;
 
   ASTContext(LangOptions &LOpts, SourceManager &SM, IdentifierTable &idents,
              SelectorTable &sels, Builtin::Context &builtins,
diff --git a/clang/include/clang/Sema/SemaARM.h 
b/clang/include/clang/Sema/SemaARM.h
index a98a5036b592e..b6a9f1c032bd5 100644
--- a/clang/include/clang/Sema/SemaARM.h
+++ b/clang/include/clang/Sema/SemaARM.h
@@ -84,7 +84,7 @@ class SemaARM : public SemaBase {
   void CheckSMEFunctionDefAttributes(const FunctionDecl *FD);
 
   void setFunctionContainsExprNotSafeForStreamingMode(const FunctionDecl *FD) {
-    getASTContext().AArch64ContansExprNotSafeForStreamingFunctions.insert(FD);
+    getASTContext().AArch64ContainsExprNotSafeForStreamingFunctions.insert(FD);
   }
 
   /// Return true if the given types are an SVE builtin and a VectorType that
diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp 
b/clang/lib/CodeGen/Targets/AArch64.cpp
index 5522bdaa0694e..20275f36fe35c 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -1245,13 +1245,13 @@ static bool
 functionContainsExprNotSafeForStreamingMode(CodeGenModule &CGM,
                                             const FunctionDecl *FD) {
   return CGM.getContext()
-      .AArch64ContansExprNotSafeForStreamingFunctions.contains(FD);
+      .AArch64ContainsExprNotSafeForStreamingFunctions.contains(FD);
 }
 
 static void
 setFunctionContainsExprNotSafeForStreamingMode(CodeGenModule &CGM,
                                                const FunctionDecl *FD) {
-  CGM.getContext().AArch64ContansExprNotSafeForStreamingFunctions.insert(FD);
+  CGM.getContext().AArch64ContainsExprNotSafeForStreamingFunctions.insert(FD);
 }
 
 /// Determines if there are any Arm SME ABI issues with inlining \p Callee into

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to