llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-coroutines

Author: None (snarkmaster)

<details>
<summary>Changes</summary>

See `AttrDocs.td` for the user-facing docs.  The immediate motivation was that 
I noticed that short-circuiting coroutines failed to optimize well. My demo 
program for lack-of-optimization is here: https://godbolt.org/z/oMdq8zfov

With this patch, `simple_coro()` in the demo does avoid the allocation, and 
compiles to a function-like form. 

```cpp
#include &lt;coroutine&gt;
#include &lt;optional&gt;

// `optional_wrapper` exists since `get_return_object()` can't return
// `std::optional` directly. C++ coroutines have a fundamental timing mismatch
// between when the return object is created and when the value is available:
//
// 1) Early (coroutine startup): `get_return_object()` is called and must return
//    something immediately.
// 2) Later (when `co_return` executes): `return_value(T)` is called with the
//    actual value.
// 3) Issue: If `get_return_object()` returns the storage, it's empty when
//    returned, and writing to it later cannot affect the already-returned copy.
template &lt;typename T&gt;
struct optional_wrapper {
  std::optional&lt;T&gt; storage_;
  std::optional&lt;T&gt;*&amp; pointer_;
  optional_wrapper(std::optional&lt;T&gt;*&amp; p) : pointer_(p) {
    pointer_ = &amp;storage_;
  }
  operator std::optional&lt;T&gt;() {
    return std::move(*storage_);
  }
  ~optional_wrapper() {}
};

// Make `std::optional` a coroutine
namespace std {
template &lt;typename T, typename... Args&gt;
struct coroutine_traits&lt;optional&lt;T&gt;, Args...&gt; {
  struct promise_type {
    optional&lt;T&gt;* storagePtr_ = nullptr;

    promise_type() = default;

    ::optional_wrapper&lt;T&gt; get_return_object() {
      return ::optional_wrapper&lt;T&gt;(storagePtr_);
    }

    suspend_never initial_suspend() const noexcept {
      return {};
    }
    suspend_never final_suspend() const noexcept {
      return {};
    }

    void return_value(T value) {
      *storagePtr_ = std::move(value);
    }

    void unhandled_exception() {
      // Leave storage_ empty to represent error
    }
  };
};
} // namespace std

// Non-coroutine baseline -- compare to the `simple_coro` variants below
std::optional&lt;int&gt; simple_func(const std::optional&lt;int&gt;&amp; r) {
  if (r.has_value()) {
    return r.value() + 1;
  }
  return std::nullopt; // empty optional (error)
}

// Without `co_await`, this optimizes just like `simple_func`.
// Unfortunately, this doesn't short-circuit when `r` is empty.
std::optional&lt;int&gt; wrong_simple_coro(const std::optional&lt;int&gt;&amp; 
r) {
  co_return r.value() + 2;
}

// `#if 0` adds a short-circuiting `co_await` that may suspend to
// immediately exit the parent coroutine. Unfortunately, this
// triggers a coro frame allocation, and pessimizes `simple_coro`.

template &lt;typename T&gt;
struct optional_awaitable {
  std::optional&lt;T&gt; opt_;

  bool await_ready() const noexcept {
    return opt_.has_value();
  }

  T await_resume() {
    return opt_.value();
  }

  template &lt;typename Promise&gt;
  [[clang::coro_destroyed_on_suspend]] void await_suspend(
      std::coroutine_handle&lt;Promise&gt; handle) {
    handle.destroy();
  }
};

template &lt;typename T&gt;
optional_awaitable&lt;T&gt; operator co_await(std::optional&lt;T&gt; opt) {
  return {std::move(opt)};
}

// Matches the logic of `simple_func`.  Before this patch, adding a
// "suspend-to-exit" await point forces an allocation.
std::optional&lt;int&gt; simple_coro(const std::optional&lt;int&gt;&amp; r) {
  co_return co_await std::move(r) + 4;
}

int main() {
  return simple_coro(std::optional&lt;int&gt;{8}).value() +
      wrong_simple_coro(std::optional&lt;int&gt;{16}).value() +
      simple_func(std::optional&lt;int&gt;{32}).value();
}
```

---
Full diff: https://github.com/llvm/llvm-project/pull/152029.diff


7 Files Affected:

- (modified) clang/docs/ReleaseNotes.rst (+4) 
- (modified) clang/include/clang/Basic/Attr.td (+8) 
- (modified) clang/include/clang/Basic/AttrDocs.td (+60) 
- (modified) clang/lib/CodeGen/CGCoroutine.cpp (+47-16) 
- (modified) clang/lib/CodeGen/CodeGenFunction.h (+2-1) 
- (added) clang/test/CodeGenCoroutines/coro-destroyed-on-suspend.cpp (+152) 
- (modified) clang/test/Misc/pragma-attribute-supported-attributes-list.test 
(+1) 


``````````diff
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 9231f2cae9bfa..3ebf64bfbbe11 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -116,6 +116,10 @@ Removed Compiler Flags
 Attribute Changes in Clang
 --------------------------
 
+- Introduced a new attribute ``[[clang::coro_destroyed_on_suspend]]`` on 
coroutine ``await_suspend``
+  methods to indicate that the coroutine will be destroyed during suspension, 
enabling optimizations
+  that skip the suspend point.
+
 Improvements to Clang's diagnostics
 -----------------------------------
 - Added a separate diagnostic group ``-Wfunction-effect-redeclarations``, for 
the more pedantic
diff --git a/clang/include/clang/Basic/Attr.td 
b/clang/include/clang/Basic/Attr.td
index 63fa6aa508827..f0e12ed3b9be3 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -1352,6 +1352,14 @@ def CoroAwaitElidableArgument : InheritableAttr {
   let SimpleHandler = 1;
 }
 
+def CoroDestroyedOnSuspend: InheritableAttr {
+  let Spellings = [Clang<"coro_destroyed_on_suspend">];
+  let Subjects = SubjectList<[CXXMethod]>;
+  let LangOpts = [CPlusPlus];
+  let Documentation = [CoroDestroyedOnSuspendDoc];
+  let SimpleHandler = 1;
+}
+
 // OSObject-based attributes.
 def OSConsumed : InheritableParamAttr {
   let Spellings = [Clang<"os_consumed">];
diff --git a/clang/include/clang/Basic/AttrDocs.td 
b/clang/include/clang/Basic/AttrDocs.td
index 326cf0bc842db..7745e08de44cc 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -9244,6 +9244,66 @@ Example:
 }];
 }
 
+def CoroDestroyedOnSuspendDoc : Documentation {
+  let Category = DocCatDecl;
+  let Content = [{
+The ``[[clang::coro_destroyed_on_suspend]]`` attribute may be applied to a C++
+``void await_suspend(handle)`` method belonging to a coroutine awaiter object.
+
+**Warning:** The attribute must only be used when ``await_suspend`` is
+guaranteed to call ``handle.destroy()`` before completing.  Violating this
+invariant **will** cause undefined behavior.  Be sure to consider exceptions.
+
+This attribute improves the optimization of "short-circuiting" coroutines.  It
+is useful when every suspend point in the coroutine is either (i) trivial
+(`std::suspend_never`), or (ii) a `co_await` that can be translated into simple
+control flow as:
+
+.. code-block:: c++
+
+  if (awaiter.await_ready()) {
+    val = awaiter.await_resume();
+  } else {
+    awaiter.await_suspend();
+    return /* value representing the "execution short-circuited" outcome */
+  }
+
+Adding the attribute permits the compiler to elide the `@llvm.coro.suspend`
+intrinsic and go directly to the cleanup/destruction path, avoiding unnecessary
+suspend/resume overhead.  If all suspension points can be simplified via (i) or
+(ii), then the coroutine should be able to be optimized nearly like a plain
+function via the `isNoSuspendCoroutine` optimization. The typical wins are:
+  - no heap allocation
+  - smaller code size & better optimization
+
+For example,
+
+.. code-block:: c++
+
+  template <typename T>
+  struct optional_awaitable {
+    std::optional<T> opt_;
+
+    bool await_ready() const noexcept {
+      return opt_.has_value();
+    }
+
+    T await_resume() {
+      return std::move(opt_).value();
+    }
+
+    [[clang::coro_destroyed_on_suspend]]
+    template <typename Promise>
+    void await_suspend(std::coroutine_handle<Promise> handle) {
+      // DANGER: If any exit path from a `coro_destroyed_on_suspend`-marked
+      // `await_suspend` neglects to call `destroy()`, that will cause UB.
+      handle.destroy();
+    }
+  };
+
+}];
+}
+
 def CountedByDocs : Documentation {
   let Category = DocCatField;
   let Content = [{
diff --git a/clang/lib/CodeGen/CGCoroutine.cpp 
b/clang/lib/CodeGen/CGCoroutine.cpp
index 827385f9c1a1f..e72c0e28a442e 100644
--- a/clang/lib/CodeGen/CGCoroutine.cpp
+++ b/clang/lib/CodeGen/CGCoroutine.cpp
@@ -247,8 +247,9 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction 
&CGF, CGCoroData &Co
   auto *NullPtr = llvm::ConstantPointerNull::get(CGF.CGM.Int8PtrTy);
   auto *SaveCall = Builder.CreateCall(CoroSave, {NullPtr});
 
+  bool SuspendWillDestroyCoro = false;
   auto SuspendWrapper = CodeGenFunction(CGF.CGM).generateAwaitSuspendWrapper(
-      CGF.CurFn->getName(), Prefix, S);
+      CGF.CurFn->getName(), Prefix, S, SuspendWillDestroyCoro);
 
   CGF.CurCoro.InSuspendBlock = true;
 
@@ -317,17 +318,24 @@ static LValueOrRValue 
emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co
   }
   }
 
-  // Emit the suspend point.
-  const bool IsFinalSuspend = (Kind == AwaitKind::Final);
-  llvm::Function *CoroSuspend =
-      CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_suspend);
-  auto *SuspendResult = Builder.CreateCall(
-      CoroSuspend, {SaveCall, Builder.getInt1(IsFinalSuspend)});
-
-  // Create a switch capturing three possible continuations.
-  auto *Switch = Builder.CreateSwitch(SuspendResult, Coro.SuspendBB, 2);
-  Switch->addCase(Builder.getInt8(0), ReadyBlock);
-  Switch->addCase(Builder.getInt8(1), CleanupBlock);
+  // If suspend will destroy coroutine, skip the suspend intrinsic and go
+  // straight to cleanup
+  if (SuspendWillDestroyCoro) {
+    // Skip the suspend point and go directly to cleanup
+    CGF.EmitBranch(CleanupBlock);
+  } else {
+    // Emit the suspend point.
+    const bool IsFinalSuspend = (Kind == AwaitKind::Final);
+    llvm::Function *CoroSuspend =
+        CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_suspend);
+    auto *SuspendResult = Builder.CreateCall(
+        CoroSuspend, {SaveCall, Builder.getInt1(IsFinalSuspend)});
+
+    // Create a switch capturing three possible continuations.
+    auto *Switch = Builder.CreateSwitch(SuspendResult, Coro.SuspendBB, 2);
+    Switch->addCase(Builder.getInt8(0), ReadyBlock);
+    Switch->addCase(Builder.getInt8(1), CleanupBlock);
+  }
 
   // Emit cleanup for this suspend point.
   CGF.EmitBlock(CleanupBlock);
@@ -411,10 +419,9 @@ static QualType getCoroutineSuspendExprReturnType(const 
ASTContext &Ctx,
 }
 #endif
 
-llvm::Function *
-CodeGenFunction::generateAwaitSuspendWrapper(Twine const &CoroName,
-                                             Twine const &SuspendPointName,
-                                             CoroutineSuspendExpr const &S) {
+llvm::Function *CodeGenFunction::generateAwaitSuspendWrapper(
+    Twine const &CoroName, Twine const &SuspendPointName,
+    CoroutineSuspendExpr const &S, bool &SuspendWillDestroyCoro) {
   std::string FuncName =
       (CoroName + ".__await_suspend_wrapper__" + SuspendPointName).str();
 
@@ -445,6 +452,30 @@ CodeGenFunction::generateAwaitSuspendWrapper(Twine const 
&CoroName,
   Fn->setMustProgress();
   Fn->addFnAttr(llvm::Attribute::AttrKind::AlwaysInline);
 
+  // Is the underlying `await_suspend()` marked `coro_destroyed_on_suspend`?
+  SuspendWillDestroyCoro = false;
+  // Only check for the attribute if the suspend expression returns void.  When
+  // a handle is returned, the outermost method call is `.address()`, and
+  // there's no point in doing more work to obtain the inner `await_suspend`.
+  if (S.getSuspendReturnType() == 
CoroutineSuspendExpr::SuspendReturnType::SuspendVoid) {
+    if (auto *CE = dyn_cast<CallExpr>(S.getSuspendExpr()->IgnoreImplicit())) {
+      if (auto *ME = dyn_cast<MemberExpr>(CE->getCallee())) {
+        if (auto *MethodDecl = dyn_cast<CXXMethodDecl>(ME->getMemberDecl())) {
+          if (MethodDecl->getNameAsString() == "await_suspend") {
+            for (const auto *Attr : MethodDecl->attrs()) {
+              if (Attr->getKind() == attr::Kind::CoroDestroyedOnSuspend) {
+                SuspendWillDestroyCoro = true;
+                break;
+              }
+            }
+          } else {
+            assert(false && "Unexpected method for 
[[clang::coro_destroyed_on_suspend]]");
+          }
+        }
+      }
+    }
+  }
+
   StartFunction(GlobalDecl(), ReturnTy, Fn, FI, args);
 
   // FIXME: add TBAA metadata to the loads
diff --git a/clang/lib/CodeGen/CodeGenFunction.h 
b/clang/lib/CodeGen/CodeGenFunction.h
index 6c32c98cec011..ecb5d8e2fcf8c 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -383,7 +383,8 @@ class CodeGenFunction : public CodeGenTypeCache {
   // where type is one of (void, i1, ptr)
   llvm::Function *generateAwaitSuspendWrapper(Twine const &CoroName,
                                               Twine const &SuspendPointName,
-                                              CoroutineSuspendExpr const &S);
+                                              CoroutineSuspendExpr const &S,
+                                              bool &SuspendWillDestroyCoro);
 
   /// CurGD - The GlobalDecl for the current function being compiled.
   GlobalDecl CurGD;
diff --git a/clang/test/CodeGenCoroutines/coro-destroyed-on-suspend.cpp 
b/clang/test/CodeGenCoroutines/coro-destroyed-on-suspend.cpp
new file mode 100644
index 0000000000000..99505c317e24e
--- /dev/null
+++ b/clang/test/CodeGenCoroutines/coro-destroyed-on-suspend.cpp
@@ -0,0 +1,152 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -emit-llvm -o - 
%s \
+// RUN:   -disable-llvm-passes | FileCheck %s --check-prefix=CHECK-INITIAL
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -emit-llvm -o - 
%s \
+// RUN:   -O2 | FileCheck %s --check-prefix=CHECK-OPTIMIZED
+
+#include "Inputs/coroutine.h"
+
+// Awaitable with `coro_destroyed_on_suspend` attribute
+struct DestroyingAwaitable {
+  bool await_ready() { return false; }
+  [[clang::coro_destroyed_on_suspend]]
+  void await_suspend(std::coroutine_handle<> h) {
+    h.destroy();
+  }
+  void await_resume() {}
+};
+
+// Awaitable without `coro_destroyed_on_suspend` (normal behavior)
+struct NormalAwaitable {
+  bool await_ready() { return false; }
+  void await_suspend(std::coroutine_handle<> h) {}
+  void await_resume() {}
+};
+
+// Coroutine type with `std::suspend_never` for initial/final suspend
+struct Task {
+  struct promise_type {
+    Task get_return_object() { return {}; }
+    std::suspend_never initial_suspend() { return {}; }
+    std::suspend_never final_suspend() noexcept { return {}; }
+    void return_void() {}
+    void unhandled_exception() {}
+  };
+};
+
+// Single co_await with coro_destroyed_on_suspend.
+// Should result in no allocation after optimization.
+Task test_single_destroying_await() {
+  co_await DestroyingAwaitable{};
+}
+
+// CHECK-INITIAL-LABEL: define{{.*}} void @_Z28test_single_destroying_awaitv
+// CHECK-INITIAL: call{{.*}} @llvm.coro.alloc
+// CHECK-INITIAL: call{{.*}} @llvm.coro.begin
+
+// CHECK-OPTIMIZED-LABEL: define{{.*}} void @_Z28test_single_destroying_awaitv
+// CHECK-OPTIMIZED-NOT: call{{.*}} @llvm.coro.alloc
+// CHECK-OPTIMIZED-NOT: call{{.*}} malloc
+// CHECK-OPTIMIZED-NOT: call{{.*}} @_Znwm
+
+// Test multiple `co_await`s, all with `coro_destroyed_on_suspend`.
+// This should also result in no allocation after optimization.
+Task test_multiple_destroying_awaits(bool condition) {
+  co_await DestroyingAwaitable{};
+  co_await DestroyingAwaitable{};
+  if (condition) {
+    co_await DestroyingAwaitable{};
+  }
+}
+
+// CHECK-INITIAL-LABEL: define{{.*}} void @_Z31test_multiple_destroying_awaitsb
+// CHECK-INITIAL: call{{.*}} @llvm.coro.alloc
+// CHECK-INITIAL: call{{.*}} @llvm.coro.begin
+
+// CHECK-OPTIMIZED-LABEL: define{{.*}} void 
@_Z31test_multiple_destroying_awaitsb
+// CHECK-OPTIMIZED-NOT: call{{.*}} @llvm.coro.alloc
+// CHECK-OPTIMIZED-NOT: call{{.*}} malloc
+// CHECK-OPTIMIZED-NOT: call{{.*}} @_Znwm
+
+// Mixed awaits - some with `coro_destroyed_on_suspend`, some without.
+// We should still see allocation because not all awaits destroy the coroutine.
+Task test_mixed_awaits() {
+  co_await NormalAwaitable{}; // Must precede "destroy" to be reachable
+  co_await DestroyingAwaitable{};
+}
+
+// CHECK-INITIAL-LABEL: define{{.*}} void @_Z17test_mixed_awaitsv
+// CHECK-INITIAL: call{{.*}} @llvm.coro.alloc
+// CHECK-INITIAL: call{{.*}} @llvm.coro.begin
+
+// CHECK-OPTIMIZED-LABEL: define{{.*}} void @_Z17test_mixed_awaitsv
+// CHECK-OPTIMIZED: call{{.*}} @_Znwm
+
+
+// Check the attribute detection affects control flow.  With
+// `coro_destroyed_on_suspend`, the suspend point should be skipped and control
+// should go directly to cleanup instead of the normal suspend logic
+Task test_attribute_detection() {
+  co_await DestroyingAwaitable{};
+  // Unreachable in OPTIMIZED, so those builds don't see an allocation.
+  co_await NormalAwaitable{};
+}
+
+// Check that we skip the normal suspend intrinsic and go directly to cleanup.
+// This is the key behavioral difference the attribute enables.
+//
+// CHECK-INITIAL-LABEL: define{{.*}} void @_Z24test_attribute_detectionv
+// CHECK-INITIAL: call{{.*}} 
@_Z24test_attribute_detectionv.__await_suspend_wrapper__await
+// CHECK-NEXT: br label %await.cleanup
+// CHECK-INITIAL-NOT: call{{.*}} @llvm.coro.suspend
+// CHECK-INITIAL: call{{.*}} 
@_Z24test_attribute_detectionv.__await_suspend_wrapper__await
+// CHECK-INITIAL: call{{.*}} @llvm.coro.suspend
+// CHECK-INITIAL: call{{.*}} 
@_Z24test_attribute_detectionv.__await_suspend_wrapper__final
+
+// Since `co_await DestroyingAwaitable{}` gets converted into an unconditional
+// branch, the `co_await NormalAwaitable{}` is unreachable in optimized builds.
+//
+// CHECK-OPTIMIZED-NOT: call{{.*}} @llvm.coro.alloc
+// CHECK-OPTIMIZED-NOT: call{{.*}} malloc
+// CHECK-OPTIMIZED-NOT: call{{.*}} @_Znwm
+
+// Template awaitable with `coro_destroyed_on_suspend` attribute
+template<typename T>
+struct TemplateDestroyingAwaitable {
+  bool await_ready() { return false; }
+
+  [[clang::coro_destroyed_on_suspend]]
+  void await_suspend(std::coroutine_handle<> h) {
+    h.destroy();
+  }
+
+  void await_resume() {}
+};
+
+Task test_template_destroying_await() {
+  co_await TemplateDestroyingAwaitable<int>{};
+}
+
+// CHECK-OPTIMIZED-LABEL: define{{.*}} void 
@_Z30test_template_destroying_awaitv
+// CHECK-OPTIMIZED-NOT: call{{.*}} @llvm.coro.alloc
+// CHECK-OPTIMIZED-NOT: call{{.*}} malloc
+// CHECK-OPTIMIZED-NOT: call{{.*}} @_Znwm
+
+struct InvalidDestroyingAwaitable {
+  bool await_ready() { return false; }
+
+  // The attribute is ignored, since the function doesn't return `void`!
+  [[clang::coro_destroyed_on_suspend]]
+  bool await_suspend(std::coroutine_handle<> h) {
+    h.destroy();
+    return true;
+  }
+
+  void await_resume() {}
+};
+
+Task test_invalid_destroying_await() {
+  co_await InvalidDestroyingAwaitable{};
+}
+
+// CHECK-OPTIMIZED-LABEL: define{{.*}} void @_Z29test_invalid_destroying_awaitv
+// CHECK-OPTIMIZED: call{{.*}} @_Znwm
diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test 
b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
index 05693538252aa..a02b97869adc9 100644
--- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -62,6 +62,7 @@
 // CHECK-NEXT: Convergent (SubjectMatchRule_function)
 // CHECK-NEXT: CoroAwaitElidable (SubjectMatchRule_record)
 // CHECK-NEXT: CoroAwaitElidableArgument 
(SubjectMatchRule_variable_is_parameter)
+// CHECK-NEXT: CoroDestroyedOnSuspend (SubjectMatchRule_function_is_member)
 // CHECK-NEXT: CoroDisableLifetimeBound (SubjectMatchRule_function)
 // CHECK-NEXT: CoroLifetimeBound (SubjectMatchRule_record)
 // CHECK-NEXT: CoroOnlyDestroyWhenComplete (SubjectMatchRule_record)

``````````

</details>


https://github.com/llvm/llvm-project/pull/152029
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to