lxfind updated this revision to Diff 355431.
lxfind added a comment.

Put the post-split ramp function back to the CGSCC worklist


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D95807/new/

https://reviews.llvm.org/D95807

Files:
  clang/test/CodeGenCoroutines/coro-newpm-pipeline.cpp
  llvm/lib/Passes/PassBuilder.cpp
  llvm/lib/Transforms/Coroutines/CoroSplit.cpp
  llvm/test/Transforms/Coroutines/ArgAddr.ll
  llvm/test/Transforms/Coroutines/coro-alloc-with-param-O0.ll
  llvm/test/Transforms/Coroutines/coro-alloc-with-param-O2.ll
  llvm/test/Transforms/Coroutines/coro-alloca-01.ll
  llvm/test/Transforms/Coroutines/coro-alloca-02.ll
  llvm/test/Transforms/Coroutines/coro-alloca-03.ll
  llvm/test/Transforms/Coroutines/coro-alloca-04.ll
  llvm/test/Transforms/Coroutines/coro-alloca-05.ll
  llvm/test/Transforms/Coroutines/coro-alloca-06.ll
  llvm/test/Transforms/Coroutines/coro-alloca-07.ll
  llvm/test/Transforms/Coroutines/coro-alloca-08.ll
  llvm/test/Transforms/Coroutines/coro-async.ll
  llvm/test/Transforms/Coroutines/coro-byval-param.ll
  llvm/test/Transforms/Coroutines/coro-catchswitch-cleanuppad.ll
  llvm/test/Transforms/Coroutines/coro-catchswitch.ll
  llvm/test/Transforms/Coroutines/coro-debug.ll
  llvm/test/Transforms/Coroutines/coro-eh-aware-edge-split-00.ll
  llvm/test/Transforms/Coroutines/coro-eh-aware-edge-split-01.ll
  llvm/test/Transforms/Coroutines/coro-eh-aware-edge-split-02.ll
  llvm/test/Transforms/Coroutines/coro-frame-arrayalloca.ll
  llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-00.ll
  llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-01.ll
  llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-02.ll
  llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-03.ll
  llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-04.ll
  llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-05.ll
  llvm/test/Transforms/Coroutines/coro-frame-unreachable.ll
  llvm/test/Transforms/Coroutines/coro-frame.ll
  llvm/test/Transforms/Coroutines/coro-materialize.ll
  llvm/test/Transforms/Coroutines/coro-padding.ll
  llvm/test/Transforms/Coroutines/coro-param-copy.ll
  llvm/test/Transforms/Coroutines/coro-retcon-alloca.ll
  llvm/test/Transforms/Coroutines/coro-retcon-frame.ll
  llvm/test/Transforms/Coroutines/coro-retcon-once-value.ll
  llvm/test/Transforms/Coroutines/coro-retcon-once-value2.ll
  llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll
  llvm/test/Transforms/Coroutines/coro-retcon-resume-values2.ll
  llvm/test/Transforms/Coroutines/coro-retcon-unreachable.ll
  llvm/test/Transforms/Coroutines/coro-retcon-value.ll
  llvm/test/Transforms/Coroutines/coro-retcon.ll
  llvm/test/Transforms/Coroutines/coro-spill-after-phi.ll
  llvm/test/Transforms/Coroutines/coro-spill-corobegin.ll
  llvm/test/Transforms/Coroutines/coro-spill-defs-before-corobegin.ll
  llvm/test/Transforms/Coroutines/coro-spill-promise.ll
  llvm/test/Transforms/Coroutines/coro-split-00.ll
  llvm/test/Transforms/Coroutines/coro-split-02.ll
  llvm/test/Transforms/Coroutines/coro-split-alloc.ll
  llvm/test/Transforms/Coroutines/coro-split-dbg.ll
  llvm/test/Transforms/Coroutines/coro-split-eh-00.ll
  llvm/test/Transforms/Coroutines/coro-split-eh-01.ll
  llvm/test/Transforms/Coroutines/coro-split-hidden.ll
  llvm/test/Transforms/Coroutines/coro-split-musttail.ll
  llvm/test/Transforms/Coroutines/coro-split-musttail1.ll
  llvm/test/Transforms/Coroutines/coro-split-musttail2.ll
  llvm/test/Transforms/Coroutines/coro-split-musttail3.ll
  llvm/test/Transforms/Coroutines/coro-split-recursive.ll
  llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-01.ll
  llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-02.ll
  llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-03.ll
  llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-04.ll
  llvm/test/Transforms/Coroutines/coro-swifterror.ll
  llvm/test/Transforms/Coroutines/coro-zero-alloca.ll
  llvm/test/Transforms/Coroutines/no-suspend.ll
  llvm/test/Transforms/Coroutines/restart-trigger.ll
  llvm/test/Transforms/Coroutines/smoketest.ll

Index: llvm/test/Transforms/Coroutines/smoketest.ll
===================================================================
--- llvm/test/Transforms/Coroutines/smoketest.ll
+++ llvm/test/Transforms/Coroutines/smoketest.ll
@@ -10,12 +10,16 @@
 ; RUN: opt < %s -disable-output -passes='default<O3>' -enable-coroutines \
 ; RUN:     -debug-pass-manager 2>&1 | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-OPT
 ; RUN: opt < %s -disable-output -debug-pass-manager \
-; RUN:     -passes='function(coro-early),cgscc(coro-split),function(coro-elide,coro-cleanup)' 2>&1 \
+; RUN:     -passes='function(coro-early),function(coro-elide),cgscc(coro-split),function(coro-cleanup)' 2>&1 \
 ; RUN:     | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-OPT
 
+; Note that we run CoroElidePass before CoroSplitPass. This is because CoroElidePass is part of
+; the function simplification pipeline, which runs before CoroSplitPass. Since @foo is not
+; a coroutine, it won't be put back into the CGSCC worklist, and hence won't trigger a
+; CoroElidePass run after CoroSplitPass.
 ; CHECK-ALL: CoroEarlyPass
-; CHECK-ALL: CoroSplitPass
 ; CHECK-OPT: CoroElidePass
+; CHECK-ALL: CoroSplitPass
 ; CHECK-ALL: CoroCleanupPass
 
 define void @foo() {
Index: llvm/test/Transforms/Coroutines/restart-trigger.ll
===================================================================
--- llvm/test/Transforms/Coroutines/restart-trigger.ll
+++ llvm/test/Transforms/Coroutines/restart-trigger.ll
@@ -1,11 +1,14 @@
 ; REQUIRES: asserts
 ; The following tests use the new pass manager, and verify that the coroutine
 ; passes re-run the CGSCC pipeline.
-; RUN: opt < %s -S -passes='default<O0>' -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck %s
-; RUN: opt < %s -S -passes='default<O1>' -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck %s
+; RUN: opt < %s -S -passes='default<O0>' -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck --check-prefix=CHECK-NEWPM %s
+; RUN: opt < %s -S -passes='default<O1>' -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck --check-prefix=CHECK-NEWPM %s
 
 ; CHECK:      CoroSplit: Processing coroutine 'f' state: 0
 ; CHECK-NEXT: CoroSplit: Processing coroutine 'f' state: 1
+; CHECK-NEWPM:      CoroSplit: Processing coroutine 'f' state: 0
+; CHECK-NEWPM-NOT:  CoroSplit: Processing coroutine 'f' state: 1
+
 
 define void @f() {
   %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
Index: llvm/test/Transforms/Coroutines/no-suspend.ll
===================================================================
--- llvm/test/Transforms/Coroutines/no-suspend.ll
+++ llvm/test/Transforms/Coroutines/no-suspend.ll
@@ -1,5 +1,5 @@
 ; Test no suspend coroutines
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse,simplify-cfg' -S | FileCheck %s
 
 ; Coroutine with no-suspends will turn into:
 ;
Index: llvm/test/Transforms/Coroutines/coro-zero-alloca.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-zero-alloca.ll
+++ llvm/test/Transforms/Coroutines/coro-zero-alloca.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 declare i8* @malloc(i64)
 declare void @free(i8*)
Index: llvm/test/Transforms/Coroutines/coro-swifterror.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-swifterror.ll
+++ llvm/test/Transforms/Coroutines/coro-swifterror.ll
@@ -4,7 +4,7 @@
 
 define i8* @f(i8* %buffer, i32 %n, i8** swifterror %errorslot) {
 ; CHECK-LABEL: @f(
-; CHECK-NEXT:  entry:
+; CHECK-NEXT:  coro.return:
 ; CHECK-NEXT:    [[N_VAL_SPILL_ADDR:%.*]] = bitcast i8* [[BUFFER:%.*]] to i32*
 ; CHECK-NEXT:    store i32 [[N:%.*]], i32* [[N_VAL_SPILL_ADDR]], align 4
 ; CHECK-NEXT:    tail call void @print(i32 [[N]])
@@ -44,7 +44,7 @@
 
 define i8* @g(i8* %buffer, i32 %n) {
 ; CHECK-LABEL: @g(
-; CHECK-NEXT:  entry:
+; CHECK-NEXT:  coro.return:
 ; CHECK-NEXT:    [[TMP0:%.*]] = alloca swifterror i8*, align 4
 ; CHECK-NEXT:    [[N_VAL_SPILL_ADDR:%.*]] = bitcast i8* [[BUFFER:%.*]] to i32*
 ; CHECK-NEXT:    store i32 [[N:%.*]], i32* [[N_VAL_SPILL_ADDR]], align 4
Index: llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-04.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-04.ll
+++ llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-04.ll
@@ -1,6 +1,6 @@
 ; Tests that coro-split will optimize the lifetime.start maker of each local variable,
 ; sink them to the places after the suspend block.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse,simplify-cfg' -S | FileCheck %s
 
 %"struct.std::coroutine_handle" = type { i8* }
 %"struct.std::coroutine_handle.0" = type { %"struct.std::coroutine_handle" }
Index: llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-03.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-03.ll
+++ llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-03.ll
@@ -1,6 +1,6 @@
 ; Corresponding to coro-split-sink-lifetime-01.ll. This file tests that whether the CoroFrame
 ; pass knows the operand of lifetime.start intrinsic may be GEP as well.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse,simplify-cfg' -S | FileCheck %s
 
 %"struct.std::coroutine_handle" = type { i8* }
 %"struct.std::coroutine_handle.0" = type { %"struct.std::coroutine_handle" }
Index: llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-02.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-02.ll
+++ llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-02.ll
@@ -1,6 +1,6 @@
 ; Tests that coro-split will optimize the lifetime.start maker of each local variable,
 ; sink them to the places after the suspend block.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 %"struct.std::coroutine_handle" = type { i8* }
 %"struct.std::coroutine_handle.0" = type { %"struct.std::coroutine_handle" }
@@ -58,7 +58,6 @@
 ; CHECK:    %[[VAL:testval.+]] = getelementptr inbounds %a.Frame
 ; CHECK-NOT:     call void @llvm.lifetime.start.p0i8(i64 4, i8* %{{.*}})
 ; CHECK:         %test = load i32, i32* %[[VAL]]
-; CHECK-NOT:     %test = load i32, i32* %testval
 
 declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*)
 declare i1 @llvm.coro.alloc(token) #3
@@ -74,4 +73,3 @@
 declare i1 @llvm.coro.end(i8*, i1) #3
 declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #4
 declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #4
-
Index: llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-01.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-01.ll
+++ llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-01.ll
@@ -1,6 +1,6 @@
 ; Tests that coro-split will optimize the lifetime.start maker of each local variable,
 ; sink them to the places after the suspend block.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse,simplify-cfg' -S | FileCheck %s
 
 %"struct.std::coroutine_handle" = type { i8* }
 %"struct.std::coroutine_handle.0" = type { %"struct.std::coroutine_handle" }
@@ -67,4 +67,3 @@
 declare i1 @llvm.coro.end(i8*, i1) #3
 declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #4
 declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #4
-
Index: llvm/test/Transforms/Coroutines/coro-split-recursive.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-split-recursive.ll
+++ llvm/test/Transforms/Coroutines/coro-split-recursive.ll
@@ -1,4 +1,4 @@
-; RUN: opt -passes='default<O2>' -enable-coroutines -S < %s | FileCheck %s
+; RUN: opt -passes='function(coro-early),cgscc(coro-split)' -S < %s | FileCheck %s
 
 declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*)
 
Index: llvm/test/Transforms/Coroutines/coro-split-musttail3.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-split-musttail3.ll
+++ llvm/test/Transforms/Coroutines/coro-split-musttail3.ll
@@ -1,6 +1,6 @@
 ; Tests that coro-split will convert coro.resume followed by a suspend to a
 ; musttail call.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 define void @f() #0 {
 entry:
Index: llvm/test/Transforms/Coroutines/coro-split-musttail2.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-split-musttail2.ll
+++ llvm/test/Transforms/Coroutines/coro-split-musttail2.ll
@@ -1,6 +1,6 @@
 ; Tests that coro-split will convert coro.resume followed by a suspend to a
 ; musttail call.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 define void @fakeresume1(i8*)  {
 entry:
Index: llvm/test/Transforms/Coroutines/coro-split-musttail1.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-split-musttail1.ll
+++ llvm/test/Transforms/Coroutines/coro-split-musttail1.ll
@@ -1,6 +1,6 @@
 ; Tests that coro-split will convert coro.resume followed by a suspend to a
 ; musttail call.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 define void @f() #0 {
 entry:
Index: llvm/test/Transforms/Coroutines/coro-split-musttail.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-split-musttail.ll
+++ llvm/test/Transforms/Coroutines/coro-split-musttail.ll
@@ -1,6 +1,6 @@
 ; Tests that coro-split will convert coro.resume followed by a suspend to a
 ; musttail call.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 define void @f() #0 {
 entry:
Index: llvm/test/Transforms/Coroutines/coro-split-hidden.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-split-hidden.ll
+++ llvm/test/Transforms/Coroutines/coro-split-hidden.ll
@@ -1,7 +1,7 @@
 ; Tests that coro-split can convert functions with hidden visibility.
 ; These may be generated by a frontend such as Clang, when inlining with
 ; '-fvisibility-inlines-hidden'.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 define hidden i8* @f() "coroutine.presplit"="1" {
 entry:
Index: llvm/test/Transforms/Coroutines/coro-split-eh-01.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-split-eh-01.ll
+++ llvm/test/Transforms/Coroutines/coro-split-eh-01.ll
@@ -1,6 +1,6 @@
 ; Tests that coro-split removes cleanup code after coro.end in resume functions
 ; and retains it in the start function.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 define i8* @f2(i1 %val) "coroutine.presplit"="1" personality i32 4 {
 entry:
@@ -77,4 +77,3 @@
 declare noalias i8* @malloc(i32)
 declare void @print(i32)
 declare void @free(i8*)
-
Index: llvm/test/Transforms/Coroutines/coro-split-eh-00.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-split-eh-00.ll
+++ llvm/test/Transforms/Coroutines/coro-split-eh-00.ll
@@ -1,6 +1,6 @@
 ; Tests that coro-split removes cleanup code after coro.end in resume functions
 ; and retains it in the start function.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 define i8* @f(i1 %val) "coroutine.presplit"="1" personality i32 3 {
 entry:
@@ -9,15 +9,15 @@
   call void @print(i32 0)
   br i1 %val, label %resume, label %susp
 
-susp:  
+susp:
   %0 = call i8 @llvm.coro.suspend(token none, i1 false)
-  switch i8 %0, label %suspend [i8 0, label %resume 
+  switch i8 %0, label %suspend [i8 0, label %resume
                                 i8 1, label %suspend]
 resume:
   invoke void @print(i32 1) to label %suspend unwind label %lpad
 
 suspend:
-  call i1 @llvm.coro.end(i8* %hdl, i1 0)  
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
   call void @print(i32 0) ; should not be present in f.resume
   ret i8* %hdl
 
@@ -78,9 +78,8 @@
 declare token @llvm.coro.id(i32, i8*, i8*, i8*)
 declare i8* @llvm.coro.alloc(token)
 declare i8* @llvm.coro.begin(token, i8*)
-declare i1 @llvm.coro.end(i8*, i1) 
+declare i1 @llvm.coro.end(i8*, i1)
 
 declare noalias i8* @malloc(i32)
 declare void @print(i32)
 declare void @free(i8*)
-
Index: llvm/test/Transforms/Coroutines/coro-split-dbg.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-split-dbg.ll
+++ llvm/test/Transforms/Coroutines/coro-split-dbg.ll
@@ -1,6 +1,6 @@
 ; Make sure that coro-split correctly deals with debug information.
 ; The test here is simply that it does not result in bad IR that will crash opt.
-; RUN: opt < %s -passes=coro-split -disable-output
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -disable-output
 source_filename = "coro.c"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
Index: llvm/test/Transforms/Coroutines/coro-split-alloc.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-split-alloc.ll
+++ llvm/test/Transforms/Coroutines/coro-split-alloc.ll
@@ -1,5 +1,5 @@
 ; Tests that coro-split passes initialized values to coroutine frame allocator.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 define i8* @f(i32 %argument) "coroutine.presplit"="1" {
 entry:
Index: llvm/test/Transforms/Coroutines/coro-split-02.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-split-02.ll
+++ llvm/test/Transforms/Coroutines/coro-split-02.ll
@@ -1,7 +1,7 @@
 ; Tests that coro-split can handle the case when a code after coro.suspend uses
 ; a value produces between coro.save and coro.suspend (%Result.i19)
 ; and checks whether stray coro.saves are properly removed
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse,simplify-cfg' -S | FileCheck %s
 
 %"struct.std::coroutine_handle" = type { i8* }
 %"struct.std::coroutine_handle.0" = type { %"struct.std::coroutine_handle" }
@@ -68,4 +68,3 @@
 declare i1 @llvm.coro.end(i8*, i1) #3
 declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #4
 declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #4
-
Index: llvm/test/Transforms/Coroutines/coro-split-00.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-split-00.ll
+++ llvm/test/Transforms/Coroutines/coro-split-00.ll
@@ -1,5 +1,5 @@
 ; Tests that coro-split pass splits the coroutine into f, f.resume and f.destroy
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 define i8* @f() "coroutine.presplit"="1" {
 entry:
Index: llvm/test/Transforms/Coroutines/coro-spill-promise.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-spill-promise.ll
+++ llvm/test/Transforms/Coroutines/coro-spill-promise.ll
@@ -1,5 +1,5 @@
 ; Check that promise object is reloaded from the correct index of the coro frame.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 %"class.task::promise_type" = type { [64 x i8] }
 
Index: llvm/test/Transforms/Coroutines/coro-spill-defs-before-corobegin.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-spill-defs-before-corobegin.ll
+++ llvm/test/Transforms/Coroutines/coro-spill-defs-before-corobegin.ll
@@ -1,5 +1,5 @@
 ; Verifies that phi and invoke definitions before CoroBegin are spilled properly.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse,simplify-cfg' -S | FileCheck %s
 
 define i8* @f(i1 %n) "coroutine.presplit"="1" personality i32 0 {
 entry:
@@ -51,7 +51,7 @@
 ; CHECK-LABEL: @f(
 ; CHECK:       %alloc = call i8* @malloc(i32 32)
 ; CHECK-NEXT:  %flag = call i1 @check(i8* %alloc)
-; CHECK-NEXT:  %value_phi = select i1 %flag, i32 0, i32 1
+; CHECK-NEXT:  %spec.select = select i1 %flag, i32 0, i32 1
 ; CHECK-NEXT:  %value_invoke = call i32 @calc()
 ; CHECK-NEXT:  %hdl = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc)
 
@@ -59,7 +59,7 @@
 ; CHECK-NEXT:  %value_invoke.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3
 ; CHECK-NEXT:  store i32 %value_invoke, i32* %value_invoke.spill.addr
 ; CHECK-NEXT:  %value_phi.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2
-; CHECK-NEXT:  store i32 %value_phi, i32* %value_phi.spill.addr
+; CHECK-NEXT:  store i32 %spec.select, i32* %value_phi.spill.addr
 
 declare i8* @llvm.coro.free(token, i8*)
 declare i32 @llvm.coro.size.i32()
Index: llvm/test/Transforms/Coroutines/coro-spill-corobegin.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-spill-corobegin.ll
+++ llvm/test/Transforms/Coroutines/coro-spill-corobegin.ll
@@ -1,5 +1,5 @@
 ; Check that we can spills coro.begin from an inlined inner coroutine.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 %g.Frame = type { void (%g.Frame*)*, void (%g.Frame*)*, i32, i1, i32 }
 
Index: llvm/test/Transforms/Coroutines/coro-spill-after-phi.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-spill-after-phi.ll
+++ llvm/test/Transforms/Coroutines/coro-spill-after-phi.ll
@@ -1,7 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; Verifies that we insert spills of PHI instruction _after) all PHI Nodes
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse,simplify-cfg' -S | FileCheck %s
+
+; Verifies that both phis are stored correctly in the coroutine frame
+; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i32, i32, i1 }
 
 define i8* @f(i1 %n) "coroutine.presplit"="1" {
+; CHECK-LABEL: @f(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ID:%.*]] = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* bitcast ([3 x void (%f.Frame*)*]* @f.resumers to i8*))
+; CHECK-NEXT:    [[ALLOC:%.*]] = call i8* @malloc(i32 32)
+; CHECK-NEXT:    [[HDL:%.*]] = call noalias nonnull i8* @llvm.coro.begin(token [[ID]], i8* [[ALLOC]])
+; CHECK-NEXT:    [[FRAMEPTR:%.*]] = bitcast i8* [[HDL]] to %f.Frame*
+; CHECK-NEXT:    [[RESUME_ADDR:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR]], i32 0, i32 0
+; CHECK-NEXT:    store void (%f.Frame*)* @f.resume, void (%f.Frame*)** [[RESUME_ADDR]], align 8
+; CHECK-NEXT:    [[DESTROY_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 1
+; CHECK-NEXT:    store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** [[DESTROY_ADDR]], align 8
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[N:%.*]], i32 0, i32 2
+; CHECK-NEXT:    [[SPEC_SELECT5:%.*]] = select i1 [[N]], i32 1, i32 3
+; CHECK-NEXT:    [[PHI2_SPILL_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 3
+; CHECK-NEXT:    store i32 [[SPEC_SELECT5]], i32* [[PHI2_SPILL_ADDR]], align 4
+; CHECK-NEXT:    [[PHI1_SPILL_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 2
+; CHECK-NEXT:    store i32 [[SPEC_SELECT]], i32* [[PHI1_SPILL_ADDR]], align 4
+; CHECK-NEXT:    [[INDEX_ADDR4:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 4
+; CHECK-NEXT:    store i1 false, i1* [[INDEX_ADDR4]], align 1
+; CHECK-NEXT:    ret i8* [[HDL]]
+;
 entry:
   %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
   %size = call i32 @llvm.coro.size.i32()
@@ -17,7 +41,7 @@
 
   %sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
   switch i8 %sp1, label %suspend [i8 0, label %resume
-                                  i8 1, label %cleanup]
+  i8 1, label %cleanup]
 resume:
   call i32 @print(i32 %phi1)
   call i32 @print(i32 %phi2)
@@ -32,18 +56,6 @@
   ret i8* %hdl
 }
 
-; Verifies that the both phis are stored correctly in the coroutine frame
-; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i32, i32, i1 }
-; CHECK-LABEL: @f(
-; CHECK: store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** %destroy.addr
-; CHECK: %phi1 = select i1 %n, i32 0, i32 2
-; CHECK: %phi2 = select i1 %n, i32 1, i32 3
-; CHECK: %phi2.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3
-; CHECK: store i32 %phi2, i32* %phi2.spill.addr
-; CHECK: %phi1.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2
-; CHECK: store i32 %phi1, i32* %phi1.spill.addr
-; CHECK: ret i8* %hdl
-
 declare i8* @llvm.coro.free(token, i8*)
 declare i32 @llvm.coro.size.i32()
 declare i8  @llvm.coro.suspend(token, i1)
Index: llvm/test/Transforms/Coroutines/coro-retcon.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-retcon.ll
+++ llvm/test/Transforms/Coroutines/coro-retcon.ll
@@ -1,14 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; First example from Doc/Coroutines.rst (two block loop) converted to retcon
-; RUN: opt < %s -enable-coroutines -passes='default<O2>' -S | FileCheck --check-prefixes=ALL,NEWPM %s
+; RUN: opt < %s -enable-coroutines -passes='default<O2>' -S | FileCheck %s
 
 define i8* @f(i8* %buffer, i32 %n) {
-; ALL-LABEL: @f(
-; ALL-NEXT:  entry:
-; ALL-NEXT:    [[N_VAL_SPILL_ADDR:%.*]] = bitcast i8* [[BUFFER:%.*]] to i32*
-; ALL-NEXT:    store i32 [[N:%.*]], i32* [[N_VAL_SPILL_ADDR]], align 4
-; ALL-NEXT:    tail call void @print(i32 [[N]])
-; ALL-NEXT:    ret i8* bitcast (i8* (i8*, i1)* @f.resume.0 to i8*)
+; CHECK-LABEL: @f(
+; CHECK-NEXT:  coro.return:
+; CHECK-NEXT:    [[N_VAL_SPILL_ADDR:%.*]] = bitcast i8* [[BUFFER:%.*]] to i32*
+; CHECK-NEXT:    store i32 [[N:%.*]], i32* [[N_VAL_SPILL_ADDR]], align 4
+; CHECK-NEXT:    tail call void @print(i32 [[N]])
+; CHECK-NEXT:    ret i8* bitcast (i8* (i8*, i1)* @f.resume.0 to i8*)
 ;
 entry:
   %id = call token @llvm.coro.id.retcon(i32 8, i32 4, i8* %buffer, i8* bitcast (i8* (i8*, i1)* @prototype to i8*), i8* bitcast (i8* (i32)* @allocate to i8*), i8* bitcast (void (i8*)* @deallocate to i8*))
@@ -33,23 +33,23 @@
 
 
 define i32 @main() {
-; ALL-LABEL: @main(
-; ALL-NEXT:  entry:
-; ALL-NEXT:    [[TMP0:%.*]] = alloca [8 x i8], align 4
-; ALL-NEXT:    [[DOTSUB:%.*]] = getelementptr inbounds [8 x i8], [8 x i8]* [[TMP0]], i64 0, i64 0
-; ALL-NEXT:    [[N_VAL_SPILL_ADDR_I:%.*]] = bitcast [8 x i8]* [[TMP0]] to i32*
-; ALL-NEXT:    store i32 4, i32* [[N_VAL_SPILL_ADDR_I]], align 4
-; ALL-NEXT:    call void @print(i32 4)
-; ALL-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]])
-; ALL-NEXT:    [[N_VAL_RELOAD_I:%.*]] = load i32, i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !0
-; ALL-NEXT:    [[INC_I:%.*]] = add i32 [[N_VAL_RELOAD_I]], 1
-; ALL-NEXT:    store i32 [[INC_I]], i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !0
-; ALL-NEXT:    call void @print(i32 [[INC_I]]), !noalias !0
-; ALL-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
-; ALL-NEXT:    [[N_VAL_RELOAD_I1:%.*]] = load i32, i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !3
-; ALL-NEXT:    [[INC_I2:%.*]] = add i32 [[N_VAL_RELOAD_I1]], 1
-; ALL-NEXT:    call void @print(i32 [[INC_I2]]), !noalias !3
-; ALL-NEXT:    ret i32 0
+; CHECK-LABEL: @main(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca [8 x i8], align 4
+; CHECK-NEXT:    [[DOTSUB:%.*]] = getelementptr inbounds [8 x i8], [8 x i8]* [[TMP0]], i64 0, i64 0
+; CHECK-NEXT:    [[N_VAL_SPILL_ADDR_I:%.*]] = bitcast [8 x i8]* [[TMP0]] to i32*
+; CHECK-NEXT:    store i32 4, i32* [[N_VAL_SPILL_ADDR_I]], align 4
+; CHECK-NEXT:    call void @print(i32 4)
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]])
+; CHECK-NEXT:    [[N_VAL_RELOAD_I:%.*]] = load i32, i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !0
+; CHECK-NEXT:    [[INC_I:%.*]] = add i32 [[N_VAL_RELOAD_I]], 1
+; CHECK-NEXT:    store i32 [[INC_I]], i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !0
+; CHECK-NEXT:    call void @print(i32 [[INC_I]]), !noalias !0
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
+; CHECK-NEXT:    [[N_VAL_RELOAD_I3:%.*]] = load i32, i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !3
+; CHECK-NEXT:    [[INC_I4:%.*]] = add i32 [[N_VAL_RELOAD_I3]], 1
+; CHECK-NEXT:    call void @print(i32 [[INC_I4]]), !noalias !3
+; CHECK-NEXT:    ret i32 0
 ;
 entry:
   %0 = alloca [8 x i8], align 4
@@ -70,27 +70,16 @@
 ;   to some sort of phase-ordering thing.
 
 define hidden { i8*, i8* } @g(i8* %buffer, i16* %ptr) {
-; OLDPM-LABEL: @g(
-; OLDPM-NEXT:  entry:
-; OLDPM-NEXT:    [[TMP0:%.*]] = tail call i8* @allocate(i32 8) #[[ATTR0:[0-9]+]]
-; OLDPM-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BUFFER:%.*]] to i8**
-; OLDPM-NEXT:    store i8* [[TMP0]], i8** [[TMP1]], align 8
-; OLDPM-NEXT:    [[PTR_SPILL_ADDR:%.*]] = bitcast i8* [[TMP0]] to i16**
-; OLDPM-NEXT:    store i16* [[PTR:%.*]], i16** [[PTR_SPILL_ADDR]], align 8
-; OLDPM-NEXT:    [[TMP2:%.*]] = bitcast i16* [[PTR]] to i8*
-; OLDPM-NEXT:    [[TMP3:%.*]] = insertvalue { i8*, i8* } { i8* bitcast ({ i8*, i8* } (i8*, i1)* @g.resume.0 to i8*), i8* undef }, i8* [[TMP2]], 1
-; OLDPM-NEXT:    ret { i8*, i8* } [[TMP3]]
-;
-; NEWPM-LABEL: @g(
-; NEWPM-NEXT:  entry:
-; NEWPM-NEXT:    [[TMP0:%.*]] = tail call i8* @allocate(i32 8)
-; NEWPM-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BUFFER:%.*]] to i8**
-; NEWPM-NEXT:    store i8* [[TMP0]], i8** [[TMP1]], align 8
-; NEWPM-NEXT:    [[PTR_SPILL_ADDR:%.*]] = bitcast i8* [[TMP0]] to i16**
-; NEWPM-NEXT:    store i16* [[PTR:%.*]], i16** [[PTR_SPILL_ADDR]], align 8
-; NEWPM-NEXT:    [[TMP2:%.*]] = bitcast i16* [[PTR]] to i8*
-; NEWPM-NEXT:    [[TMP3:%.*]] = insertvalue { i8*, i8* } { i8* bitcast ({ i8*, i8* } (i8*, i1)* @g.resume.0 to i8*), i8* undef }, i8* [[TMP2]], 1
-; NEWPM-NEXT:    ret { i8*, i8* } [[TMP3]]
+; CHECK-LABEL: @g(
+; CHECK-NEXT:  coro.return:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8* @allocate(i32 8) #[[ATTR0:[0-9]+]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BUFFER:%.*]] to i8**
+; CHECK-NEXT:    store i8* [[TMP0]], i8** [[TMP1]], align 8
+; CHECK-NEXT:    [[PTR_SPILL_ADDR:%.*]] = bitcast i8* [[TMP0]] to i16**
+; CHECK-NEXT:    store i16* [[PTR:%.*]], i16** [[PTR_SPILL_ADDR]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16* [[PTR]] to i8*
+; CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { i8*, i8* } { i8* bitcast ({ i8*, i8* } (i8*, i1)* @g.resume.0 to i8*), i8* undef }, i8* [[TMP2]], 1
+; CHECK-NEXT:    ret { i8*, i8* } [[TMP3]]
 ;
 entry:
   %id = call token @llvm.coro.id.retcon(i32 8, i32 4, i8* %buffer, i8* bitcast ({ i8*, i8* } (i8*, i1)* @g_prototype to i8*), i8* bitcast (i8* (i32)* @allocate to i8*), i8* bitcast (void (i8*)* @deallocate to i8*))
@@ -123,4 +112,3 @@
 declare void @deallocate(i8* %ptr)
 
 declare void @print(i32)
-
Index: llvm/test/Transforms/Coroutines/coro-retcon-value.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-retcon-value.ll
+++ llvm/test/Transforms/Coroutines/coro-retcon-value.ll
@@ -4,7 +4,7 @@
 
 define {i8*, i32} @f(i8* %buffer, i32 %n) {
 ; CHECK-LABEL: @f(
-; CHECK-NEXT:  entry:
+; CHECK-NEXT:  coro.return:
 ; CHECK-NEXT:    [[N_VAL_SPILL_ADDR:%.*]] = bitcast i8* [[BUFFER:%.*]] to i32*
 ; CHECK-NEXT:    store i32 [[N:%.*]], i32* [[N_VAL_SPILL_ADDR]], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { i8*, i32 } { i8* bitcast ({ i8*, i32 } (i8*, i8)* @f.resume.0 to i8*), i32 undef }, i32 [[N]], 1
@@ -44,10 +44,10 @@
 ; CHECK-NEXT:    [[INC_I:%.*]] = add i32 [[N_VAL_RELOAD_I]], 1
 ; CHECK-NEXT:    store i32 [[INC_I]], i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !0
 ; CHECK-NEXT:    call void @print(i32 [[INC_I]])
-; CHECK-NEXT:    [[N_VAL_RELOAD_I1:%.*]] = load i32, i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !3
-; CHECK-NEXT:    [[INC_I2:%.*]] = add i32 [[N_VAL_RELOAD_I1]], 1
-; CHECK-NEXT:    store i32 [[INC_I2]], i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !3
-; CHECK-NEXT:    call void @print(i32 [[INC_I2]])
+; CHECK-NEXT:    [[N_VAL_RELOAD_I3:%.*]] = load i32, i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !3
+; CHECK-NEXT:    [[INC_I4:%.*]] = add i32 [[N_VAL_RELOAD_I3]], 1
+; CHECK-NEXT:    store i32 [[INC_I4]], i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !3
+; CHECK-NEXT:    call void @print(i32 [[INC_I4]])
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:
@@ -89,4 +89,3 @@
 declare void @deallocate(i8* %ptr)
 
 declare void @print(i32)
-
Index: llvm/test/Transforms/Coroutines/coro-retcon-unreachable.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-retcon-unreachable.ll
+++ llvm/test/Transforms/Coroutines/coro-retcon-unreachable.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -passes='function(coro-early),cgscc(coro-split)' -S | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes='function(coro-early),cgscc(coro-split),function(simplify-cfg,early-cse)' -S | FileCheck %s
 target datalayout = "E-p:64:64"
 
 %swift.type = type { i64 }
@@ -7,6 +8,13 @@
 %TSi = type <{ i64 }>
 
 define hidden swiftcc { i8*, %swift.opaque* } @no_suspends(i8* %buffer, i64 %arg) #1 {
+; CHECK-LABEL: @no_suspends(
+; CHECK-NEXT:  AllocaSpillBB:
+; CHECK-NEXT:    [[ID:%.*]] = call token @llvm.coro.id.retcon.once(i32 32, i32 8, i8* [[BUFFER:%.*]], i8* bitcast (void (i8*, i1)* @prototype to i8*), i8* bitcast (i8* (i64)* @malloc to i8*), i8* bitcast (void (i8*)* @free to i8*))
+; CHECK-NEXT:    call void @print(i64 [[ARG:%.*]])
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+;
   %id = call token @llvm.coro.id.retcon.once(i32 32, i32 8, i8* %buffer, i8* bitcast (void (i8*, i1)* @prototype to i8*), i8* bitcast (i8* (i64)* @malloc to i8*), i8* bitcast (void (i8*)* @free to i8*))
   %begin = call i8* @llvm.coro.begin(token %id, i8* null)
   call void @print(i64 %arg)
@@ -18,11 +26,6 @@
   call i1 @llvm.coro.end(i8* %begin, i1 false)
   unreachable
 }
-; CHECK-LABEL: define hidden swiftcc { i8*, %swift.opaque* } @no_suspends(
-; CHECK:         call token @llvm.coro.id.retcon.once
-; CHECK-NEXT:    call void @print(i64 %arg)
-; CHECK-NEXT:    call void @llvm.trap()
-; CHECK-NEXT:    unreachable
 
 declare swiftcc void @prototype(i8* noalias dereferenceable(32), i1)
 declare void @print(i64)
Index: llvm/test/Transforms/Coroutines/coro-retcon-resume-values2.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-retcon-resume-values2.ll
+++ llvm/test/Transforms/Coroutines/coro-retcon-resume-values2.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -passes='coro-split,coro-cleanup' -S | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse,simplify-cfg,coro-cleanup' -S | FileCheck %s
 
 define i8* @f(i8* %buffer, i32 %n) "coroutine.presplit"="1" {
 entry:
@@ -18,72 +19,6 @@
   unreachable
 }
 
-; CHECK-LABEL: define i8* @f(i8* %buffer, i32 %n)
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ALLOC:%.*]] = call i8* @allocate(i32 20)
-; CHECK-NEXT:    [[T0:%.*]] = bitcast i8* %buffer to i8**
-; CHECK-NEXT:    store i8* [[ALLOC]], i8** [[T0]]
-; CHECK-NEXT:    [[FRAME:%.*]] = bitcast i8* [[ALLOC]] to [[FRAME_T:%.*]]*
-; CHECK-NEXT:    [[T0:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 0
-; CHECK-NEXT:    store i32 %n, i32* [[T0]]
-; CHECK-NEXT:    ret i8* bitcast (i8* (i8*, i32)* @f.resume.0 to i8*)
-; CHECK-NEXT:  }
-
-; CHECK-LABEL: define internal i8* @f.resume.0(i8* noalias nonnull align 4 dereferenceable(8) %0, i32 %1)
-; CHECK-NEXT:  :
-; CHECK-NEXT:    [[T0:%.*]] = bitcast i8* %0 to [[FRAME_T:%.*]]**
-; CHECK-NEXT:    [[FRAME:%.*]] = load [[FRAME_T]]*, [[FRAME_T]]** [[T0]]
-; CHECK-NEXT:    [[VFRAME:%.*]] = bitcast [[FRAME_T]]* [[FRAME]] to i8*
-; CHECK-NEXT:    [[T0:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 1
-; CHECK-NEXT:    store i32 %1, i32* [[T0]]
-; CHECK-NEXT:    [[T0:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 0
-; CHECK-NEXT:    [[N:%.*]] = load i32, i32* [[T0]]
-; CHECK-NEXT:    %sum0 = call i32 @add(i32 [[N]], i32 %1)
-; CHECK-NEXT:    [[T0:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 2
-; CHECK-NEXT:    store i32 %sum0, i32* [[T0]]
-; CHECK-NEXT:    [[CONT:%.*]] = bitcast i8* (i8*, i32)* @f.resume.1 to i8*
-; CHECK-NEXT:    ret i8* [[CONT]]
-; CHECK-NEXT:  }
-
-; CHECK-LABEL: define internal i8* @f.resume.1(i8* noalias nonnull align 4 dereferenceable(8) %0, i32 %1)
-; CHECK-NEXT:  :
-; CHECK-NEXT:    [[T0:%.*]] = bitcast i8* %0 to [[FRAME_T:%.*]]**
-; CHECK-NEXT:    [[FRAME:%.*]] = load [[FRAME_T]]*, [[FRAME_T]]** [[T0]]
-; CHECK-NEXT:    [[VFRAME:%.*]] = bitcast [[FRAME_T]]* [[FRAME]] to i8*
-; CHECK-NEXT:    [[T0:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 3
-; CHECK-NEXT:    store i32 %1, i32* [[T0]]
-; CHECK-NEXT:    [[T0:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 2
-; CHECK-NEXT:    [[SUM0:%.*]] = load i32, i32* [[T0]]
-; CHECK-NEXT:    [[T0:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 1
-; CHECK-NEXT:    [[VALUE0:%.*]] = load i32, i32* [[T0]]
-; CHECK-NEXT:    %sum1 = call i32 @add(i32 [[SUM0]], i32 [[VALUE0]])
-; CHECK-NEXT:    %sum2 = call i32 @add(i32 %sum1, i32 %1)
-; CHECK-NEXT:    [[T0:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 4
-; CHECK-NEXT:    store i32 %sum2, i32* [[T0]]
-; CHECK-NEXT:    [[CONT:%.*]] = bitcast i8* (i8*, i32)* @f.resume.2 to i8*
-; CHECK-NEXT:    ret i8* [[CONT]]
-; CHECK-NEXT:  }
-
-; CHECK-LABEL: define internal i8* @f.resume.2(i8* noalias nonnull align 4 dereferenceable(8) %0, i32 %1)
-; CHECK-NEXT:  :
-; CHECK-NEXT:    [[T0:%.*]] = bitcast i8* %0 to [[FRAME_T:%.*]]**
-; CHECK-NEXT:    [[FRAME:%.*]] = load [[FRAME_T]]*, [[FRAME_T]]** [[T0]]
-; CHECK-NEXT:    [[VFRAME:%.*]] = bitcast [[FRAME_T]]* [[FRAME]] to i8*
-; CHECK-NEXT:    [[T0:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 4
-; CHECK-NEXT:    [[SUM2:%.*]] = load i32, i32* [[T0]]
-; CHECK-NEXT:    [[T0:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 3
-; CHECK-NEXT:    [[VALUE1:%.*]] = load i32, i32* [[T0]]
-; CHECK-NEXT:    [[T0:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 1
-; CHECK-NEXT:    [[VALUE0:%.*]] = load i32, i32* [[T0]]
-; CHECK-NEXT:    %sum3 = call i32 @add(i32 [[SUM2]], i32 [[VALUE0]])
-; CHECK-NEXT:    %sum4 = call i32 @add(i32 %sum3, i32 [[VALUE1]])
-; CHECK-NEXT:    %sum5 = call i32 @add(i32 %sum4, i32 %1)
-; CHECK-NEXT:    call void @print(i32 %sum5)
-; CHECK-NEXT:    [[CONT:%.*]] = bitcast [[FRAME_T]]* [[FRAME]] to i8*
-; CHECK-NEXT:    call void @deallocate(i8* [[CONT]])
-; CHECK-NEXT:    ret i8* null
-; CHECK-NEXT:  }
-
 declare token @llvm.coro.id.retcon(i32, i32, i8*, i8*, i8*, i8*)
 declare i8* @llvm.coro.begin(token, i8*)
 declare i32 @llvm.coro.suspend.retcon.i32(...)
@@ -98,3 +33,63 @@
 declare i32 @add(i32, i32)
 declare void @print(i32)
 
+; CHECK-LABEL: @f(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i8* @allocate(i32 20)
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BUFFER:%.*]] to i8**
+; CHECK-NEXT:    store i8* [[TMP0]], i8** [[TMP1]], align 8
+; CHECK-NEXT:    [[FRAMEPTR:%.*]] = bitcast i8* [[TMP0]] to %f.Frame*
+; CHECK-NEXT:    [[N_SPILL_ADDR:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR]], i32 0, i32 0
+; CHECK-NEXT:    store i32 [[N:%.*]], i32* [[N_SPILL_ADDR]], align 4
+; CHECK-NEXT:    ret i8* bitcast (i8* (i8*, i32)* @f.resume.0 to i8*)
+;
+;
+; CHECK-LABEL: @f.resume.0(
+; CHECK-NEXT:  entryresume.0:
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP0:%.*]] to %f.Frame**
+; CHECK-NEXT:    [[FRAMEPTR:%.*]] = load %f.Frame*, %f.Frame** [[TMP2]], align 8
+; CHECK-NEXT:    [[VALUE0_SPILL_ADDR:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR]], i32 0, i32 1
+; CHECK-NEXT:    store i32 [[TMP1:%.*]], i32* [[VALUE0_SPILL_ADDR]], align 4
+; CHECK-NEXT:    [[N_RELOAD_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 0
+; CHECK-NEXT:    [[N_RELOAD:%.*]] = load i32, i32* [[N_RELOAD_ADDR]], align 4
+; CHECK-NEXT:    [[SUM0:%.*]] = call i32 @add(i32 [[N_RELOAD]], i32 [[TMP1]])
+; CHECK-NEXT:    [[SUM0_SPILL_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 2
+; CHECK-NEXT:    store i32 [[SUM0]], i32* [[SUM0_SPILL_ADDR]], align 4
+; CHECK-NEXT:    ret i8* bitcast (i8* (i8*, i32)* @f.resume.1 to i8*)
+;
+;
+; CHECK-LABEL: @f.resume.1(
+; CHECK-NEXT:  entryresume.1:
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP0:%.*]] to %f.Frame**
+; CHECK-NEXT:    [[FRAMEPTR:%.*]] = load %f.Frame*, %f.Frame** [[TMP2]], align 8
+; CHECK-NEXT:    [[VALUE1_SPILL_ADDR:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR]], i32 0, i32 3
+; CHECK-NEXT:    store i32 [[TMP1:%.*]], i32* [[VALUE1_SPILL_ADDR]], align 4
+; CHECK-NEXT:    [[SUM0_RELOAD_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 2
+; CHECK-NEXT:    [[SUM0_RELOAD:%.*]] = load i32, i32* [[SUM0_RELOAD_ADDR]], align 4
+; CHECK-NEXT:    [[VALUE0_RELOAD_ADDR5:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 1
+; CHECK-NEXT:    [[VALUE0_RELOAD6:%.*]] = load i32, i32* [[VALUE0_RELOAD_ADDR5]], align 4
+; CHECK-NEXT:    [[SUM1:%.*]] = call i32 @add(i32 [[SUM0_RELOAD]], i32 [[VALUE0_RELOAD6]])
+; CHECK-NEXT:    [[SUM2:%.*]] = call i32 @add(i32 [[SUM1]], i32 [[TMP1]])
+; CHECK-NEXT:    [[SUM2_SPILL_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 4
+; CHECK-NEXT:    store i32 [[SUM2]], i32* [[SUM2_SPILL_ADDR]], align 4
+; CHECK-NEXT:    ret i8* bitcast (i8* (i8*, i32)* @f.resume.2 to i8*)
+;
+;
+; CHECK-LABEL: @f.resume.2(
+; CHECK-NEXT:  entryresume.2:
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP0:%.*]] to %f.Frame**
+; CHECK-NEXT:    [[FRAMEPTR:%.*]] = load %f.Frame*, %f.Frame** [[TMP2]], align 8
+; CHECK-NEXT:    [[SUM2_RELOAD_ADDR:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR]], i32 0, i32 4
+; CHECK-NEXT:    [[SUM2_RELOAD:%.*]] = load i32, i32* [[SUM2_RELOAD_ADDR]], align 4
+; CHECK-NEXT:    [[VALUE1_RELOAD_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 3
+; CHECK-NEXT:    [[VALUE1_RELOAD:%.*]] = load i32, i32* [[VALUE1_RELOAD_ADDR]], align 4
+; CHECK-NEXT:    [[VALUE0_RELOAD_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 1
+; CHECK-NEXT:    [[VALUE0_RELOAD:%.*]] = load i32, i32* [[VALUE0_RELOAD_ADDR]], align 4
+; CHECK-NEXT:    [[SUM3:%.*]] = call i32 @add(i32 [[SUM2_RELOAD]], i32 [[VALUE0_RELOAD]])
+; CHECK-NEXT:    [[SUM4:%.*]] = call i32 @add(i32 [[SUM3]], i32 [[VALUE1_RELOAD]])
+; CHECK-NEXT:    [[SUM5:%.*]] = call i32 @add(i32 [[SUM4]], i32 [[TMP1:%.*]])
+; CHECK-NEXT:    call void @print(i32 [[SUM5]])
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast %f.Frame* [[FRAMEPTR]] to i8*
+; CHECK-NEXT:    call void @deallocate(i8* [[TMP3]])
+; CHECK-NEXT:    ret i8* null
+;
Index: llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll
+++ llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll
@@ -3,9 +3,12 @@
 
 define i8* @f(i8* %buffer, i32 %n) {
 ; CHECK-LABEL: @f(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[N_VAL_SPILL_ADDR:%.*]] = bitcast i8* [[BUFFER:%.*]] to i32*
-; CHECK-NEXT:    store i32 [[N:%.*]], i32* [[N_VAL_SPILL_ADDR]], align 4
+; CHECK-NEXT:  coro.return:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8* @allocate(i32 12)
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BUFFER:%.*]] to i8**
+; CHECK-NEXT:    store i8* [[TMP0]], i8** [[TMP1]], align 8
+; CHECK-NEXT:    [[N_SPILL_ADDR:%.*]] = bitcast i8* [[TMP0]] to i32*
+; CHECK-NEXT:    store i32 [[N:%.*]], i32* [[N_SPILL_ADDR]], align 4
 ; CHECK-NEXT:    ret i8* bitcast (i8* (i8*, i32, i1)* @f.resume.0 to i8*)
 ;
 entry:
@@ -35,11 +38,40 @@
 define i32 @main() {
 ; CHECK-LABEL: @main(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = alloca [8 x i8], align 4
-; CHECK-NEXT:    [[DOTSUB:%.*]] = getelementptr inbounds [8 x i8], [8 x i8]* [[TMP0]], i64 0, i64 0
-; CHECK-NEXT:    [[N_VAL_RELOAD_ADDR_I1:%.*]] = bitcast [8 x i8]* [[TMP0]] to i32*
-; CHECK-NEXT:    store i32 7, i32* [[N_VAL_RELOAD_ADDR_I1]], align 4, !alias.scope !0
-; CHECK-NEXT:    call void @print(i32 7), !noalias !3
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i8*, align 8
+; CHECK-NEXT:    [[DOTSUB:%.*]] = bitcast i8** [[TMP0]] to i8*
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i8* @allocate(i32 12)
+; CHECK-NEXT:    store i8* [[TMP1]], i8** [[TMP0]], align 8
+; CHECK-NEXT:    [[N_SPILL_ADDR_I:%.*]] = bitcast i8* [[TMP1]] to i32*
+; CHECK-NEXT:    store i32 1, i32* [[N_SPILL_ADDR_I]], align 4
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]])
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8** [[TMP0]] to %f.Frame**
+; CHECK-NEXT:    [[FRAMEPTR_I:%.*]] = load %f.Frame*, %f.Frame** [[TMP2]], align 8, !alias.scope !0
+; CHECK-NEXT:    [[N_RELOAD_ADDR9_I:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR_I]], i64 0, i32 0
+; CHECK-NEXT:    [[N_RELOAD10_I:%.*]] = load i32, i32* [[N_RELOAD_ADDR9_I]], align 4, !noalias !0
+; CHECK-NEXT:    [[N_VAL3_SPILL_ADDR_I:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR_I]], i64 0, i32 1
+; CHECK-NEXT:    store i32 [[N_RELOAD10_I]], i32* [[N_VAL3_SPILL_ADDR_I]], align 4, !noalias !0
+; CHECK-NEXT:    [[INPUT_SPILL_ADDR_I:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR_I]], i64 0, i32 2
+; CHECK-NEXT:    store i32 2, i32* [[INPUT_SPILL_ADDR_I]], align 4, !noalias !0
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
+; CHECK-NEXT:    [[FRAMEPTR_I1:%.*]] = load %f.Frame*, %f.Frame** [[TMP2]], align 8, !alias.scope !3
+; CHECK-NEXT:    [[INPUT_RELOAD_ADDR_I:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR_I1]], i64 0, i32 2
+; CHECK-NEXT:    [[INPUT_RELOAD_I:%.*]] = load i32, i32* [[INPUT_RELOAD_ADDR_I]], align 4, !noalias !3
+; CHECK-NEXT:    [[N_VAL3_RELOAD_ADDR_I:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR_I1]], i64 0, i32 1
+; CHECK-NEXT:    [[N_VAL3_RELOAD_I:%.*]] = load i32, i32* [[N_VAL3_RELOAD_ADDR_I]], align 4, !noalias !3
+; CHECK-NEXT:    [[SUM8_I:%.*]] = add i32 [[N_VAL3_RELOAD_I]], [[INPUT_RELOAD_I]]
+; CHECK-NEXT:    store i32 [[SUM8_I]], i32* [[N_VAL3_RELOAD_ADDR_I]], align 4, !noalias !3
+; CHECK-NEXT:    store i32 4, i32* [[INPUT_RELOAD_ADDR_I]], align 4, !noalias !3
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]])
+; CHECK-NEXT:    [[FRAMEPTR_I4:%.*]] = load %f.Frame*, %f.Frame** [[TMP2]], align 8, !alias.scope !6
+; CHECK-NEXT:    [[INPUT_RELOAD_ADDR13_I:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR_I4]], i64 0, i32 2
+; CHECK-NEXT:    [[INPUT_RELOAD14_I:%.*]] = load i32, i32* [[INPUT_RELOAD_ADDR13_I]], align 4, !noalias !6
+; CHECK-NEXT:    [[N_VAL3_RELOAD_ADDR11_I:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR_I4]], i64 0, i32 1
+; CHECK-NEXT:    [[N_VAL3_RELOAD12_I:%.*]] = load i32, i32* [[N_VAL3_RELOAD_ADDR11_I]], align 4, !noalias !6
+; CHECK-NEXT:    [[SUM7_I:%.*]] = add i32 [[N_VAL3_RELOAD12_I]], [[INPUT_RELOAD14_I]]
+; CHECK-NEXT:    call void @print(i32 [[SUM7_I]]), !noalias !6
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast %f.Frame* [[FRAMEPTR_I4]] to i8*
+; CHECK-NEXT:    call void @deallocate(i8* [[TMP3]]), !noalias !6
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:
Index: llvm/test/Transforms/Coroutines/coro-retcon-once-value2.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-retcon-once-value2.ll
+++ llvm/test/Transforms/Coroutines/coro-retcon-once-value2.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -passes='cgscc(coro-split),function(coro-cleanup)' -S | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse,coro-cleanup' -S | FileCheck %s
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.12.0"
 
@@ -22,39 +23,7 @@
   unreachable
 }
 
-; CHECK-LABEL: define { i8*, i32* } @f(i8* %buffer, i32* %ptr)
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ALLOC:%.*]] = call i8* @allocate(i32 16)
-; CHECK-NEXT:    [[T0:%.*]] = bitcast i8* %buffer to i8**
-; CHECK-NEXT:    store i8* [[ALLOC]], i8** [[T0]]
-; CHECK-NEXT:    [[FRAME:%.*]] = bitcast i8* [[ALLOC]] to [[FRAME_T:%.*]]*
-; CHECK-NEXT:    %temp = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 1
-; CHECK-NEXT:    [[SPILL:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 0
-; CHECK-NEXT:    store i32* %ptr, i32** [[SPILL]]
-; CHECK-NEXT:    %oldvalue = load i32, i32* %ptr
-; CHECK-NEXT:    store i32 %oldvalue, i32* %temp
-; CHECK-NEXT:    [[T0:%.*]] = insertvalue { i8*, i32* } { i8* bitcast (void (i8*, i1)* @f.resume.0 to i8*), i32* undef }, i32* %temp, 1
-; CHECK-NEXT:    ret { i8*, i32* } [[T0]]
-; CHECK-NEXT:  }
 
-; CHECK-LABEL: define internal void @f.resume.0(i8* noalias nonnull align 8 dereferenceable(8) %0, i1 zeroext %1)
-; CHECK-NEXT:  :
-; CHECK-NEXT:    [[T0:%.*]] = bitcast i8* %0 to [[FRAME_T:%.*]]**
-; CHECK-NEXT:    [[FRAME:%.*]] = load [[FRAME_T]]*, [[FRAME_T]]** [[T0]]
-; CHECK-NEXT:    bitcast [[FRAME_T]]* [[FRAME]] to i8*
-; CHECK-NEXT:    [[TEMP_SLOT:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 1
-; CHECK-NEXT:    br i1 %1,
-; CHECK:       :
-; CHECK-NEXT:    [[PTR_SLOT:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 0
-; CHECK-NEXT:    [[PTR_RELOAD:%.*]] = load i32*, i32** [[PTR_SLOT]]
-; CHECK-NEXT:    %newvalue = load i32, i32* [[TEMP_SLOT]]
-; CHECK-NEXT:    store i32 %newvalue, i32* [[PTR_RELOAD]]
-; CHECK-NEXT:    br label
-; CHECK:       :
-; CHECK-NEXT:    [[T0:%.*]] = bitcast [[FRAME_T]]* [[FRAME]] to i8*
-; CHECK-NEXT:    call fastcc void @deallocate(i8* [[T0]])
-; CHECK-NEXT:    ret void
-; CHECK-NEXT:  }
 
 declare token @llvm.coro.id.retcon.once(i32, i32, i8*, i8*, i8*, i8*)
 declare i8* @llvm.coro.begin(token, i8*)
@@ -67,4 +36,35 @@
 declare fastcc void @deallocate(i8* %ptr)
 
 declare void @print(i32)
-
+; CHECK-LABEL: @f(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i8* @allocate(i32 16)
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BUFFER:%.*]] to i8**
+; CHECK-NEXT:    store i8* [[TMP0]], i8** [[TMP1]], align 8
+; CHECK-NEXT:    [[FRAMEPTR:%.*]] = bitcast i8* [[TMP0]] to %f.Frame*
+; CHECK-NEXT:    [[TEMP:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR]], i32 0, i32 1
+; CHECK-NEXT:    [[PTR_SPILL_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 0
+; CHECK-NEXT:    store i32* [[PTR:%.*]], i32** [[PTR_SPILL_ADDR]], align 8
+; CHECK-NEXT:    [[OLDVALUE:%.*]] = load i32, i32* [[PTR]], align 4
+; CHECK-NEXT:    store i32 [[OLDVALUE]], i32* [[TEMP]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { i8*, i32* } { i8* bitcast (void (i8*, i1)* @f.resume.0 to i8*), i32* undef }, i32* [[TEMP]], 1
+; CHECK-NEXT:    ret { i8*, i32* } [[TMP2]]
+;
+;
+; CHECK-LABEL: @f.resume.0(
+; CHECK-NEXT:  entryresume.0:
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP0:%.*]] to %f.Frame**
+; CHECK-NEXT:    [[FRAMEPTR:%.*]] = load %f.Frame*, %f.Frame** [[TMP2]], align 8
+; CHECK-NEXT:    [[TEMP:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR]], i32 0, i32 1
+; CHECK-NEXT:    br i1 [[TMP1:%.*]], label [[COROEND:%.*]], label [[CONT:%.*]]
+; CHECK:       cont:
+; CHECK-NEXT:    [[PTR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 0
+; CHECK-NEXT:    [[PTR_RELOAD:%.*]] = load i32*, i32** [[PTR_RELOAD_ADDR]], align 8
+; CHECK-NEXT:    [[NEWVALUE:%.*]] = load i32, i32* [[TEMP]], align 4
+; CHECK-NEXT:    store i32 [[NEWVALUE]], i32* [[PTR_RELOAD]], align 4
+; CHECK-NEXT:    br label [[COROEND]]
+; CHECK:       CoroEnd:
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast %f.Frame* [[FRAMEPTR]] to i8*
+; CHECK-NEXT:    call fastcc void @deallocate(i8* [[TMP3]])
+; CHECK-NEXT:    ret void
+;
Index: llvm/test/Transforms/Coroutines/coro-retcon-once-value.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-retcon-once-value.ll
+++ llvm/test/Transforms/Coroutines/coro-retcon-once-value.ll
@@ -6,16 +6,16 @@
 
 define {i8*, i32} @f(i8* %buffer, i32* %array) {
 ; CHECK-LABEL: @f(
-; CHECK-NEXT:  entry:
+; CHECK-NEXT:  PostSpill:
 ; CHECK-NEXT:    [[ARRAY_SPILL_ADDR:%.*]] = bitcast i8* [[BUFFER:%.*]] to i32**
 ; CHECK-NEXT:    store i32* [[ARRAY:%.*]], i32** [[ARRAY_SPILL_ADDR]], align 8
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32* [[ARRAY]], align 4
 ; CHECK-NEXT:    [[LOAD_POS:%.*]] = icmp sgt i32 [[LOAD]], 0
-; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[LOAD_POS]], i32 [[LOAD]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[LOAD_POS]], i8* bitcast (void (i8*, i1)* @f.resume.0 to i8*), i8* bitcast (void (i8*, i1)* @f.resume.1 to i8*)
-; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { i8*, i32 } undef, i8* [[TMP1]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { i8*, i32 } [[TMP2]], i32 [[TMP0]], 1
-; CHECK-NEXT:    ret { i8*, i32 } [[TMP3]]
+; CHECK-NEXT:    [[SPEC_SELECT4:%.*]] = select i1 [[LOAD_POS]], i32 [[LOAD]], i32 0
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[LOAD_POS]], i8* bitcast (void (i8*, i1)* @f.resume.0 to i8*), i8* bitcast (void (i8*, i1)* @f.resume.1 to i8*)
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i8*, i32 } undef, i8* [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { i8*, i32 } [[TMP1]], i32 [[SPEC_SELECT4]], 1
+; CHECK-NEXT:    ret { i8*, i32 } [[TMP2]]
 ;
 entry:
   %id = call token @llvm.coro.id.retcon.once(i32 8, i32 8, i8* %buffer, i8* bitcast (void (i8*, i1)* @prototype to i8*), i8* bitcast (i8* (i32)* @allocate to i8*), i8* bitcast (void (i8*)* @deallocate to i8*))
@@ -56,8 +56,8 @@
 ; CHECK-NEXT:    store i32* [[ARRAY:%.*]], i32** [[TMP0]], align 8
 ; CHECK-NEXT:    [[LOAD_I:%.*]] = load i32, i32* [[ARRAY]], align 4
 ; CHECK-NEXT:    [[LOAD_POS_I:%.*]] = icmp sgt i32 [[LOAD_I]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[LOAD_POS_I]], i32 [[LOAD_I]], i32 0
-; CHECK-NEXT:    call void @print(i32 [[TMP1]])
+; CHECK-NEXT:    [[SPEC_SELECT4_I:%.*]] = select i1 [[LOAD_POS_I]], i32 [[LOAD_I]], i32 0
+; CHECK-NEXT:    call void @print(i32 [[SPEC_SELECT4_I]])
 ; CHECK-NEXT:    [[CONT_CAST:%.*]] = select i1 [[LOAD_POS_I]], void (i8*, i1)* @f.resume.0, void (i8*, i1)* @f.resume.1
 ; CHECK-NEXT:    call void [[CONT_CAST]](i8* nonnull [[DOTSUB]], i1 zeroext false)
 ; CHECK-NEXT:    ret void
Index: llvm/test/Transforms/Coroutines/coro-retcon-frame.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-retcon-frame.ll
+++ llvm/test/Transforms/Coroutines/coro-retcon-frame.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 target datalayout = "p:64:64:64"
 
Index: llvm/test/Transforms/Coroutines/coro-retcon-alloca.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-retcon-alloca.ll
+++ llvm/test/Transforms/Coroutines/coro-retcon-alloca.ll
@@ -6,11 +6,11 @@
 declare {i8*, i8*, i32} @prototype_f(i8*, i1)
 define {i8*, i8*, i32} @f(i8* %buffer, i32 %n) {
 ; CHECK-LABEL: @f(
-; CHECK-NEXT:  entry:
+; CHECK-NEXT:  coro.return:
 ; CHECK-NEXT:    [[N_VAL_SPILL_ADDR:%.*]] = getelementptr inbounds i8, i8* [[BUFFER:%.*]], i64 8
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[N_VAL_SPILL_ADDR]] to i32*
 ; CHECK-NEXT:    store i32 [[N:%.*]], i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call i8* @allocate(i32 [[N]])
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i8* @allocate(i32 [[N]]) #[[ATTR0:[0-9]+]]
 ; CHECK-NEXT:    [[DOTSPILL_ADDR:%.*]] = bitcast i8* [[BUFFER]] to i8**
 ; CHECK-NEXT:    store i8* [[TMP1]], i8** [[DOTSPILL_ADDR]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { i8*, i8*, i32 } { i8* bitcast ({ i8*, i8*, i32 } (i8*, i1)* @f.resume.0 to i8*), i8* undef, i32 undef }, i8* [[TMP1]], 1
@@ -44,12 +44,12 @@
 declare {i8*, i32} @prototype_g(i8*, i1)
 define {i8*, i32} @g(i8* %buffer, i32 %n) {
 ; CHECK-LABEL: @g(
-; CHECK-NEXT:  entry:
+; CHECK-NEXT:  coro.return:
 ; CHECK-NEXT:    [[N_VAL_SPILL_ADDR:%.*]] = bitcast i8* [[BUFFER:%.*]] to i32*
 ; CHECK-NEXT:    store i32 [[N:%.*]], i32* [[N_VAL_SPILL_ADDR]], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[N]] to i64
 ; CHECK-NEXT:    [[TMP1:%.*]] = alloca i8, i64 [[TMP0]], align 8
-; CHECK-NEXT:    call void @use(i8* nonnull [[TMP1]])
+; CHECK-NEXT:    tail call void @use(i8* nonnull [[TMP1]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { i8*, i32 } { i8* bitcast ({ i8*, i32 } (i8*, i1)* @g.resume.0 to i8*), i32 undef }, i32 [[N]], 1
 ; CHECK-NEXT:    ret { i8*, i32 } [[TMP2]]
 ;
@@ -81,9 +81,9 @@
 declare {i8*, i32} @prototype_h(i8*, i1)
 define {i8*, i32} @h(i8* %buffer, i32 %n) {
 ; CHECK-LABEL: @h(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[N_VAL_SPILL_ADDR:%.*]] = bitcast i8* [[BUFFER:%.*]] to i32*
-; CHECK-NEXT:    store i32 [[N:%.*]], i32* [[N_VAL_SPILL_ADDR]], align 4
+; CHECK-NEXT:  coro.return:
+; CHECK-NEXT:    [[N_SPILL_ADDR:%.*]] = bitcast i8* [[BUFFER:%.*]] to i32*
+; CHECK-NEXT:    store i32 [[N:%.*]], i32* [[N_SPILL_ADDR]], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { i8*, i32 } { i8* bitcast ({ i8*, i32 } (i8*, i1)* @h.resume.0 to i8*), i32 undef }, i32 [[N]], 1
 ; CHECK-NEXT:    ret { i8*, i32 } [[TMP0]]
 ;
@@ -115,7 +115,7 @@
 declare {i8*, i32} @prototype_i(i8*)
 define {i8*, i32} @i(i8* %buffer, i32 %n) {
 ; CHECK-LABEL: @i(
-; CHECK-NEXT:  entry:
+; CHECK-NEXT:  coro.return:
 ; CHECK-NEXT:    [[N_VAL_SPILL_ADDR:%.*]] = bitcast i8* [[BUFFER:%.*]] to i32*
 ; CHECK-NEXT:    store i32 [[N:%.*]], i32* [[N_VAL_SPILL_ADDR]], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { i8*, i32 } { i8* bitcast ({ i8*, i32 } (i8*)* @i.resume.0 to i8*), i32 undef }, i32 [[N]], 1
@@ -148,7 +148,7 @@
 declare {i8*, i32} @prototype_j(i8*)
 define {i8*, i32} @j(i8* %buffer, i32 %n) {
 ; CHECK-LABEL: @j(
-; CHECK-NEXT:  entry:
+; CHECK-NEXT:  coro.return:
 ; CHECK-NEXT:    [[N_VAL_SPILL_ADDR:%.*]] = bitcast i8* [[BUFFER:%.*]] to i32*
 ; CHECK-NEXT:    store i32 [[N:%.*]], i32* [[N_VAL_SPILL_ADDR]], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { i8*, i32 } { i8* bitcast ({ i8*, i32 } (i8*)* @j.resume.0 to i8*), i32 undef }, i32 [[N]], 1
@@ -183,7 +183,7 @@
 declare i32 @getSize()
 define {i8*, i32} @k(i8* %buffer, i32 %n, i1 %cond) {
 ; CHECK-LABEL: @k(
-; CHECK-NEXT:  entry:
+; CHECK-NEXT:  PostSpill:
 ; CHECK-NEXT:    [[SIZE:%.*]] = tail call i32 @getSize()
 ; CHECK-NEXT:    br i1 [[COND:%.*]], label [[ALLOCA_BLOCK:%.*]], label [[CORO_RETURN:%.*]]
 ; CHECK:       coro.return:
@@ -192,7 +192,7 @@
 ; CHECK:       alloca_block:
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[SIZE]] to i64
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8
-; CHECK-NEXT:    call void @use(i8* nonnull [[TMP2]])
+; CHECK-NEXT:    tail call void @use(i8* nonnull [[TMP2]])
 ; CHECK-NEXT:    br label [[CORO_RETURN]]
 ;
 entry:
Index: llvm/test/Transforms/Coroutines/coro-param-copy.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-param-copy.ll
+++ llvm/test/Transforms/Coroutines/coro-param-copy.ll
@@ -1,6 +1,6 @@
 ; Check that we create copy the data from the alloca into the coroutine
 ; frame slot if it was written to.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 define i8* @f() "coroutine.presplit"="1" {
 entry:
Index: llvm/test/Transforms/Coroutines/coro-padding.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-padding.ll
+++ llvm/test/Transforms/Coroutines/coro-padding.ll
@@ -1,6 +1,6 @@
 ; Check that we will insert the correct padding if natural alignment of the
 ; spilled data does not match the alignment specified in alloca instruction.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 %PackedStruct = type <{ i64 }>
 
Index: llvm/test/Transforms/Coroutines/coro-materialize.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-materialize.ll
+++ llvm/test/Transforms/Coroutines/coro-materialize.ll
@@ -1,5 +1,5 @@
 ; Verifies that we materialize instruction across suspend points
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 define i8* @f(i32 %n) "coroutine.presplit"="1" {
 entry:
Index: llvm/test/Transforms/Coroutines/coro-frame.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-frame.ll
+++ llvm/test/Transforms/Coroutines/coro-frame.ll
@@ -1,5 +1,5 @@
 ; Check that we can handle spills of the result of the invoke instruction
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 define i8* @f(i64 %this) "coroutine.presplit"="1" personality i32 0 {
 entry:
Index: llvm/test/Transforms/Coroutines/coro-frame-unreachable.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-frame-unreachable.ll
+++ llvm/test/Transforms/Coroutines/coro-frame-unreachable.ll
@@ -1,5 +1,5 @@
 ; Check that coro-split doesn't choke on intrinsics in unreachable blocks
-; RUN: opt < %s -passes=coro-split -S
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S
 
 define i8* @f(i1 %arg) "coroutine.presplit"="1" personality i32 0 {
 entry:
Index: llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-05.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-05.ll
+++ llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-05.ll
@@ -1,6 +1,6 @@
 ; Tests that variables of different type with incompatible alignment in a Corotuine whose 
 ; lifetime range is not overlapping each other re-use the same slot in CorotuineFrame.
-; RUN: opt < %s -passes=coro-split -reuse-storage-in-coroutine-frame -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -reuse-storage-in-coroutine-frame -S | FileCheck %s
 %"struct.task::promise_type" = type { i8 }
 %struct.awaitable = type { i8 }
 %struct.big_structure = type { [500 x i8] }
Index: llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-04.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-04.ll
+++ llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-04.ll
@@ -1,6 +1,6 @@
 ; Tests that variables of different type with incompatible alignment in a Corotuine whose lifetime 
 ; range is not overlapping each other should not re-use the same slot in Coroutine frame. 
-; RUN: opt < %s -passes=coro-split -reuse-storage-in-coroutine-frame -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -reuse-storage-in-coroutine-frame -S | FileCheck %s
 %"struct.task::promise_type" = type { i8 }
 %struct.awaitable = type { i8 }
 %struct.big_structure = type { [500 x i8] }
Index: llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-03.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-03.ll
+++ llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-03.ll
@@ -1,5 +1,5 @@
 ; Check that we should not reuse alloca sotrage in O0.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 %struct.big_structure = type { [500 x i8] }
 declare void @consume(%struct.big_structure*)
Index: llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-02.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-02.ll
+++ llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-02.ll
@@ -1,6 +1,6 @@
 ; Tests that variables of different type in a Corotuine whose lifetime range is not overlapping each other
 ; re-use the same slot in Coroutine frame.
-; RUN: opt < %s -passes=coro-split -reuse-storage-in-coroutine-frame -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -reuse-storage-in-coroutine-frame -S | FileCheck %s
 %"struct.task::promise_type" = type { i8 }
 %struct.awaitable = type { i8 }
 %struct.big_structure = type { [500 x i8] }
Index: llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-01.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-01.ll
+++ llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-01.ll
@@ -1,6 +1,6 @@
 ; Tests that variables in a Corotuine whose lifetime range is not overlapping each other
 ; re-use the same slot in Coroutine frame.
-; RUN: opt < %s -passes=coro-split -reuse-storage-in-coroutine-frame -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -reuse-storage-in-coroutine-frame -S | FileCheck %s
 %"struct.task::promise_type" = type { i8 }
 %struct.awaitable = type { i8 }
 %struct.big_structure = type { [500 x i8] }
Index: llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-00.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-00.ll
+++ llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-00.ll
@@ -1,5 +1,5 @@
 ; Check that we can handle spills of array allocas
-; RUN: opt < %s -passes=coro-split -reuse-storage-in-coroutine-frame -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -reuse-storage-in-coroutine-frame -S | FileCheck %s
 
 %struct.big_structure = type { [500 x i8] }
 declare void @consume(%struct.big_structure*)
Index: llvm/test/Transforms/Coroutines/coro-frame-arrayalloca.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-frame-arrayalloca.ll
+++ llvm/test/Transforms/Coroutines/coro-frame-arrayalloca.ll
@@ -1,5 +1,5 @@
 ; Check that we can handle spills of array allocas
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 declare void @consume.double.ptr(double*)
 declare void @consume.i32.ptr(i32*)
Index: llvm/test/Transforms/Coroutines/coro-eh-aware-edge-split-02.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-eh-aware-edge-split-02.ll
+++ llvm/test/Transforms/Coroutines/coro-eh-aware-edge-split-02.ll
@@ -1,5 +1,5 @@
 ; Check that we can handle edge splits leading into a landingpad
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
Index: llvm/test/Transforms/Coroutines/coro-eh-aware-edge-split-01.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-eh-aware-edge-split-01.ll
+++ llvm/test/Transforms/Coroutines/coro-eh-aware-edge-split-01.ll
@@ -1,5 +1,5 @@
 ; Check that we can handle edge splits leading into a landingpad
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
Index: llvm/test/Transforms/Coroutines/coro-eh-aware-edge-split-00.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-eh-aware-edge-split-00.ll
+++ llvm/test/Transforms/Coroutines/coro-eh-aware-edge-split-00.ll
@@ -1,5 +1,5 @@
 ; Check that we can handle edge splits leading into a landingpad
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
Index: llvm/test/Transforms/Coroutines/coro-debug.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-debug.ll
+++ llvm/test/Transforms/Coroutines/coro-debug.ll
@@ -1,5 +1,5 @@
 ; Tests that debug information is sane after coro-split
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 source_filename = "simple-repro.c"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
Index: llvm/test/Transforms/Coroutines/coro-catchswitch.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-catchswitch.ll
+++ llvm/test/Transforms/Coroutines/coro-catchswitch.ll
@@ -1,5 +1,5 @@
 ; Verifies that we can insert the spill for a PHI preceding the catchswitch
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
 target triple = "i686-pc-windows-msvc"
Index: llvm/test/Transforms/Coroutines/coro-catchswitch-cleanuppad.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-catchswitch-cleanuppad.ll
+++ llvm/test/Transforms/Coroutines/coro-catchswitch-cleanuppad.ll
@@ -1,6 +1,6 @@
 ; Tests the PHI nodes in cleanuppads for catchswitch instructions are correctly
 ; split up.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 declare i32 @__CxxFrameHandler3(...)
 define i8* @f2(i1 %val) "coroutine.presplit"="1" personality i32 (...)* @__CxxFrameHandler3 {
@@ -106,11 +106,10 @@
 declare token @llvm.coro.id(i32, i8*, i8*, i8*)
 declare i1 @llvm.coro.alloc(token)
 declare i8* @llvm.coro.begin(token, i8*)
-declare i1 @llvm.coro.end(i8*, i1) 
+declare i1 @llvm.coro.end(i8*, i1)
 
 declare noalias i8* @malloc(i32)
 declare void @print(i32)
 declare void @free(i8*)
 
 declare i32 @f()
-
Index: llvm/test/Transforms/Coroutines/coro-byval-param.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-byval-param.ll
+++ llvm/test/Transforms/Coroutines/coro-byval-param.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 %promise_type = type { i8 }
 %struct.A = type <{ i64, i64, i32, [4 x i8] }>
 
Index: llvm/test/Transforms/Coroutines/coro-async.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-async.ll
+++ llvm/test/Transforms/Coroutines/coro-async.ll
@@ -120,7 +120,7 @@
 
 ; CHECK-LABEL: define swiftcc void @my_async_function(i8* swiftasync %async.ctxt, %async.task* %task, %async.actor* %actor)
 ; CHECK-SAME: !dbg ![[SP1:[0-9]+]] {
-; CHECK: entry:
+; CHECK: coro.return:
 ; CHECK:   [[FRAMEPTR:%.*]] = getelementptr inbounds i8, i8* %async.ctxt, i64 128
 ; CHECK:   [[ACTOR_SPILL_ADDR:%.*]] = getelementptr inbounds i8, i8* %async.ctxt, i64 152
 ; CHECK:   [[CAST1:%.*]] = bitcast i8* [[ACTOR_SPILL_ADDR]] to %async.actor**
Index: llvm/test/Transforms/Coroutines/coro-alloca-08.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-alloca-08.ll
+++ llvm/test/Transforms/Coroutines/coro-alloca-08.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 %"struct.std::coroutine_handle" = type { i8* }
 %"struct.std::coroutine_handle.0" = type { %"struct.std::coroutine_handle" }
Index: llvm/test/Transforms/Coroutines/coro-alloca-07.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-alloca-07.ll
+++ llvm/test/Transforms/Coroutines/coro-alloca-07.ll
@@ -1,6 +1,6 @@
 ; Tests that CoroSplit can succesfully determine allocas should live on the frame
 ; if their aliases are used across suspension points through PHINode.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 define i8* @f(i1 %n) "coroutine.presplit"="1" {
 entry:
Index: llvm/test/Transforms/Coroutines/coro-alloca-06.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-alloca-06.ll
+++ llvm/test/Transforms/Coroutines/coro-alloca-06.ll
@@ -1,6 +1,6 @@
 ; Test that in some simple cases allocas will not live on the frame even
 ; though their pointers are stored.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 %handle = type { i8* }
 
Index: llvm/test/Transforms/Coroutines/coro-alloca-05.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-alloca-05.ll
+++ llvm/test/Transforms/Coroutines/coro-alloca-05.ll
@@ -1,6 +1,6 @@
 ; Tests that allocas after coro.begin are properly that do not need to
 ; live on the frame are properly moved to the .resume function.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 define i8* @f() "coroutine.presplit"="1" {
 entry:
@@ -31,10 +31,10 @@
 ; CHECK-NEXT:  entry.resume:
 ; CHECK-NEXT:    [[VFRAME:%.*]] = bitcast %f.Frame* [[FRAMEPTR:%.*]] to i8*
 ; CHECK-NEXT:    [[X:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[X_VALUE:%.*]] = load i32, i32* [[X]], align 4
+; CHECK:         [[X_VALUE:%.*]] = load i32, i32* [[X]], align 4
 ; CHECK-NEXT:    call void @print(i32 [[X_VALUE]])
-; CHECK-NEXT:    call void @free(i8* [[VFRAME]])
-; CHECK-NEXT:    ret void
+; CHECK:         call void @free(i8* [[VFRAME]])
+; CHECK:         ret void
 
 declare i8* @llvm.coro.free(token, i8*)
 declare i32 @llvm.coro.size.i32()
Index: llvm/test/Transforms/Coroutines/coro-alloca-04.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-alloca-04.ll
+++ llvm/test/Transforms/Coroutines/coro-alloca-04.ll
@@ -1,6 +1,6 @@
 ; Tests that CoroSplit can succesfully determine allocas should live on the frame
 ; if their aliases are used across suspension points through PHINode.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 define i8* @f(i1 %n) "coroutine.presplit"="1" {
 entry:
@@ -45,7 +45,7 @@
 ; CHECK-NEXT:    %0 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2
 ; CHECK-NEXT:    %1 = bitcast i64* %0 to i8*
 ; CHECK-NEXT:    %2 = bitcast i8* %1 to i32*
-; CHECK-NEXT:    %alias_phi.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3
+; CHECK:         %alias_phi.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3
 ; CHECK-NEXT:    store i32* %2, i32** %alias_phi.spill.addr
 
 declare i8* @llvm.coro.free(token, i8*)
Index: llvm/test/Transforms/Coroutines/coro-alloca-03.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-alloca-03.ll
+++ llvm/test/Transforms/Coroutines/coro-alloca-03.ll
@@ -1,5 +1,5 @@
 ; Tests that allocas escaped through function calls will live on the frame.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 define i8* @f() "coroutine.presplit"="1" {
 entry:
Index: llvm/test/Transforms/Coroutines/coro-alloca-02.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-alloca-02.ll
+++ llvm/test/Transforms/Coroutines/coro-alloca-02.ll
@@ -1,6 +1,6 @@
 ; Tests that if an alloca is escaped through storing the address,
 ; the alloac will be put on the frame.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 define i8* @f() "coroutine.presplit"="1" {
 entry:
Index: llvm/test/Transforms/Coroutines/coro-alloca-01.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-alloca-01.ll
+++ llvm/test/Transforms/Coroutines/coro-alloca-01.ll
@@ -1,6 +1,6 @@
 ; Tests that CoroSplit can succesfully determine allocas should live on the frame
 ; if their aliases are used across suspension points through PHINode.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 define i8* @f(i1 %n) "coroutine.presplit"="1" {
 entry:
Index: llvm/test/Transforms/Coroutines/coro-alloc-with-param-O2.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-alloc-with-param-O2.ll
+++ llvm/test/Transforms/Coroutines/coro-alloc-with-param-O2.ll
@@ -1,6 +1,6 @@
 ; Check that we can handle the case when both alloc function and
 ; the user body consume the same argument.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 ; using this directly (as it would happen under -O2)
 define i8* @f_direct(i64 %this) "coroutine.presplit"="1" {
Index: llvm/test/Transforms/Coroutines/coro-alloc-with-param-O0.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-alloc-with-param-O0.ll
+++ llvm/test/Transforms/Coroutines/coro-alloc-with-param-O0.ll
@@ -1,6 +1,6 @@
 ; Check that we can handle the case when both alloc function and
 ; the user body consume the same argument.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 ; using copy of this (as it would happen under -O0)
 define i8* @f_copy(i64 %this_arg) "coroutine.presplit"="1" {
@@ -33,15 +33,17 @@
 
 ; See that %this is spilled into the frame
 ; CHECK-LABEL: define i8* @f_copy(i64 %this_arg)
+; CHECK:  %this.addr = alloca i64, align 8
+; CHECK:  store i64 %this_arg, i64* %this.addr, align 4
 ; CHECK:  %this.spill.addr = getelementptr inbounds %f_copy.Frame, %f_copy.Frame* %FramePtr, i32 0, i32 2
 ; CHECK:  store i64 %this_arg, i64* %this.spill.addr
-; CHECK: ret i8* %hdl
+; CHECK:  ret i8* %hdl
 
 ; See that %this was loaded from the frame
 ; CHECK-LABEL: @f_copy.resume(
 ; CHECK:  %this.reload = load i64, i64* %this.reload.addr
 ; CHECK:  call void @print2(i64 %this.reload)
-; CHECK: ret void
+; CHECK:  ret void
 
 declare i8* @llvm.coro.free(token, i8*)
 declare i32 @llvm.coro.size.i32()
Index: llvm/test/Transforms/Coroutines/ArgAddr.ll
===================================================================
--- llvm/test/Transforms/Coroutines/ArgAddr.ll
+++ llvm/test/Transforms/Coroutines/ArgAddr.ll
@@ -1,6 +1,6 @@
 ; Need to move users of allocas that were moved into the coroutine frame after
 ; coro.begin.
-; RUN: opt < %s -passes=coro-split -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s
 
 define nonnull i8* @f(i32 %n) "coroutine.presplit"="1" {
 ; CHECK-LABEL: @f(
Index: llvm/lib/Transforms/Coroutines/CoroSplit.cpp
===================================================================
--- llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -1134,17 +1134,6 @@
   // pass to FPM below because it will also verify all the global data.
   if (verifyFunction(F, &errs()))
     report_fatal_error("Broken function");
-
-  legacy::FunctionPassManager FPM(F.getParent());
-
-  FPM.add(createSCCPPass());
-  FPM.add(createCFGSimplificationPass());
-  FPM.add(createEarlyCSEPass());
-  FPM.add(createCFGSimplificationPass());
-
-  FPM.doInitialization();
-  FPM.run(F);
-  FPM.doFinalization();
 }
 
 // Assuming we arrived at the block NewBlock from Prev instruction, store
@@ -2123,24 +2112,17 @@
     StringRef Value = Attr.getValueAsString();
     LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName()
                       << "' state: " << Value << "\n");
-    if (Value == UNPREPARED_FOR_SPLIT) {
-      // Enqueue a second iteration of the CGSCC pipeline on this SCC.
-      UR.CWorklist.insert(&C);
-      F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT);
-      continue;
-    }
     F.removeFnAttr(CORO_PRESPLIT_ATTR);
 
     SmallVector<Function *, 4> Clones;
     const coro::Shape Shape = splitCoroutine(F, Clones, ReuseFrameSlot);
     updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM);
 
-    if ((Shape.ABI == coro::ABI::Async || Shape.ABI == coro::ABI::Retcon ||
-         Shape.ABI == coro::ABI::RetconOnce) &&
-        !Shape.CoroSuspends.empty()) {
-      // Run the CGSCC pipeline on the newly split functions.
-      // All clones will be in the same RefSCC, so choose a random clone.
-      UR.RCWorklist.insert(CG.lookupRefSCC(CG.get(*Clones[0])));
+    if (!Shape.CoroSuspends.empty()) {
+      // Run the CGSCC pipeline on the original and newly split functions.
+      UR.CWorklist.insert(&C);
+      for (Function *Clone : Clones)
+        UR.CWorklist.insert(CG.lookupSCC(CG.get(*Clone)));
     }
   }
 
Index: llvm/lib/Passes/PassBuilder.cpp
===================================================================
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -1002,9 +1002,6 @@
   if (AttributorRun & AttributorRunOption::CGSCC)
     MainCGPipeline.addPass(AttributorCGSCCPass());
 
-  if (PTO.Coroutines)
-    MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
-
   // Now deduce any function attributes based in the current code.
   MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
 
@@ -1026,6 +1023,9 @@
   MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
       buildFunctionSimplificationPipeline(Level, Phase)));
 
+  if (PTO.Coroutines)
+    MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
+
   return MIWP;
 }
 
Index: clang/test/CodeGenCoroutines/coro-newpm-pipeline.cpp
===================================================================
--- clang/test/CodeGenCoroutines/coro-newpm-pipeline.cpp
+++ clang/test/CodeGenCoroutines/coro-newpm-pipeline.cpp
@@ -10,12 +10,6 @@
 //
 // CHECK-ALL: Running pass:{{.*}}CoroEarlyPass
 //
-// The first coro-split pass enqueues a second run of the entire CGSCC pipeline.
-// CHECK-ALL: Running pass: CoroSplitPass on (_Z3foov)
-// CHECK-OPT: Running pass:{{.*}}CoroElidePass{{.*}} on {{.*}}_Z3foov{{.*}}
-//
-// The second coro-split pass splits coroutine 'foo' into funclets
-// 'foo.resume', 'foo.destroy', and 'foo.cleanup'.
 // CHECK-ALL: Running pass: CoroSplitPass on (_Z3foov)
 // CHECK-OPT: Running pass:{{.*}}CoroElidePass{{.*}} on {{.*}}_Z3foov{{.*}}
 //
@@ -27,7 +21,7 @@
 struct handle {};
 
 struct awaitable {
-  bool await_ready() noexcept { return true; }
+  bool await_ready() noexcept { return false; }
   void await_suspend(handle) noexcept {}
   bool await_resume() noexcept { return true; }
 };
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to