yroux updated this revision to Diff 236838. yroux added a comment. Herald added subscribers: cfe-commits, hiraditya. Herald added a project: clang.
Hi, here is an update for the Machine Outliner support on ARM targets. It addresses the comments received on the initial version and is rebased on the current trunk. Among the modifications needed by this rebase, this patch proposes to move the ARM Low Overhead Loops pass before the Outliner and Constant Island passes, because it requires functions to track liveness (which is not the case for outlined functions), and moving the Constant Island pass to the end seems more appropriate. If that is OK, I'll split the patch and propose the move in a separate review, but here is the whole thing so that you can test it. Thanks. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D57054/new/ https://reviews.llvm.org/D57054 Files: clang/lib/Driver/ToolChains/Clang.cpp llvm/lib/CodeGen/MachineOutliner.cpp llvm/lib/CodeGen/ReachingDefAnalysis.cpp llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp llvm/lib/Target/ARM/ARMBaseInstrInfo.h llvm/lib/Target/ARM/ARMTargetMachine.cpp llvm/test/CodeGen/ARM/O3-pipeline.ll llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-arm.mir llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir llvm/test/CodeGen/ARM/machine-outliner-tail.ll llvm/test/CodeGen/ARM/machine-outliner-thunk.ll llvm/test/CodeGen/ARM/machine-outliner.ll llvm/test/CodeGen/ARM/machine-outliner.mir llvm/test/CodeGen/Thumb2/LowOverheadLoops/branch-targets.ll llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-guards.ll llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll @@ -350,7 +350,7 @@ ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} ; CHECK-NEXT: ldr.w r12, [sp, #28] ; CHECK-NEXT: cmp.w r12, #0 -; CHECK-NEXT: beq.w .LBB5_12 +; CHECK-NEXT: beq .LBB5_12 ; CHECK-NEXT: @ %bb.1: @ %for.body.lr.ph ; CHECK-NEXT: add.w r5, r3, r12, lsl #2 ; CHECK-NEXT: add.w r6, r1, r12 @@ -647,7 +647,7 @@ ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} ; CHECK-NEXT: ldr.w r12, [sp, #28] ; CHECK-NEXT: cmp.w r12, #0 -; CHECK-NEXT: beq.w .LBB7_12 +; CHECK-NEXT: beq .LBB7_12 ; CHECK-NEXT: @ %bb.1: @ %for.body.lr.ph ; CHECK-NEXT: add.w r5, r3, r12, lsl #2 ; CHECK-NEXT: add.w r6, r1, r12 @@ -944,7 +944,7 @@ ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEXT: ldr.w r12, [sp, #32] ; CHECK-NEXT: cmp.w r12, #0 -; CHECK-NEXT: beq.w .LBB9_11 +; CHECK-NEXT: beq .LBB9_11 ; CHECK-NEXT: @ %bb.1: @ %vector.memcheck ; CHECK-NEXT: add.w r4, r3, r12, lsl #2 ; CHECK-NEXT: add.w r5, r1, r12, lsl #2 Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll @@ -930,8 +930,7 @@ ; CHECK-LABEL: float_int_int_mul: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: beq .LBB4_8 +; CHECK-NEXT: cbz r3, .LBB4_8 ; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: cmp r3, #3 ; CHECK-NEXT: bhi .LBB4_3 Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-guards.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-guards.ll +++ 
llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-guards.ll @@ -16,7 +16,7 @@ ; CHECK: body: ; CHECK: bb.0.entry: ; CHECK: t2CMPri renamable $lr, 0 -; CHECK: tBcc %bb.3 +; CHECK: t2Bcc %bb.3 ; CHECK: bb.1.while.body.preheader: ; CHECK: $lr = t2DLS renamable $lr ; CHECK: bb.2.while.body: @@ -49,7 +49,7 @@ ; CHECK: body: ; CHECK: bb.0.entry: ; CHECK: t2CMPri renamable $lr, 0 -; CHECK: tBcc %bb.3 +; CHECK: t2Bcc %bb.3 ; CHECK: bb.1.while.body.preheader: ; CHECK: $lr = t2DLS renamable $lr ; CHECK: bb.2.while.body: @@ -84,7 +84,7 @@ ; CHECK: body: ; CHECK: bb.0.entry: ; CHECK: t2CMPri renamable $lr, 0 -; CHECK: tBcc %bb.3 +; CHECK: t2Bcc %bb.3 ; CHECK: bb.1.while.body.preheader: ; CHECK: $lr = t2DLS renamable $lr ; CHECK: bb.2.while.body: @@ -119,7 +119,7 @@ ; CHECK: body: ; CHECK: bb.0.entry: ; CHECK: t2CMPri renamable $lr, 0 -; CHECK: tBcc %bb.3 +; CHECK: t2Bcc %bb.3 ; CHECK: bb.1.while.body.preheader: ; CHECK: $lr = t2DLS renamable $lr ; CHECK: bb.2.while.body: Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll @@ -6,7 +6,7 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r4, r5, r6, r7, lr} ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: beq.w .LBB0_11 +; CHECK-NEXT: beq .LBB0_11 ; CHECK-NEXT: @ %bb.1: @ %vector.memcheck ; CHECK-NEXT: add.w r4, r0, r3, lsl #2 ; CHECK-NEXT: add.w r5, r2, r3, lsl #2 Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll @@ -288,8 +288,7 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: ldr r5, [sp, #20] -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: beq .LBB3_4 +; 
CHECK-NEXT: cbz r5, .LBB3_4 ; CHECK-NEXT: @ %bb.1: @ %vector.ph ; CHECK-NEXT: adds r4, r5, #3 ; CHECK-NEXT: vmov.i32 q1, #0x0 Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/branch-targets.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/branch-targets.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/branch-targets.ll @@ -9,7 +9,7 @@ ; CHECK-MID: renamable $lr = t2LoopDec killed renamable $lr, 1 ; CHECK-MID: t2LoopEnd killed renamable $lr, %bb.3 ; CHECK-MID: bb.3.for.header: -; CHECK-MID: tB %bb.2 +; CHECK-MID: t2B %bb.2 ; CHECK-END: .LBB0_1: ; CHECK-END: b .LBB0_3 @@ -61,7 +61,7 @@ ; CHECK-MID: renamable $lr = t2LoopDec killed renamable $lr, 1 ; CHECK-MID: t2LoopEnd killed renamable $lr, %bb.3 ; CHECK-MID: bb.3.for.header: -; CHECK-MID: tB %bb.2 +; CHECK-MID: t2B %bb.2 define void @check_loop_dec_ugt_brcond_combine(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) { entry: call void @llvm.set.loop.iterations.i32(i32 %N) @@ -104,7 +104,7 @@ ; CHECK-MID: renamable $lr = t2LoopDec killed renamable $lr, 1 ; CHECK-MID: t2LoopEnd killed renamable $lr, %bb.3 ; CHECK-MID: bb.3.for.header: -; CHECK-MID: tB %bb.2 +; CHECK-MID: t2B %bb.2 define void @check_loop_dec_ult_brcond_combine(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) { entry: call void @llvm.set.loop.iterations.i32(i32 %N) @@ -145,9 +145,9 @@ ; CHECK-MID: name: check_loop_dec_ult_xor_brcond_combine ; CHECK-MIO: bb.2.for.body: ; CHECK-MID: t2LoopEnd killed renamable $lr, %bb.3 -; CHECK-MID: tB %bb.4, 14 +; CHECK-MID: t2B %bb.4, 14 ; CHECk-MID: bb.3.for.header: -; CHECK-MID: tB %bb.2 +; CHECK-MID: t2B %bb.2 define void @check_loop_dec_ult_xor_brcond_combine(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) { entry: call void @llvm.set.loop.iterations.i32(i32 %N) @@ -189,9 +189,9 @@ ; CHECK-MID: name: check_loop_dec_sgt_brcond_combine ; CHECK-MIO: 
bb.2.for.body: ; CHECK-MID: t2LoopEnd killed renamable $lr, %bb.3 -; CHECK-MID: tB %bb.4, 14 +; CHECK-MID: t2B %bb.4, 14 ; CHECk-MID: bb.3.for.header: -; CHECK-MID: tB %bb.2 +; CHECK-MID: t2B %bb.2 define void @check_loop_dec_sgt_brcond_combine(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) { entry: call void @llvm.set.loop.iterations.i32(i32 %N) @@ -232,9 +232,9 @@ ; CHECK-MID: name: check_loop_dec_sge_brcond_combine ; CHECK-MIO: bb.2.for.body: ; CHECK-MID: t2LoopEnd killed renamable $lr, %bb.3 -; CHECK-MID: tB %bb.4, 14 +; CHECK-MID: t2B %bb.4, 14 ; CHECk-MID: bb.3.for.header: -; CHECK-MID: tB %bb.2 +; CHECK-MID: t2B %bb.2 define void @check_loop_dec_sge_brcond_combine(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) { entry: call void @llvm.set.loop.iterations.i32(i32 %N) @@ -275,9 +275,9 @@ ; CHECK-MID: name: check_loop_dec_sge_xor_brcond_combine ; CHECK-MIO: bb.2.for.body: ; CHECK-MID: t2LoopEnd killed renamable $lr, %bb.3 -; CHECK-MID: tB %bb.4, 14 +; CHECK-MID: t2B %bb.4, 14 ; CHECk-MID: bb.3.for.header: -; CHECK-MID: tB %bb.2 +; CHECK-MID: t2B %bb.2 define void @check_loop_dec_sge_xor_brcond_combine(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) { entry: call void @llvm.set.loop.iterations.i32(i32 %N) @@ -319,9 +319,9 @@ ; CHECK-MID: name: check_loop_dec_uge_brcond_combine ; CHECK-MIO: bb.2.for.body: ; CHECK-MID: t2LoopEnd killed renamable $lr, %bb.3 -; CHECK-MID: tB %bb.4, 14 +; CHECK-MID: t2B %bb.4, 14 ; CHECk-MID: bb.3.for.header: -; CHECK-MID: tB %bb.2 +; CHECK-MID: t2B %bb.2 define void @check_loop_dec_uge_brcond_combine(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) { entry: call void @llvm.set.loop.iterations.i32(i32 %N) @@ -362,9 +362,9 @@ ; CHECK-MID: name: check_loop_dec_uge_xor_brcond_combine ; CHECK-MIO: bb.2.for.body: ; CHECK-MID: t2LoopEnd killed renamable $lr, %bb.3 -; CHECK-MID: tB %bb.4, 14 +; 
CHECK-MID: t2B %bb.4, 14 ; CHECk-MID: bb.3.for.header: -; CHECK-MID: tB %bb.2 +; CHECK-MID: t2B %bb.2 define void @check_loop_dec_uge_xor_brcond_combine(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) { entry: call void @llvm.set.loop.iterations.i32(i32 %N) @@ -405,11 +405,11 @@ ; CHECK-MID: check_negated_xor_wls ; CHECK-MID: t2WhileLoopStart killed renamable $r2, %bb.3 -; CHECK-MID: tB %bb.1 +; CHECK-MID: t2B %bb.1 ; CHECK-MID: bb.1.while.body.preheader: ; CHECK-MID: $lr = t2LoopDec killed renamable $lr, 1 ; CHECK-MID: t2LoopEnd killed renamable $lr, %bb.2 -; CHECk-MID: tB %bb.3 +; CHECk-MID: t2B %bb.3 ; CHECK-MID: bb.3.while.end: define void @check_negated_xor_wls(i16* nocapture %a, i16* nocapture readonly %b, i32 %N) { entry: @@ -438,11 +438,11 @@ ; CHECK-MID: check_negated_cmp_wls ; CHECK-MID: t2WhileLoopStart killed renamable $r2, %bb.3 -; CHECK-MID: tB %bb.1 +; CHECK-MID: t2B %bb.1 ; CHECK-MID: bb.1.while.body.preheader: ; CHECK-MID: $lr = t2LoopDec killed renamable $lr, 1 ; CHECK-MID: t2LoopEnd killed renamable $lr, %bb.2 -; CHECk-MID: tB %bb.3 +; CHECk-MID: t2B %bb.3 ; CHECK-MID: bb.3.while.end: define void @check_negated_cmp_wls(i16* nocapture %a, i16* nocapture readonly %b, i32 %N) { entry: @@ -471,11 +471,11 @@ ; CHECK-MID: check_negated_reordered_wls ; CHECK-MID: bb.1.while.body.preheader: -; CHECK-MID: tB %bb.2 +; CHECK-MID: t2B %bb.2 ; CHECK-MID: bb.2.while.body: ; CHECK-MID: t2LoopDec killed renamable $lr, 1 ; CHECK-MID: t2LoopEnd killed renamable $lr, %bb.2 -; CHECK-MID: tB %bb.4 +; CHECK-MID: t2B %bb.4 ; CHECK-MID: bb.3.while: ; CHECK-MID: t2WhileLoopStart {{.*}}, %bb.4 ; CHECK-MID: bb.4.while.end Index: llvm/test/CodeGen/ARM/machine-outliner.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/ARM/machine-outliner.mir @@ -0,0 +1,72 @@ +# RUN: llc -mtriple=arm-- -run-pass=machine-outliner -verify-machineinstrs \ +# RUN: %s -o - | FileCheck %s +# RUN: llc 
-mtriple=thumbv7-- -run-pass=machine-outliner -verify-machineinstrs \ +# RUN: %s -o - | FileCheck %s + +--- | + define void @outline_1() #0 { ret void } + define void @outline_2() #0 { ret void } + define void @outline_3() #0 { ret void } + define void @dont_outline() { ret void } + + attributes #0 = { minsize optsize } +... +--- + +name: outline_1 +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK: OUTLINED + liveins: $r2 + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + BX_RET 14, $noreg +... +--- + +name: outline_2 +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK: OUTLINED + liveins: $r2 + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + BX_RET 14, $noreg +... +--- + +name: outline_3 +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK: OUTLINED + liveins: $r2 + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + BX_RET 14, $noreg +... 
+--- + +name: dont_outline +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: bb.0: + ; CHECK-NOT: BL + liveins: $lr, $r2 + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + $r2 = MOVi 1, 14, $noreg, $noreg + BX_RET 14, $noreg Index: llvm/test/CodeGen/ARM/machine-outliner.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/ARM/machine-outliner.ll @@ -0,0 +1,135 @@ +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=arm-- \ +; RUN: < %s | FileCheck %s --check-prefixes=CHECK,ARM +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=thumbv7-- \ +; RUN: < %s | FileCheck %s --check-prefixes=CHECK,THUMB +; RUN: llc -verify-machineinstrs -enable-machine-outliner \ +; RUN: -enable-linkonceodr-outlining -mtriple=arm-- < %s | FileCheck %s \ +; RUN: --check-prefix=ODR +; RUN: llc -verify-machineinstrs -enable-machine-outliner \ +; RUN: -enable-linkonceodr-outlining -mtriple=thumbv7-- < %s | FileCheck %s \ +; RUN: --check-prefix=ODR +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=arm-- \ +; RUN: -stop-after=machine-outliner < %s | FileCheck %s \ +; RUN: --check-prefix=TARGET_FEATURES + + +; Make sure that we inherit target features from functions and make sure we have +; the right function attributes. 
+; TARGET_FEATURES: define internal void @OUTLINED_FUNCTION_{{[0-9]+}}() +; TARGET_FEATURES-SAME: #[[ATTR_NUM:[0-9]+]] +; TARGET_FEATURES-DAG: attributes #[[ATTR_NUM]] = { +; TARGET_FEATURES-SAME: minsize +; TARGET_FEATURES-SAME: optsize +; TARGET_FEATURES-SAME: "target-features"="+neon" + +define linkonce_odr void @fish() #0 { + ; CHECK-LABEL: fish: + ; CHECK-NOT: OUTLINED + ; ODR: [[OUTLINED:OUTLINED_FUNCTION_[0-9]+]] + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 + ret void +} + +define void @turtle() section "TURTLE,turtle" { + ; CHECK-LABEL: turtle: + ; ODR-LABEL: turtle: + ; CHECK-NOT: OUTLINED + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 + ret void +} + +define void @cat() #0 { + ; CHECK-LABEL: cat: + ; CHECK: [[OUTLINED:OUTLINED_FUNCTION_[0-9]+]] + ; ODR: [[OUTLINED]] + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 + ret void +} + +define void @dog() #0 { + ; CHECK-LABEL: dog: + ; CHECK: [[OUTLINED]] + ; ODR: [[OUTLINED]] + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 
4 + %6 = alloca i32, align 4 + store i32 1, i32* %1, align 4 + store i32 2, i32* %2, align 4 + store i32 3, i32* %3, align 4 + store i32 4, i32* %4, align 4 + store i32 5, i32* %5, align 4 + store i32 6, i32* %6, align 4 + ret void +} + +; ODR: [[OUTLINED]]: +; ARM: .code 32 +; ARM-NEXT: [[OUTLINED]]: +; ARM: mov r0, #2 +; ARM-NEXT: str r0, [sp, #16] +; ARM-NEXT: mov r0, #1 +; ARM-NEXT: str r0, [sp, #20] +; ARM-NEXT: mov r0, #3 +; ARM-NEXT: str r0, [sp, #12] +; ARM-NEXT: mov r0, #4 +; ARM-NEXT: str r0, [sp, #8] +; ARM-NEXT: mov r0, #5 +; ARM-NEXT: str r0, [sp, #4] +; ARM-NEXT: mov r0, #6 +; ARM-NEXT: str r0, [sp] +; ARM-NEXT: mov pc, lr + +; THUMB: .code 16 +; THUMB-NEXT: .thumb_func +; THUMB-NEXT: [[OUTLINED]]: +; THUMB: movs r0, #2 +; THUMB-NEXT: str r0, [sp, #16] +; THUMB-NEXT: movs r0, #1 +; THUMB-NEXT: str r0, [sp, #20] +; THUMB-NEXT: movs r0, #3 +; THUMB-NEXT: str r0, [sp, #12] +; THUMB-NEXT: movs r0, #4 +; THUMB-NEXT: str r0, [sp, #8] +; THUMB-NEXT: movs r0, #5 +; THUMB-NEXT: str r0, [sp, #4] +; THUMB-NEXT: movs r0, #6 +; THUMB-NEXT: str r0, [sp] +; THUMB-NEXT: bx lr + +attributes #0 = { nounwind "target-cpu"="cortex-a53" "target-features"="+neon" } Index: llvm/test/CodeGen/ARM/machine-outliner-thunk.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/ARM/machine-outliner-thunk.ll @@ -0,0 +1,111 @@ +; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=armv7-- \ +; RUN: -stop-after=machine-outliner < %s | FileCheck %s --check-prefix=ARM +; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=thumbv7-- \ +; RUN: -stop-after=machine-outliner < %s | FileCheck %s --check-prefix=THUMB +; RUN: llc -enable-machine-outliner -verify-machineinstrs \ +; RUN: -mtriple=thumbv7-apple-darwin -stop-after=machine-outliner < %s \ +; RUN: | FileCheck %s --check-prefix=MACHO + +declare i32 @thunk_called_fn(i32, i32, i32, i32) + +define i32 @a() { +; ARM-LABEL: name: a +; ARM: bb.0.entry: +; 
ARM-NEXT: liveins: $r11, $lr +; ARM: $sp = frame-setup STMDB_UPD $sp, 14, $noreg, killed $r11, killed $lr +; ARM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 +; ARM-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; ARM-NEXT: frame-setup CFI_INSTRUCTION offset $r11, -8 +; ARM-NEXT: BL @OUTLINED_FUNCTION_0{{.*}} +; ARM-NEXT: renamable $r0 = ADDri killed renamable $r0, 8, 14, $noreg, $noreg +; ARM-NEXT: $sp = LDMIA_RET $sp, 14, $noreg, def $r11, def $pc, implicit killed $r0 + +; THUMB-LABEL: name: a +; THUMB: bb.0.entry: +; THUMB-NEXT: liveins: $r7, $lr +; THUMB: frame-setup tPUSH 14, $noreg, killed $r7, killed $lr +; THUMB-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 +; THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $r7, -8 +; THUMB-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_0{{.*}} +; THUMB-NEXT: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 8, 14, $noreg +; THUMB-NEXT: tPOP_RET 14, $noreg, def $r7, def $pc + +; MACHO-LABEL: name: a +; MACHO: bb.0.entry: +; MACHO-NEXT: liveins: $lr +; MACHO: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -4, 14, $noreg +; MACHO-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 4 +; MACHO-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; MACHO-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_0{{.*}} +; MACHO-NEXT: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 8, 14, $noreg +; MACHO-NEXT: $lr, $sp = t2LDR_POST $sp, 4, 14, $noreg +; MACHO-NEXT: tBX_RET 14, $noreg, implicit killed $r0 +entry: + %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4) + %cx = add i32 %call, 8 + ret i32 %cx +} + +define i32 @b() { +; ARM-LABEL: name: b +; ARM: bb.0.entry: +; ARM-NEXT: liveins: $r11, $lr +; ARM: $sp = frame-setup STMDB_UPD $sp, 14, $noreg, killed $r11, killed $lr +; ARM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 +; ARM-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; ARM-NEXT: frame-setup CFI_INSTRUCTION offset $r11, -8 +; 
ARM-NEXT: BL @OUTLINED_FUNCTION_0{{.*}} +; ARM-NEXT: renamable $r0 = ADDri killed renamable $r0, 88, 14, $noreg, $noreg +; ARM-NEXT: $sp = LDMIA_RET $sp, 14, $noreg, def $r11, def $pc, implicit killed $r0 + +; THUMB-LABEL: name: b +; THUMB: bb.0.entry: +; THUMB-NEXT: liveins: $r7, $lr +; THUMB: frame-setup tPUSH 14, $noreg, killed $r7, killed $lr +; THUMB-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 +; THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $r7, -8 +; THUMB-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_0{{.*}} +; THUMB-NEXT: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 88, 14, $noreg +; THUMB-NEXT: tPOP_RET 14, $noreg, def $r7, def $pc + +; MACHO-LABEL: name: b +; MACHO: bb.0.entry: +; MACHO-NEXT: liveins: $lr +; MACHO: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -4, 14, $noreg +; MACHO-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 4 +; MACHO-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; MACHO-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_0{{.*}} +; MACHO-NEXT: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 88, 14, $noreg +; MACHO-NEXT: $lr, $sp = t2LDR_POST $sp, 4, 14, $noreg +; MACHO-NEXT: tBX_RET 14, $noreg, implicit killed $r0 +entry: + %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4) + %cx = add i32 %call, 88 + ret i32 %cx +} + +; ARM-LABEL: name: OUTLINED_FUNCTION_0 +; ARM: bb.0: +; ARM-NEXT: $r0 = MOVi 1, 14, $noreg, $noreg +; ARM-NEXT: $r1 = MOVi 2, 14, $noreg, $noreg +; ARM-NEXT: $r2 = MOVi 3, 14, $noreg, $noreg +; ARM-NEXT: $r3 = MOVi 4, 14, $noreg, $noreg +; ARM-NEXT: TAILJMPd @thunk_called_fn, implicit $sp + +; THUMB-LABEL: name: OUTLINED_FUNCTION_0 +; THUMB: bb.0: +; THUMB-NEXT: $r0, dead $cpsr = tMOVi8 1, 14, $noreg +; THUMB-NEXT: $r1, dead $cpsr = tMOVi8 2, 14, $noreg +; THUMB-NEXT: $r2, dead $cpsr = tMOVi8 3, 14, $noreg +; THUMB-NEXT: $r3, dead $cpsr = tMOVi8 4, 14, $noreg +; THUMB-NEXT: tTAILJMPdND @thunk_called_fn, 14, 
$noreg, implicit $sp + +; MACHO-LABEL: name: OUTLINED_FUNCTION_0 +; MACHO: bb.0: +; MACHO-NEXT: $r0, dead $cpsr = tMOVi8 1, 14, $noreg +; MACHO-NEXT: $r1, dead $cpsr = tMOVi8 2, 14, $noreg +; MACHO-NEXT: $r2, dead $cpsr = tMOVi8 3, 14, $noreg +; MACHO-NEXT: $r3, dead $cpsr = tMOVi8 4, 14, $noreg +; MACHO-NEXT: tTAILJMPd @thunk_called_fn, 14, $noreg, implicit $sp Index: llvm/test/CodeGen/ARM/machine-outliner-tail.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/ARM/machine-outliner-tail.ll @@ -0,0 +1,42 @@ +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=arm-- \ +; RUN: --stop-after=machine-outliner < %s | FileCheck %s --check-prefix=ARM +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=thumbv7-- \ +; RUN: --stop-after=machine-outliner < %s | FileCheck %s --check-prefix=THUMB +; RUN: llc -verify-machineinstrs -enable-machine-outliner \ +; RUN: -mtriple=thumbv7-apple-darwin --stop-after=machine-outliner < %s \ +; RUN: | FileCheck %s --check-prefix=MACHO + +; ARM-LABEL: name: OUTLINED_FUNCTION_0 +; ARM: $r0 = MOVi 1, 14, $noreg, $noreg +; ARM-NEXT: $r1 = MOVi 2, 14, $noreg, $noreg +; ARM-NEXT: $r2 = MOVi 3, 14, $noreg, $noreg +; ARM-NEXT: $r3 = MOVi 4, 14, $noreg, $noreg +; ARM-NEXT: TAILJMPd @z + +; THUMB-LABEL: name: OUTLINED_FUNCTION_0 +; THUMB: $r0, dead $cpsr = tMOVi8 1, 14, $noreg +; THUMB-NEXT: $r1, dead $cpsr = tMOVi8 2, 14, $noreg +; THUMB-NEXT: $r2, dead $cpsr = tMOVi8 3, 14, $noreg +; THUMB-NEXT: $r3, dead $cpsr = tMOVi8 4, 14, $noreg +; THUMB-NEXT: tTAILJMPdND @z, 14, $noreg + +; MACHO-LABEL: name: OUTLINED_FUNCTION_0 +; MACHO: $r0, dead $cpsr = tMOVi8 1, 14, $noreg +; MACHO-NEXT: $r1, dead $cpsr = tMOVi8 2, 14, $noreg +; MACHO-NEXT: $r2, dead $cpsr = tMOVi8 3, 14, $noreg +; MACHO-NEXT: $r3, dead $cpsr = tMOVi8 4, 14, $noreg +; MACHO-NEXT: tTAILJMPd @z, 14, $noreg + +define void @a() { +entry: + tail call void @z(i32 1, i32 2, i32 3, i32 4) + ret void +} + 
+declare void @z(i32, i32, i32, i32) + +define dso_local void @b(i32* nocapture readnone %p) { +entry: + tail call void @z(i32 1, i32 2, i32 3, i32 4) + ret void +} Index: llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir @@ -0,0 +1,334 @@ +# RUN: llc -mtriple=thumbv7-- -run-pass=prologepilog \ +# RUN: -run-pass=machine-outliner -verify-machineinstrs %s -o - | FileCheck %s + +--- | + define void @CheckAddrModeT2_i12() { ret void } + define void @CheckAddrModeT2_i8() { ret void } + define void @CheckAddrModeT2_i8s4() { ret void } + define void @CheckAddrModeT2_ldrex() { ret void } + define void @CheckAddrModeT2_i7() { ret void } + define void @CheckAddrModeT2_i7s2() { ret void } + define void @CheckAddrModeT2_i7s4() { ret void } + define void @CheckAddrModeT1_s() { ret void } + define void @foo() { ret void } + +... +--- + +name: CheckAddrModeT2_i12 +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r0 + ;CHECK-LABEL: name: CheckAddrModeT2_i12 + ;CHECK: $r1 = tMOVr killed $r0, 14, $noreg + ;CHECK-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_[[I12:[0-9]+]] + ;CHECK-NEXT: $r5 = t2LDRi12 $sp, 4088, 14, $noreg + $r1 = tMOVr killed $r0, 14, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + $r1 = t2LDRi12 $sp, 0, 14, $noreg + $r2 = t2LDRi12 $sp, 8, 14, $noreg + $r3 = t2LDRi12 $sp, 10, 14, $noreg + $r4 = t2LDRi12 $sp, 4086, 14, $noreg + $r5 = t2LDRi12 $sp, 4088, 14, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + $r1 = t2LDRi12 $sp, 0, 14, $noreg + $r2 = t2LDRi12 $sp, 8, 14, $noreg + $r3 = t2LDRi12 $sp, 10, 14, $noreg + $r4 = t2LDRi12 $sp, 4086, 14, $noreg + $r5 = t2LDRi12 $sp, 4088, 14, $noreg + BX_RET 14, $noreg +... 
+--- + +name: CheckAddrModeT2_i8 +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r1, $r2, $r3, $r4 + ;CHECK-LABEL: name: CheckAddrModeT2_i8 + ;CHECK: $r0 = tMOVr $r1, 14, $noreg + ;CHECK-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_[[I8:[0-9]+]] + ;CHECK-NEXT: t2STRHi8 $r4, $sp, 248, 14, $noreg + $r0 = tMOVr $r1, 14, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + t2STRHi8 $r0, $sp, 0, 14, $noreg + t2STRHi8 $r1, $sp, 8, 14, $noreg + t2STRHi8 $r2, $sp, 10, 14, $noreg + t2STRHi8 $r3, $sp, 247, 14, $noreg + t2STRHi8 $r4, $sp, 248, 14, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + t2STRHi8 $r0, $sp, 0, 14, $noreg + t2STRHi8 $r1, $sp, 8, 14, $noreg + t2STRHi8 $r2, $sp, 10, 14, $noreg + t2STRHi8 $r3, $sp, 247, 14, $noreg + t2STRHi8 $r4, $sp, 248, 14, $noreg + BX_RET 14, $noreg +... +--- + +name: CheckAddrModeT2_i8s4 +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r1, $r2, $r3, $r4, $r5 + ;CHECK-LABEL: name: CheckAddrModeT2_i8s4 + ;CHECK: $r0 = tMOVr $r2, 14, $noreg + ;CHECK-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_[[I8S4:[0-9]+]] + ;CHECK-NEXT: t2STRDi8 $r4, $r5, $sp, 254, 14, $noreg + $r0 = tMOVr $r2, 14, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + t2STRDi8 $r0, $r1, $sp, 0, 14, $noreg + t2STRDi8 $r1, $r2, $sp, 8, 14, $noreg + t2STRDi8 $r2, $r3, $sp, 10, 14, $noreg + t2STRDi8 $r3, $r4, $sp, 253, 14, $noreg + t2STRDi8 $r4, $r5, $sp, 254, 14, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + t2STRDi8 $r0, $r1, $sp, 0, 14, $noreg + t2STRDi8 $r1, $r2, $sp, 8, 14, $noreg + t2STRDi8 $r2, $r3, $sp, 10, 14, $noreg + t2STRDi8 $r3, $r4, $sp, 253, 14, $noreg + t2STRDi8 $r4, $r5, $sp, 254, 14, $noreg + BX_RET 14, $noreg +... 
+--- + +name: CheckAddrModeT2_ldrex +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r1, $r2, $r3, $r4, $r5 + ;CHECK-LABEL: name: CheckAddrModeT2_ldrex + ;CHECK: $r0 = tMOVr $r2, 14, $noreg + ;CHECK-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_[[LDREX:[0-9]+]] + ;CHECK-NEXT: t2LDREX $sp, 254, 14, $noreg + $r0 = tMOVr $r2, 14, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + $r0 = t2LDREX $sp, 0, 14, $noreg + $r1 = t2LDREX $sp, 8, 14, $noreg + $r2 = t2LDREX $sp, 10, 14, $noreg + $r3 = t2LDREX $sp, 253, 14, $noreg + $r4 = t2LDREX $sp, 254, 14, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + $r0 = t2LDREX $sp, 0, 14, $noreg + $r1 = t2LDREX $sp, 8, 14, $noreg + $r2 = t2LDREX $sp, 10, 14, $noreg + $r3 = t2LDREX $sp, 253, 14, $noreg + $r4 = t2LDREX $sp, 254, 14, $noreg + BX_RET 14, $noreg +... +--- + +name: CheckAddrModeT2_i7 +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r2, $q0, $q1, $q2, $q3, $q4 + ;CHECK-LABEL: name: CheckAddrModeT2_i7 + ;CHECK: $r0 = tMOVr $r2, 14, $noreg + ;CHECK-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_[[I7:[0-9]+]] + ;CHECK-NEXT: MVE_VSTRBU8 $q4, $sp, 120, 0, $noreg + $r0 = tMOVr $r2, 14, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + MVE_VSTRBU8 $q0, $sp, 0, 0, $noreg + MVE_VSTRBU8 $q1, $sp, 8, 0, $noreg + MVE_VSTRBU8 $q2, $sp, 10, 0, $noreg + MVE_VSTRBU8 $q3, $sp, 119, 0, $noreg + MVE_VSTRBU8 $q4, $sp, 120, 0, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + MVE_VSTRBU8 $q0, $sp, 0, 0, $noreg + MVE_VSTRBU8 $q1, $sp, 8, 0, $noreg + MVE_VSTRBU8 $q2, $sp, 10, 0, $noreg + MVE_VSTRBU8 $q3, $sp, 119, 0, $noreg + MVE_VSTRBU8 $q4, $sp, 120, 0, $noreg + BX_RET 14, $noreg +... 
+--- + +name: CheckAddrModeT2_i7s2 +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r2, $q0, $q1, $q2, $q3, $q4 + ;CHECK-LABEL: name: CheckAddrModeT2_i7s2 + ;CHECK: $r0 = tMOVr $r2, 14, $noreg + ;CHECK-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_[[I7S2:[0-9]+]] + ;CHECK-NEXT: MVE_VSTRHU16 $q4, $sp, 124, 0, $noreg + $r0 = tMOVr $r2, 14, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + MVE_VSTRHU16 $q0, $sp, 0, 0, $noreg + MVE_VSTRHU16 $q1, $sp, 8, 0, $noreg + MVE_VSTRHU16 $q2, $sp, 10, 0, $noreg + MVE_VSTRHU16 $q3, $sp, 119, 0, $noreg + MVE_VSTRHU16 $q4, $sp, 124, 0, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + MVE_VSTRHU16 $q0, $sp, 0, 0, $noreg + MVE_VSTRHU16 $q1, $sp, 8, 0, $noreg + MVE_VSTRHU16 $q2, $sp, 10, 0, $noreg + MVE_VSTRHU16 $q3, $sp, 119, 0, $noreg + MVE_VSTRHU16 $q4, $sp, 124, 0, $noreg + BX_RET 14, $noreg +... +--- + +name: CheckAddrModeT2_i7s4 +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r2, $q0, $q1, $q2, $q3, $q4 + ;CHECK-LABEL: name: CheckAddrModeT2_i7s4 + ;CHECK: $r0 = tMOVr $r2, 14, $noreg + ;CHECK-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_[[I7S4:[0-9]+]] + ;CHECK-NEXT: MVE_VSTRWU32 $q4, $sp, 126, 0, $noreg + $r0 = tMOVr $r2, 14, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + MVE_VSTRWU32 $q0, $sp, 0, 0, $noreg + MVE_VSTRWU32 $q1, $sp, 8, 0, $noreg + MVE_VSTRWU32 $q2, $sp, 10, 0, $noreg + MVE_VSTRWU32 $q3, $sp, 125, 0, $noreg + MVE_VSTRWU32 $q4, $sp, 126, 0, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + MVE_VSTRWU32 $q0, $sp, 0, 0, $noreg + MVE_VSTRWU32 $q1, $sp, 8, 0, $noreg + MVE_VSTRWU32 $q2, $sp, 10, 0, $noreg + MVE_VSTRWU32 $q3, $sp, 125, 0, $noreg + MVE_VSTRWU32 $q4, $sp, 126, 0, $noreg + BX_RET 14, $noreg +... 
+--- + +name: CheckAddrModeT1_s +tracksRegLiveness: true + +body: | + bb.0: + liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r6 + ;CHECK-LABEL: name: CheckAddrModeT1_s + ;CHECK: $r0 = tMOVr $r3, 14, $noreg + ;CHECK-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_[[T1_S:[0-9]+]] + ;CHECK-NEXT: tSTRspi $r6, $sp, 254, 14, $noreg + $r0 = tMOVr $r3, 14, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + tSTRspi $r0, $sp, 0, 14, $noreg + tSTRspi $r1, $sp, 4, 14, $noreg + tSTRspi $r2, $sp, 8, 14, $noreg + tSTRspi $r3, $sp, 12, 14, $noreg + tSTRspi $r4, $sp, 16, 14, $noreg + tSTRspi $r5, $sp, 253, 14, $noreg + tSTRspi $r6, $sp, 254, 14, $noreg + tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + tSTRspi $r0, $sp, 0, 14, $noreg + tSTRspi $r1, $sp, 4, 14, $noreg + tSTRspi $r2, $sp, 8, 14, $noreg + tSTRspi $r3, $sp, 12, 14, $noreg + tSTRspi $r4, $sp, 16, 14, $noreg + tSTRspi $r5, $sp, 253, 14, $noreg + tSTRspi $r6, $sp, 254, 14, $noreg + BX_RET 14, $noreg +... +--- + +name: foo +tracksRegLiveness: true +body: | + bb.0: + liveins: $lr + + BX_RET 14, $noreg + + ;CHECK: name: OUTLINED_FUNCTION_[[I7S4]] + ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14, $noreg + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8 + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8 + ;CHECK-NEXT: tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + ;CHECK-NEXT: MVE_VSTRWU32 $q0, $sp, 2, 0, $noreg + ;CHECK-NEXT: MVE_VSTRWU32 $q1, $sp, 10, 0, $noreg + ;CHECK-NEXT: MVE_VSTRWU32 $q2, $sp, 12, 0, $noreg + ;CHECK-NEXT: MVE_VSTRWU32 $q3, $sp, 127, 0, $noreg + ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14, $noreg + + ;CHECK: name: OUTLINED_FUNCTION_[[I8]] + ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14, $noreg + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8 + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8 + ;CHECK-NEXT: tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + ;CHECK-NEXT: t2STRHi8 $r0, $sp, 8, 14, $noreg 
+ ;CHECK-NEXT: t2STRHi8 $r1, $sp, 16, 14, $noreg + ;CHECK-NEXT: t2STRHi8 $r2, $sp, 18, 14, $noreg + ;CHECK-NEXT: t2STRHi8 $r3, $sp, 255, 14, $noreg + ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14, $noreg + + ;CHECK: name: OUTLINED_FUNCTION_[[I7S2]] + ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14, $noreg + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8 + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8 + ;CHECK-NEXT: tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + ;CHECK-NEXT: MVE_VSTRHU16 $q0, $sp, 4, 0, $noreg + ;CHECK-NEXT: MVE_VSTRHU16 $q1, $sp, 12, 0, $noreg + ;CHECK-NEXT: MVE_VSTRHU16 $q2, $sp, 14, 0, $noreg + ;CHECK-NEXT: MVE_VSTRHU16 $q3, $sp, 123, 0, $noreg + ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14, $noreg + + ;CHECK: name: OUTLINED_FUNCTION_[[LDREX]] + ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14, $noreg + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8 + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8 + ;CHECK-NEXT: tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + ;CHECK-NEXT: $r0 = t2LDREX $sp, 2, 14, $noreg + ;CHECK-NEXT: $r1 = t2LDREX $sp, 10, 14, $noreg + ;CHECK-NEXT: $r2 = t2LDREX $sp, 12, 14, $noreg + ;CHECK-NEXT: $r3 = t2LDREX $sp, 255, 14, $noreg + ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14, $noreg + + ;CHECK: name: OUTLINED_FUNCTION_[[I7]] + ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14, $noreg + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8 + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8 + ;CHECK-NEXT: tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + ;CHECK-NEXT: MVE_VSTRBU8 $q0, $sp, 8, 0, $noreg + ;CHECK-NEXT: MVE_VSTRBU8 $q1, $sp, 16, 0, $noreg + ;CHECK-NEXT: MVE_VSTRBU8 $q2, $sp, 18, 0, $noreg + ;CHECK-NEXT: MVE_VSTRBU8 $q3, $sp, 127, 0, $noreg + ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14, $noreg + + ;CHECK: name: OUTLINED_FUNCTION_[[I12]] + ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, 
$sp, -8, 14, $noreg + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8 + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8 + ;CHECK-NEXT: tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + ;CHECK-NEXT: $r1 = t2LDRi12 $sp, 8, 14, $noreg + ;CHECK-NEXT: $r2 = t2LDRi12 $sp, 16, 14, $noreg + ;CHECK-NEXT: $r3 = t2LDRi12 $sp, 18, 14, $noreg + ;CHECK-NEXT: $r4 = t2LDRi12 $sp, 4094, 14, $noreg + ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14, $noreg + + ;CHECK: name: OUTLINED_FUNCTION_[[I8S4]] + ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14, $noreg + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8 + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8 + ;CHECK-NEXT: tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 2, 14, $noreg + ;CHECK-NEXT: t2STRDi8 $r1, $r2, $sp, 10, 14, $noreg + ;CHECK-NEXT: t2STRDi8 $r2, $r3, $sp, 12, 14, $noreg + ;CHECK-NEXT: t2STRDi8 $r3, $r4, $sp, 255, 14, $noreg + ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14, $noreg + + ;CHECK: name: OUTLINED_FUNCTION_[[T1_S]] + ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14, $noreg + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8 + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8 + ;CHECK-NEXT: tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp + ;CHECK-NEXT: tSTRspi $r0, $sp, 2, 14, $noreg + ;CHECK-NEXT: tSTRspi $r1, $sp, 6, 14, $noreg + ;CHECK-NEXT: tSTRspi $r2, $sp, 10, 14, $noreg + ;CHECK-NEXT: tSTRspi $r3, $sp, 14, 14, $noreg + ;CHECK-NEXT: tSTRspi $r4, $sp, 18, 14, $noreg + ;CHECK-NEXT: tSTRspi $r5, $sp, 255, 14, $noreg + ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14, $noreg Index: llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-arm.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-arm.mir @@ -0,0 +1,169 @@ +# RUN: llc -mtriple=armv7-- -run-pass=prologepilog 
-run-pass=machine-outliner \ +# RUN: -verify-machineinstrs %s -o - | FileCheck %s + +--- | + define void @CheckAddrMode_i12() { ret void } + define void @CheckAddrMode3() { ret void } + define void @CheckAddrMode5() { ret void } + define void @CheckAddrMode5FP16() { ret void } + define void @foo() { ret void } + +... +--- + +name: CheckAddrMode_i12 +tracksRegLiveness: true +body: | + bb.0: + liveins: $r0 + ; CHECK-LABEL: name: CheckAddrMode_i12 + ; CHECK: $r1 = MOVr killed $r0, 14, $noreg, $noreg + ; CHECK-NEXT: BL @OUTLINED_FUNCTION_[[I12:[0-9]+]] + ; CHECK-NEXT: $r5 = LDRi12 $sp, 4088, 14, $noreg + $r1 = MOVr killed $r0, 14, $noreg, $noreg + BL @foo, implicit-def dead $lr, implicit $sp + $r1 = LDRi12 $sp, 0, 14, $noreg + $r2 = LDRi12 $sp, 8, 14, $noreg + $r3 = LDRi12 $sp, 10, 14, $noreg + $r4 = LDRi12 $sp, 4086, 14, $noreg + $r5 = LDRi12 $sp, 4088, 14, $noreg + BL @foo, implicit-def dead $lr, implicit $sp + $r1 = LDRi12 $sp, 0, 14, $noreg + $r2 = LDRi12 $sp, 8, 14, $noreg + $r3 = LDRi12 $sp, 10, 14, $noreg + $r4 = LDRi12 $sp, 4086, 14, $noreg + $r5 = LDRi12 $sp, 4088, 14, $noreg + BX_RET 14, $noreg +... +--- + +name: CheckAddrMode3 +tracksRegLiveness: true +body: | + bb.0: + liveins: $r1 + ; CHECK-LABEL: name: CheckAddrMode3 + ; CHECK: $r0 = MOVr killed $r1, 14, $noreg, $noreg + ; CHECK-NEXT: BL @OUTLINED_FUNCTION_[[I3:[0-9]+]] + ; CHECK-NEXT: $r5 = LDRSH $sp, $noreg, 248, 14, $noreg + $r0 = MOVr killed $r1, 14, $noreg, $noreg + BL @foo, implicit-def dead $lr, implicit $sp + $r1 = LDRSH $sp, $noreg, 0, 14, $noreg + $r2 = LDRSH $sp, $noreg, 8, 14, $noreg + $r3 = LDRSH $sp, $noreg, 10, 14, $noreg + $r4 = LDRSH $sp, $noreg, 247, 14, $noreg + $r5 = LDRSH $sp, $noreg, 248, 14, $noreg + BL @foo, implicit-def dead $lr, implicit $sp + $r1 = LDRSH $sp, $noreg, 0, 14, $noreg + $r2 = LDRSH $sp, $noreg, 8, 14, $noreg + $r3 = LDRSH $sp, $noreg, 10, 14, $noreg + $r4 = LDRSH $sp, $noreg, 247, 14, $noreg + $r5 = LDRSH $sp, $noreg, 248, 14, $noreg + BX_RET 14, $noreg +... 
+--- + +name: CheckAddrMode5 +tracksRegLiveness: true +body: | + bb.0: + liveins: $r2 + ; CHECK-LABEL: name: CheckAddrMode5 + ; CHECK: $r0 = MOVr killed $r2, 14, $noreg, $noreg + ; CHECK-NEXT: BL @OUTLINED_FUNCTION_[[I5:[0-9]+]] + ; CHECK-NEXT: $d4 = VLDRD $sp, 254, 14, $noreg + $r0 = MOVr killed $r2, 14, $noreg, $noreg + BL @foo, implicit-def dead $lr, implicit $sp + $d0 = VLDRD $sp, 0, 14, $noreg + $d1 = VLDRD $sp, 8, 14, $noreg + $d2 = VLDRD $sp, 10, 14, $noreg + $d3 = VLDRD $sp, 253, 14, $noreg + $d4 = VLDRD $sp, 254, 14, $noreg + BL @foo, implicit-def dead $lr, implicit $sp + $d0 = VLDRD $sp, 0, 14, $noreg + $d1 = VLDRD $sp, 8, 14, $noreg + $d2 = VLDRD $sp, 10, 14, $noreg + $d3 = VLDRD $sp, 253, 14, $noreg + $d4 = VLDRD $sp, 254, 14, $noreg + BX_RET 14, $noreg +... +--- + +name: CheckAddrMode5FP16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $r3 + ; CHECK-LABEL: name: CheckAddrMode5FP16 + ; CHECK: $r0 = MOVr killed $r3, 14, $noreg, $noreg + ; CHECK-NEXT: BL @OUTLINED_FUNCTION_[[I5FP16:[0-9]+]] + ; CHECK-NEXT: $s5 = VLDRH $sp, 252, 14, $noreg + $r0 = MOVr killed $r3, 14, $noreg, $noreg + BL @foo, implicit-def dead $lr, implicit $sp + $s1 = VLDRH $sp, 0, 14, $noreg + $s2 = VLDRH $sp, 8, 14, $noreg + $s3 = VLDRH $sp, 10, 14, $noreg + $s4 = VLDRH $sp, 240, 14, $noreg + $s5 = VLDRH $sp, 252, 14, $noreg + BL @foo, implicit-def dead $lr, implicit $sp + $s1 = VLDRH $sp, 0, 14, $noreg + $s2 = VLDRH $sp, 8, 14, $noreg + $s3 = VLDRH $sp, 10, 14, $noreg + $s4 = VLDRH $sp, 240, 14, $noreg + $s5 = VLDRH $sp, 252, 14, $noreg + BX_RET 14, $noreg +... 
+--- + +name: foo +tracksRegLiveness: true +body: | + bb.0: + liveins: $lr + + BX_RET 14, $noreg + + ;CHECK: name: OUTLINED_FUNCTION_[[I5FP16]] + ;CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14, $noreg + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8 + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8 + ;CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp + ;CHECK-NEXT: $s1 = VLDRH $sp, 4, 14, $noreg + ;CHECK-NEXT: $s2 = VLDRH $sp, 12, 14, $noreg + ;CHECK-NEXT: $s3 = VLDRH $sp, 14, 14, $noreg + ;CHECK-NEXT: $s4 = VLDRH $sp, 244, 14, $noreg + ;CHECK-NEXT: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14, $noreg + + ;CHECK: name: OUTLINED_FUNCTION_[[I3]] + ;CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14, $noreg + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8 + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8 + ;CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp + ;CHECK-NEXT: $r1 = LDRSH $sp, $noreg, 8, 14, $noreg + ;CHECK-NEXT: $r2 = LDRSH $sp, $noreg, 16, 14, $noreg + ;CHECK-NEXT: $r3 = LDRSH $sp, $noreg, 18, 14, $noreg + ;CHECK-NEXT: $r4 = LDRSH $sp, $noreg, 255, 14, $noreg + ;CHECK-NEXT: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14, $noreg + + ;CHECK: name: OUTLINED_FUNCTION_[[I5]] + ;CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14, $noreg + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8 + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, 8 + ;CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp + ;CHECK-NEXT: $d0 = VLDRD $sp, 2, 14, $noreg + ;CHECK-NEXT: $d1 = VLDRD $sp, 10, 14, $noreg + ;CHECK-NEXT: $d2 = VLDRD $sp, 12, 14, $noreg + ;CHECK-NEXT: $d3 = VLDRD $sp, 255, 14, $noreg + ;CHECK-NEXT: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14, $noreg + + ;CHECK: name: OUTLINED_FUNCTION_[[I12]] + ;CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14, $noreg + ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset -8 + ;CHECK-NEXT: frame-setup 
CFI_INSTRUCTION offset $lr, 8 + ;CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp + ;CHECK-NEXT: $r1 = LDRi12 $sp, 8, 14, $noreg + ;CHECK-NEXT: $r2 = LDRi12 $sp, 16, 14, $noreg + ;CHECK-NEXT: $r3 = LDRi12 $sp, 18, 14, $noreg + ;CHECK-NEXT: $r4 = LDRi12 $sp, 4094, 14, $noreg + ;CHECK-NEXT: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14, $noreg + Index: llvm/test/CodeGen/ARM/O3-pipeline.ll =================================================================== --- llvm/test/CodeGen/ARM/O3-pipeline.ll +++ llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -158,8 +158,6 @@ ; CHECK-NEXT: Unpack machine instruction bundles ; CHECK-NEXT: optimise barriers pass ; CHECK-NEXT: MachineDominator Tree Construction -; CHECK-NEXT: ARM constant island placement and branch shortening pass -; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Machine Natural Loop Construction ; CHECK-NEXT: ReachingDefAnalysis ; CHECK-NEXT: ARM Low Overhead Loops pass @@ -169,6 +167,8 @@ ; CHECK-NEXT: Insert fentry calls ; CHECK-NEXT: Insert XRay ops ; CHECK-NEXT: Implement the 'patchable-function' attribute +; CHECK-NEXT: MachineDominator Tree Construction +; CHECK-NEXT: ARM constant island placement and branch shortening pass ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: ARM Assembly Printer Index: llvm/lib/Target/ARM/ARMTargetMachine.cpp =================================================================== --- llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -243,6 +243,10 @@ } initAsmInfo(); + + // ARM supports the MachineOutliner. 
+ setMachineOutliner(true); + setSupportsDefaultOutlining(false); } ARMBaseTargetMachine::~ARMBaseTargetMachine() = default; @@ -358,6 +362,7 @@ void addPreRegAlloc() override; void addPreSched2() override; void addPreEmitPass() override; + void addPreEmitPass2() override; std::unique_ptr<CSEConfigBase> getCSEConfig() const override; }; @@ -536,8 +541,11 @@ if (getOptLevel() != CodeGenOpt::None) addPass(createARMOptimizeBarriersPass()); - addPass(createARMConstantIslandPass()); addPass(createARMLowOverheadLoopsPass()); +} + +void ARMPassConfig::addPreEmitPass2() { + addPass(createARMConstantIslandPass()); // Identify valid longjmp targets for Windows Control Flow Guard. if (TM->getTargetTriple().isOSWindows()) Index: llvm/lib/Target/ARM/ARMBaseInstrInfo.h =================================================================== --- llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -343,7 +343,48 @@ ArrayRef<std::pair<unsigned, const char *>> getSerializableBitmaskMachineOperandTargetFlags() const override; + /// ARM supports the MachineOutliner. + bool isFunctionSafeToOutlineFrom(MachineFunction &MF, + bool OutlineFromLinkOnceODRs) const override; + outliner::OutlinedFunction getOutliningCandidateInfo( + std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override; + outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT, + unsigned Flags) const override; + bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, + unsigned &Flags) const override; + void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, + const outliner::OutlinedFunction &OF) const override; + MachineBasicBlock::iterator + insertOutlinedCall(Module &M, MachineBasicBlock &MBB, + MachineBasicBlock::iterator &It, MachineFunction &MF, + const outliner::Candidate &C) const override; + private: + /// \brief Sets the offsets on outlined instructions in \p MBB which use SP + /// so that they will be valid post-outlining. 
+ /// + /// \param MBB A \p MachineBasicBlock in an outlined function. + void fixupPostOutline(MachineBasicBlock &MBB) const; + + /// Returns an unused general-purpose register which can be used for + /// constructing an outlined call if one exists. Returns 0 otherwise. + unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const; + + /// Adds an instruction which saves the link register on top of the stack into + /// the MachineBasicBlock \p MBB at position \p It. + void saveLROnStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &It) const; + + /// Adds an instruction which restores the link register from the top of the + /// stack into the MachineBasicBlock \p MBB at position \p It. + void restoreLRFromStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &It) const; + + /// Returns true if the machine instruction offset can handle the stack fixup + /// and updates it if requested. + bool checkAndUpdateStackOffset(MachineInstr *MI, int64_t Fixup, + bool Updt) const; + unsigned getInstBundleLength(const MachineInstr &MI) const; int getVLDMDefCycle(const InstrItineraryData *ItinData, Index: llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp =================================================================== --- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -32,6 +32,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" @@ -5443,3 +5444,891 @@ return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) < ConstantMaterializationCost(Val2, Subtarget, !ForCodesize); } + +/// Constants defining how certain sequences should be outlined. 
+/// This encompasses how an outlined function should be called, and what kind of +/// frame should be emitted for that outlined function. +/// +/// \p MachineOutlinerDefault implies that the function should be called with +/// a save and restore of LR to the stack. +/// +/// That is, +/// +/// I1 Save LR OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// I3 Restore LR I2 +/// I3 +/// BX LR +/// +/// +-------------------------+--------+-----+ +/// | | Thumb2 | ARM | +/// +-------------------------+--------+-----+ +/// | Call overhead in Bytes | 8 | 12 | +/// | Frame overhead in Bytes | 2 | 4 | +/// | Stack fixup required | Yes | Yes | +/// +-------------------------+--------+-----+ +/// +/// \p MachineOutlinerRegSave implies that the function should be called with a +/// save and restore of LR to an available register. This allows us to avoid +/// stack fixups. Note that this outlining variant is compatible with the +/// NoLRSave case. +/// +/// That is, +/// +/// I1 Save LR OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// I3 Restore LR I2 +/// I3 +/// BX LR +/// +/// +-------------------------+--------+-----+ +/// | | Thumb2 | ARM | +/// +-------------------------+--------+-----+ +/// | Call overhead in Bytes | 8 | 12 | +/// | Frame overhead in Bytes | 2 | 4 | +/// | Stack fixup required | No | No | +/// +-------------------------+--------+-----+ +/// +/// \p MachineOutlinerTailCall implies that the function is being created from +/// a sequence of instructions ending in a return. 
+/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> B OUTLINED_FUNCTION I1 +/// BX LR I2 +/// BX LR +/// +/// +-------------------------+--------+-----+ +/// | | Thumb2 | ARM | +/// +-------------------------+--------+-----+ +/// | Call overhead in Bytes | 4 | 4 | +/// | Frame overhead in Bytes | 0 | 0 | +/// | Stack fixup required | No | No | +/// +-------------------------+--------+-----+ +/// +/// \p MachineOutlinerNoLRSave implies that the function should be called using +/// a BL instruction, but doesn't require LR to be saved and restored. This +/// happens when LR is known to be dead. +/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// I3 I2 +/// I3 +/// BX LR +/// +/// +-------------------------+--------+-----+ +/// | | Thumb2 | ARM | +/// +-------------------------+--------+-----+ +/// | Call overhead in Bytes | 4 | 4 | +/// | Frame overhead in Bytes | 4 | 4 | +/// | Stack fixup required | No | No | +/// +-------------------------+--------+-----+ +/// +/// \p MachineOutlinerThunk implies that the function is being created from +/// a sequence of instructions ending in a call. The outlined function is +/// called with a BL instruction, and the outlined function tail-calls the +/// original call destination. +/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// BL f I2 +/// B f +/// +/// +-------------------------+--------+-----+ +/// | | Thumb2 | ARM | +/// +-------------------------+--------+-----+ +/// | Call overhead in Bytes | 4 | 4 | +/// | Frame overhead in Bytes | 0 | 0 | +/// | Stack fixup required | No | No | +/// +-------------------------+--------+-----+ + +enum MachineOutlinerClass { + MachineOutlinerDefault, /// Emit a save, restore, call, and return. + MachineOutlinerRegSave, /// Same as default, but save to a register. + MachineOutlinerTailCall, /// Only emit a branch. + MachineOutlinerThunk, /// Emit a call and tail-call. 
+ MachineOutlinerNoLRSave /// Emit a call and return. +}; + +enum MachineOutlinerMBBFlags { + LRUnavailableSomewhere = 0x2, + HasCalls = 0x4, + UnsafeRegsDead = 0x8 +}; + +struct OutlinerCosts { + const int CallDefault; + const int FrameDefault; + const int CallRegSave; + const int FrameRegSave; + const int CallNoLRSave; + const int FrameNoLRSave; + const int CallTailCall; + const int FrameTailCall; + const int CallThunk; + const int FrameThunk; + + OutlinerCosts(const ARMSubtarget &target) + : CallDefault(target.isThumb() ? 8 : 12), + FrameDefault(target.isThumb() ? 2 : 4), + CallRegSave(target.isThumb() ? 8 : 12), + FrameRegSave(target.isThumb() ? 2 : 4), + CallNoLRSave(target.isThumb() ? 4 : 4), + FrameNoLRSave(target.isThumb() ? 4 : 4), + CallTailCall(target.isThumb() ? 4 : 4), + FrameTailCall(target.isThumb() ? 0 : 0), + CallThunk(target.isThumb() ? 4 : 4), + FrameThunk(target.isThumb() ? 0 : 0) {} +}; + +unsigned +ARMBaseInstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const { + assert(C.LRUWasSet && "LRU wasn't set?"); + MachineFunction *MF = C.getMF(); + const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo *>( + MF->getSubtarget().getRegisterInfo()); + + BitVector regsReserved = ARI->getReservedRegs(*MF); + // Check if there is an available register across the sequence that we can + // use. + for (unsigned Reg : + (Subtarget.isThumb() ? ARM::tGPRRegClass : ARM::GPRRegClass)) { + if (!(Reg < regsReserved.size() && regsReserved.test(Reg)) && + Reg != ARM::LR && // LR is not reserved, but don't use it. + Reg != ARM::R12 && // R12 is not guaranteed to be preserved. + C.LRU.available(Reg) && C.UsedInSequence.available(Reg)) + return Reg; + } + + // No suitable register. Return 0. 
+ return 0u; +} + +outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( + std::vector<outliner::Candidate> &RepeatedSequenceLocs) const { + outliner::Candidate &FirstCand = RepeatedSequenceLocs[0]; + unsigned SequenceSize = + std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0, + [this](unsigned Sum, const MachineInstr &MI) { + return Sum + getInstSizeInBytes(MI); + }); + + if (Subtarget.isThumb1Only()) + return outliner::OutlinedFunction(); + + // Properties about candidate MBBs that hold for all of them. + unsigned FlagsSetInAll = 0xF; + + // Compute liveness information for each candidate, and set FlagsSetInAll. + const TargetRegisterInfo &TRI = getRegisterInfo(); + std::for_each( + RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(), + [&FlagsSetInAll](outliner::Candidate &C) { FlagsSetInAll &= C.Flags; }); + + // According to the ARM Procedure Call Standard, the following are + // undefined on entry/exit from a function call: + // + // * Register R12(IP), + // * Condition codes (and thus the CPSR register) + // + // Because of this, we can't outline any sequence of instructions where one + // of these registers is live into/across it. Thus, we need to delete those + // candidates. + auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) { + // If the unsafe registers in this block are all dead, then we don't need + // to compute liveness here. + if (C.Flags & UnsafeRegsDead) + return false; + C.initLRU(TRI); + LiveRegUnits LRU = C.LRU; + return (!LRU.available(ARM::R12) || !LRU.available(ARM::CPSR)); + }; + + // Are there any candidates where those registers are live? + if (!(FlagsSetInAll & UnsafeRegsDead)) { + // Erase every candidate that violates the restrictions above. (It could be + // true that we have viable candidates, so it's not worth bailing out in + // the case that, say, 1 out of 20 candidates violate the restrictions.) 
+ RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(), + RepeatedSequenceLocs.end(), + CantGuaranteeValueAcrossCall), + RepeatedSequenceLocs.end()); + + // If the sequence doesn't have enough candidates left, then we're done. + if (RepeatedSequenceLocs.size() < 2) + return outliner::OutlinedFunction(); + } + + // At this point, we have only "safe" candidates to outline. Figure out + // frame + call instruction information. + + unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode(); + + // Helper lambda which sets call information for every candidate. + auto SetCandidateCallInfo = + [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) { + for (outliner::Candidate &C : RepeatedSequenceLocs) + C.setCallInfo(CallID, NumBytesForCall); + }; + + OutlinerCosts *Costs = new OutlinerCosts(Subtarget); + unsigned FrameID = MachineOutlinerDefault; + unsigned NumBytesToCreateFrame = Costs->FrameDefault; + + // If the last instruction in any candidate is a terminator, then we should + // tail call all of the candidates. + if (RepeatedSequenceLocs[0].back()->isTerminator()) { + FrameID = MachineOutlinerTailCall; + NumBytesToCreateFrame = Costs->FrameTailCall; + SetCandidateCallInfo(MachineOutlinerTailCall, Costs->CallTailCall); + } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX || + LastInstrOpcode == ARM::tBL || LastInstrOpcode == ARM::tBLXr || + LastInstrOpcode == ARM::tBLXi) { + // FIXME: Do we need to check if the code after this uses the value of LR? + FrameID = MachineOutlinerThunk; + NumBytesToCreateFrame = Costs->FrameThunk; + SetCandidateCallInfo(MachineOutlinerThunk, Costs->CallThunk); + } else { + // We need to decide how to emit calls + frames. We can always emit the same + // frame if we don't need to save to the stack. If we have to save to the + // stack, then we need a different frame. 
+ unsigned NumBytesNoStackCalls = 0; + std::vector<outliner::Candidate> CandidatesWithoutStackFixups; + + for (outliner::Candidate &C : RepeatedSequenceLocs) { + C.initLRU(TRI); + + // Is LR available? If so, we don't need a save. + if (C.LRU.available(ARM::LR)) { + NumBytesNoStackCalls += Costs->CallNoLRSave; + C.setCallInfo(MachineOutlinerNoLRSave, Costs->CallNoLRSave); + CandidatesWithoutStackFixups.push_back(C); + } + + // Is an unused register available? If so, we won't modify the stack, so + // we can outline with the same frame type as those that don't save LR. + else if (findRegisterToSaveLRTo(C)) { + NumBytesNoStackCalls += Costs->CallRegSave; + C.setCallInfo(MachineOutlinerRegSave, Costs->CallRegSave); + CandidatesWithoutStackFixups.push_back(C); + } + + // Is SP used in the sequence at all? If not, we don't have to modify + // the stack, so we are guaranteed to get the same frame. + else if (C.UsedInSequence.available(ARM::SP)) { + NumBytesNoStackCalls += Costs->CallDefault; + C.setCallInfo(MachineOutlinerDefault, Costs->CallDefault); + CandidatesWithoutStackFixups.push_back(C); + } + + // If we outline this, we need to modify the stack. Pretend we don't + // outline this by saving all of its bytes. + else { + NumBytesNoStackCalls += SequenceSize; + } + } + + // If there are no places where we have to save LR, then note that we don't + // have to update the stack. Otherwise, give every candidate the default + // call type. + if (NumBytesNoStackCalls <= + RepeatedSequenceLocs.size() * Costs->CallDefault) { + RepeatedSequenceLocs = CandidatesWithoutStackFixups; + FrameID = MachineOutlinerNoLRSave; + } else { + SetCandidateCallInfo(MachineOutlinerDefault, Costs->CallDefault); + } + } + + // Does every candidate's MBB contain a call? If so, then we might have a + // call in the range. + if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) { + // Check if the range contains a call. These require a save + restore of the + // link register. 
+ if (std::any_of(FirstCand.front(), FirstCand.back(), + [](const MachineInstr &MI) { return MI.isCall(); })) + NumBytesToCreateFrame += Costs->FrameDefault; + + // Handle the last instruction separately. If this is a tail call, then the + // last instruction is a call. We don't want to save + restore in this case. + // However, it could be possible that the last instruction is a call without + // it being valid to tail call this sequence. We should consider this as + // well. + else if (FrameID != MachineOutlinerThunk && + FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall()) + NumBytesToCreateFrame += Costs->FrameThunk; + } + + return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize, + NumBytesToCreateFrame, FrameID); +} + +bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom( + MachineFunction &MF, bool OutlineFromLinkOnceODRs) const { + const Function &F = MF.getFunction(); + + // Can F be deduplicated by the linker? If it can, don't outline from it. + if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage()) + return false; + + // Don't outline from functions with section markings; the program could + // expect that all the code is in the named section. + // FIXME: Allow outlining from multiple functions with the same section + // marking. + if (F.hasSection()) + return false; + + // It's safe to outline from MF. + return true; +} + +bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, + unsigned &Flags) const { + // Check if LR is available through all of the MBB. If it's not, then set + // a flag. + assert(MBB.getParent()->getRegInfo().tracksLiveness() && + "Suitable Machine Function for outlining must track liveness"); + + LiveRegUnits LRU(getRegisterInfo()); + + std::for_each(MBB.rbegin(), MBB.rend(), + [&LRU](MachineInstr &MI) { LRU.accumulate(MI); }); + + // Check if each of the unsafe registers are available... 
+ bool R12AvailableInBlock = LRU.available(ARM::R12); + bool CPSRAvailableInBlock = LRU.available(ARM::CPSR); + + // If all of these are dead (and not live out), we know we don't have to check + // them later. + if (R12AvailableInBlock && CPSRAvailableInBlock) + Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead; + + // Now, add the live outs to the set. + LRU.addLiveOuts(MBB); + + // If any of these registers is available in the MBB, but also a live out of + // the block, then we know outlining is unsafe. + if (R12AvailableInBlock && !LRU.available(ARM::R12)) + return false; + if (CPSRAvailableInBlock && !LRU.available(ARM::CPSR)) + return false; + + // Check if there's a call inside this MachineBasicBlock. If there is, then + // set a flag. + if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); })) + Flags |= MachineOutlinerMBBFlags::HasCalls; + + if (!LRU.available(ARM::LR)) + Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere; + + return true; +} + +bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI, + int64_t Fixup, + bool Updt) const { + int SPIdx = MI->findRegisterUseOperandIdx(ARM::SP); + + if (SPIdx < 0) + // No SP operand + return true; + + unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask); + + // Stack might be involved but addressing mode doesn't handle any offset. 
+ // Note: AddrModeT1_[1|2|4] don't operate on SP + if (AddrMode == ARMII::AddrMode1 // Arithmetic instructions + || AddrMode == ARMII::AddrMode4 // Load/Store Multiple + || AddrMode == ARMII::AddrMode6 // Neon Load/Store Multiple + || AddrMode == ARMII::AddrModeT2_so // SP can't be used as base register + || AddrMode == ARMII::AddrModeT2_pc // PCrel access + || AddrMode == ARMII::AddrMode2 // Used by PRE and POST indexed LD/ST + || AddrMode == ARMII::AddrModeNone) + return false; + + // If SP is not the base register we can't do much + if (SPIdx != 1) { + if (AddrMode != ARMII::AddrModeT2_i8s4) + return false; + else if (SPIdx != 2) + return false; + } + + unsigned NumOps = MI->getDesc().getNumOperands(); + unsigned ImmIdx = NumOps - 3; + + const MachineOperand &Offset = MI->getOperand(ImmIdx); + assert(Offset.isImm() && "Is not an immediate"); + int64_t OffVal = Offset.getImm(); + + if (OffVal < 0) + // Don't override data if they are below SP. + return false; + + unsigned NumBits = 0; + unsigned Scale = 1; + + switch (AddrMode) { + case ARMII::AddrMode3: + if (ARM_AM::getAM3Op(OffVal) == ARM_AM::sub) + return false; + OffVal = ARM_AM::getAM3Offset(OffVal); + NumBits = 8; + break; + case ARMII::AddrMode5: + if (ARM_AM::getAM5Op(OffVal) == ARM_AM::sub) + return false; + OffVal = ARM_AM::getAM5Offset(OffVal); + NumBits = 8; + Scale = 4; + break; + case ARMII::AddrMode5FP16: + if (ARM_AM::getAM5FP16Op(OffVal) == ARM_AM::sub) + return false; + OffVal = ARM_AM::getAM5FP16Offset(OffVal); + NumBits = 8; + Scale = 2; + break; + case ARMII::AddrModeT2_i8: + NumBits = 8; + break; + case ARMII::AddrModeT2_i8s4: + case ARMII::AddrModeT2_ldrex: + NumBits = 8; + Scale = 4; + break; + case ARMII::AddrModeT2_i12: + case ARMII::AddrMode_i12: + NumBits = 12; + break; + case ARMII::AddrModeT2_i7: + NumBits = 7; + break; + case ARMII::AddrModeT2_i7s2: + NumBits = 7; + Scale = 2; + break; + case ARMII::AddrModeT2_i7s4: + NumBits = 7; + Scale = 4; + break; + case ARMII::AddrModeT1_s: // 
SP-relative LD/ST + NumBits = 8; + Scale = 4; + break; + default: + llvm_unreachable("Unsupported addressing mode!"); + } + // Make sure the offset is encodable for instructions that scale the + // immediate. + if (((OffVal * Scale + Fixup) & (Scale - 1)) != 0) + return false; + + OffVal += Fixup / Scale; + + unsigned Mask = (1 << NumBits) - 1; + + if (OffVal <= Mask) { + if (Updt) + MI->getOperand(ImmIdx).setImm(OffVal); + return true; + } + + return false; +} + +outliner::InstrType +ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, + unsigned Flags) const { + MachineInstr &MI = *MIT; + MachineBasicBlock *MBB = MI.getParent(); + MachineFunction *MF = MBB->getParent(); + const TargetRegisterInfo *TRI = &getRegisterInfo(); + + // Be conservative with inline ASM + if (MI.isInlineAsm()) + return outliner::InstrType::Invisible; + + // Don't allow debug values to impact outlining type. + if (MI.isDebugInstr() || MI.isIndirectDebugValue()) + return outliner::InstrType::Invisible; + + // At this point, KILL instructions don't really tell us much so we can go + // ahead and skip over them. + if (MI.isKill()) + return outliner::InstrType::Invisible; + + // PIC instructions contain labels, outlining them would break offset + // computing. + unsigned Opc = MI.getOpcode(); + switch (Opc) { + case ARM::t2IT: + case ARM::tPICADD: // This is used in Thumb1 and Thumb2 + case ARM::PICADD: + case ARM::PICSTR: + case ARM::PICSTRB: + case ARM::PICSTRH: + case ARM::PICLDR: + case ARM::PICLDRB: + case ARM::PICLDRH: + case ARM::PICLDRSB: + case ARM::PICLDRSH: + case ARM::t2LDRpci_pic: + case ARM::t2MOVi16_ga_pcrel: + case ARM::t2MOVTi16_ga_pcrel: + case ARM::t2MOV_ga_pcrel: + return outliner::InstrType::Illegal; + default: + break; + } + + // Is this a terminator for a basic block? + if (MI.isTerminator()) { + // Don't outline if the branch is not unconditional. 
+ if (Opc == ARM::BX_RET || Opc == ARM::tBX_RET || Opc == ARM::MOVPCLR) { + if (MI.getOperand(0).getImm() != ARMCC::AL) + return outliner::InstrType::Illegal; + } + if (Opc == ARM::LDMIA_RET) { + if (MI.getOperand(2).getImm() != ARMCC::AL) + return outliner::InstrType::Illegal; + } + + // Is this the end of a function? + if (MI.getParent()->succ_empty()) + return outliner::InstrType::Legal; + + // It's not, so don't outline it. + return outliner::InstrType::Illegal; + } + + // Make sure none of the operands are un-outlinable. + for (const MachineOperand &MOP : MI.operands()) { + if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() || + MOP.isTargetIndex()) + return outliner::InstrType::Illegal; + + // If it uses LR then don't touch it. + if (MOP.isReg() && !MOP.isImplicit() && (MOP.getReg() == ARM::LR)) + return outliner::InstrType::Illegal; + } + + // If MI is a call we might be able to outline it. We don't want to outline + // any calls that rely on the position of items on the stack. When we outline + // something containing a call, we have to emit a save and restore of LR in + // the outlined function. Currently, this always happens by saving LR to the + // stack. Thus, if we outline, say, half the parameters for a function call + // plus the call, then we'll break the callee's expectations for the layout + // of the stack. + // + // FIXME: Allow calls to functions which construct a stack frame, as long + // as they don't access arguments on the stack. + // FIXME: Figure out some way to analyze functions defined in other modules. + // We should be able to compute the memory usage based on the IR calling + // convention, even if we can't see the definition. + if (MI.isCall()) { + // Get the function associated with the call. Look at each operand and find + // the one that represents the callee and get its name. 
+ const Function *Callee = nullptr; + for (const MachineOperand &MOP : MI.operands()) { + if (MOP.isGlobal()) { + Callee = dyn_cast<Function>(MOP.getGlobal()); + break; + } + } + + // Never outline calls to mcount. There isn't any rule that would require + // this, but the Linux kernel's "ftrace" feature depends on it. + if (Callee && Callee->getName() == "\01_mcount") + return outliner::InstrType::Illegal; + + // If we don't know anything about the callee, assume it depends on the + // stack layout of the caller. In that case, it's only legal to outline + // as a tail-call. Whitelist the call instructions we know about so we + // don't get unexpected results with call pseudo-instructions. + auto UnknownCallOutlineType = outliner::InstrType::Illegal; + if (MI.getOpcode() == ARM::BL || MI.getOpcode() == ARM::tBL || + MI.getOpcode() == ARM::BLX || MI.getOpcode() == ARM::tBLXr || + MI.getOpcode() == ARM::tBLXi) + UnknownCallOutlineType = outliner::InstrType::LegalTerminator; + + if (!Callee) + return UnknownCallOutlineType; + + // We have a function we have information about. Check if it's something + // we can safely outline. + MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee); + + // We don't know what's going on with the callee at all. Don't touch it. + if (!CalleeMF) + return UnknownCallOutlineType; + + // Check if we know anything about the callee saves on the function. If we + // don't, then don't touch it, since that implies that we haven't + // computed anything about its stack frame yet. + MachineFrameInfo &MFI = CalleeMF->getFrameInfo(); + if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 || + MFI.getNumObjects() > 0) + return UnknownCallOutlineType; + + // At this point, we can say that CalleeMF ought to not pass anything on the + // stack. Therefore, we can outline it. + return outliner::InstrType::Legal; + } + + // Don't outline positions. 
+ if (MI.isPosition()) + return outliner::InstrType::Illegal; + + // Don't touch the link register + if (MI.readsRegister(ARM::LR, &getRegisterInfo()) || + MI.modifiesRegister(ARM::LR, &getRegisterInfo())) + return outliner::InstrType::Illegal; + + // Does this use the stack? + if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI)) { + // True if there is no chance that any outlined candidate from this range + // could require stack fixups. That is, both + // * LR is available in the range (No save/restore around call) + // * The range doesn't include calls (No save/restore in outlined frame) + // are true. + // FIXME: This is very restrictive; the flags check the whole block, + // not just the bit we will try to outline. + bool MightNeedStackFixUp = + (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere | + MachineOutlinerMBBFlags::HasCalls)); + + // If this instruction is in a range where it *never* needs to be fixed + // up, then we can *always* outline it. This is true even if it's not + // possible to fix that instruction up. + // + // Why? Consider two equivalent instructions I1, I2 where both I1 and I2 + // use SP. Suppose that I1 sits within a range that definitely doesn't + // need stack fixups, while I2 sits in a range that does. + // + // First, I1 can be outlined as long as we *never* fix up the stack in + // any sequence containing it. I1 is already a safe instruction in the + // original program, so as long as we don't modify it we're good to go. + // So this leaves us with showing that outlining I2 won't break our + // program. + // + // Suppose I1 and I2 belong to equivalent candidate sequences. When we + // look at I2, we need to see if it can be fixed up. Suppose I2, (and + // thus I1) cannot be fixed up. Then I2 will be assigned an unique + // integer label; thus, I2 cannot belong to any candidate sequence (a + // contradiction). Suppose I2 can be fixed up. Then I1 can be fixed up + // as well, so we're good. 
Thus, I1 is always safe to outline. + if (!MightNeedStackFixUp) + return outliner::InstrType::Legal; + + // Any modification of SP will break our code to save/restore LR. + // FIXME: We could handle some instructions which add a constant offset to + // SP, with a bit more work. + if (MI.modifiesRegister(ARM::SP, TRI)) + return outliner::InstrType::Illegal; + + // At this point, we have a stack instruction that we might need to fix + // up. We'll handle it if it's a load or store. + if (checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(), + false)) + // It's in range, so we can outline it. + return outliner::InstrType::Legal; + + // We can't fix it up, so don't outline it. + return outliner::InstrType::Illegal; + } + + return outliner::InstrType::Legal; +} + +void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const { + for (MachineInstr &MI : MBB) { + if (!MI.mayLoadOrStore()) + continue; + checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(), true); + } +} + +void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &It) const { + unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM; + int Align = -Subtarget.getStackAlignment().value(); + BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP) + .addReg(ARM::LR, RegState::Kill) + .addReg(ARM::SP) + .addImm(Align) + .add(predOps(ARMCC::AL)); +} + +void ARMBaseInstrInfo::restoreLRFromStack( + MachineBasicBlock &MBB, MachineBasicBlock::iterator &It) const { + unsigned Opc = Subtarget.isThumb() ? 
ARM::t2LDR_POST : ARM::LDR_POST_IMM; + MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP); + if (!Subtarget.isThumb()) + MIB.addReg(0); + MIB.addImm(Subtarget.getStackAlignment().value()).add(predOps(ARMCC::AL)); +} + +void ARMBaseInstrInfo::buildOutlinedFrame( + MachineBasicBlock &MBB, MachineFunction &MF, + const outliner::OutlinedFunction &OF) const { + unsigned StackAlignment = Subtarget.getStackAlignment().value(); + // For thunk outlining, rewrite the last instruction from a call to a + // tail-call. + if (OF.FrameConstructionID == MachineOutlinerThunk) { + MachineInstr *Call = &*--MBB.instr_end(); + if (Subtarget.isThumb()) + if (Call->getOperand(2).isReg()) + BuildMI(MBB, MBB.end(), DebugLoc(), get(ARM::tTAILJMPr)) + .add(Call->getOperand(2)); + else if (Subtarget.isTargetMachO()) + BuildMI(MBB, MBB.end(), DebugLoc(), get(ARM::tTAILJMPd)) + .add(Call->getOperand(2)) + .add(predOps(ARMCC::AL)); + else + BuildMI(MBB, MBB.end(), DebugLoc(), get(ARM::tTAILJMPdND)) + .add(Call->getOperand(2)) + .add(predOps(ARMCC::AL)); + else if (Call->getOperand(0).isReg()) + BuildMI(MBB, MBB.end(), DebugLoc(), get(ARM::TAILJMPr)) + .add(Call->getOperand(0)); + else + BuildMI(MBB, MBB.end(), DebugLoc(), get(ARM::TAILJMPd)) + .add(Call->getOperand(0)); + Call->eraseFromParent(); + } + // Is there a call in the outlined range? + auto IsNonTailCall = [](MachineInstr &MI) { + return MI.isCall() && !MI.isReturn(); + }; + if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) { + // LR has to be a live in so that we can save it. 
+ MBB.addLiveIn(ARM::LR); + + MachineBasicBlock::iterator It = MBB.begin(); + MachineBasicBlock::iterator Et = MBB.end(); + + if (OF.FrameConstructionID == MachineOutlinerTailCall || + OF.FrameConstructionID == MachineOutlinerThunk) + Et = std::prev(MBB.end()); + + // Insert a save before the outlined region + saveLROnStack(MBB, It); + + // Fix up the instructions in the range, since we're going to modify the + // stack. + assert(OF.FrameConstructionID != MachineOutlinerDefault && + "Can only fix up stack references once"); + fixupPostOutline(MBB); + + const TargetSubtargetInfo &STI = MF.getSubtarget(); + const MCRegisterInfo *MRI = STI.getRegisterInfo(); + unsigned DwarfReg = MRI->getDwarfRegNum(ARM::LR, true); + + // Add a CFI saying the stack was moved down. + int64_t StackPosEntry = + MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, + StackAlignment)); + BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) + .addCFIIndex(StackPosEntry) + .setMIFlags(MachineInstr::FrameSetup); + + // Add a CFI saying that the LR that we want to find is now higher than + // before. + int64_t LRPosEntry = + MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, + StackAlignment)); + BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) + .addCFIIndex(LRPosEntry) + .setMIFlags(MachineInstr::FrameSetup); + + // Insert a restore before the terminator for the function. + // Restore the link register. + restoreLRFromStack(MBB, Et); + } + + // If this is a tail call outlined function, then there's already a return. + if (OF.FrameConstructionID == MachineOutlinerTailCall || + OF.FrameConstructionID == MachineOutlinerThunk) + return; + + // It's not a tail call, so we have to insert the return ourselves. Get the + // correct opcode from current feature set. + BuildMI(MBB, MBB.end(), DebugLoc(), get(Subtarget.getReturnOpcode())) + .add(predOps(ARMCC::AL)); + + // Did we have to modify the stack by saving the link register? 
+ if (OF.FrameConstructionID != MachineOutlinerDefault) + return; + + // We modified the stack. + // Walk over the basic block and fix up all the stack accesses. + fixupPostOutline(MBB); +} + +MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall( + Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, + MachineFunction &MF, const outliner::Candidate &C) const { + MachineInstrBuilder MIB; + MachineBasicBlock::iterator CallPt; + unsigned Opc; + bool isThumb = Subtarget.isThumb(); + + // Are we tail calling? + if (C.CallConstructionID == MachineOutlinerTailCall) { + // If yes, then we can just branch to the label. + Opc = isThumb + ? (Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) + : ARM::TAILJMPd; + MIB = BuildMI(MF, DebugLoc(), get(Opc)) + .addGlobalAddress(M.getNamedValue(MF.getName())); + if (isThumb) + MIB.add(predOps(ARMCC::AL)); + It = MBB.insert(It, MIB); + return It; + } + + // Create the call instruction. + Opc = isThumb ? ARM::tBL : ARM::BL; + MachineInstrBuilder CallMIB = BuildMI(MF, DebugLoc(), get(Opc)); + ; + if (isThumb) + CallMIB.add(predOps(ARMCC::AL)); + CallMIB.addGlobalAddress(M.getNamedValue(MF.getName())); + + // Are we saving the link register? + if (C.CallConstructionID == MachineOutlinerNoLRSave || + C.CallConstructionID == MachineOutlinerThunk) { + // No, so just insert the call. + It = MBB.insert(It, CallMIB); + return It; + } + + MBB.addLiveIn(ARM::LR); + // Can we save to a register? + if (C.CallConstructionID == MachineOutlinerRegSave) { + unsigned Reg = findRegisterToSaveLRTo(C); + assert(Reg != 0 && "No callee-saved register available?"); + + // Save and restore LR from that register. + copyPhysReg(MBB, It, DebugLoc(), Reg, ARM::LR, true); + CallPt = MBB.insert(It, CallMIB); + copyPhysReg(MBB, It, DebugLoc(), ARM::LR, Reg, true); + It--; + return CallPt; + } + + // We have the default case. Save and restore from SP. 
+ saveLROnStack(MBB, It); + CallPt = MBB.insert(It, CallMIB); + restoreLRFromStack(MBB, It); + It--; + return CallPt; +} Index: llvm/lib/CodeGen/ReachingDefAnalysis.cpp =================================================================== --- llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -145,6 +145,9 @@ LLVM_DEBUG(dbgs() << "********** REACHING DEFINITION ANALYSIS **********\n"); + // Guarantee that BB numbers are in sync with their positions. + MF->RenumberBlocks(); + // Initialize the MBBOutRegsInfos MBBOutRegsInfos.resize(mf.getNumBlockIDs()); Index: llvm/lib/CodeGen/MachineOutliner.cpp =================================================================== --- llvm/lib/CodeGen/MachineOutliner.cpp +++ llvm/lib/CodeGen/MachineOutliner.cpp @@ -1157,6 +1157,9 @@ // Outlined functions shouldn't preserve liveness. MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness); MF.getRegInfo().freezeReservedRegs(MF); + MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA); + MF.getProperties().set(MachineFunctionProperties::Property::NoPHIs); + MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); // If there's a DISubprogram associated with this outlined function, then // emit debug info for the outlined function. Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -5859,8 +5859,9 @@ // We only support -moutline in AArch64 right now. If we're not compiling // for AArch64, emit a warning and ignore the flag. Otherwise, add the // proper mllvm flags. 
- if (Triple.getArch() != llvm::Triple::aarch64 && - Triple.getArch() != llvm::Triple::aarch64_32) { + if (!(Triple.isARM() || Triple.isThumb() || + Triple.getArch() == llvm::Triple::aarch64 || + Triple.getArch() == llvm::Triple::aarch64_32)) { D.Diag(diag::warn_drv_moutline_unsupported_opt) << Triple.getArchName(); } else { CmdArgs.push_back("-mllvm");
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits