This revision was automatically updated to reflect the committed changes.
Closed by commit rG0e4827aa4e4a: [ARM][MachineOutliner] Add Machine Outliner
support for ARM. (authored by yroux).
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D76066/new/
https://reviews.llvm.org/D76066
Files:
clang/lib/Driver/ToolChains/Clang.cpp
llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
llvm/lib/Target/ARM/ARMBaseInstrInfo.h
llvm/lib/Target/ARM/ARMTargetMachine.cpp
llvm/test/CodeGen/ARM/machine-outliner-tail.ll
llvm/test/CodeGen/ARM/machine-outliner-thunk.ll
llvm/test/CodeGen/ARM/machine-outliner-unoutlinable.mir
llvm/test/CodeGen/ARM/machine-outliner-unsafe-registers.mir
Index: llvm/test/CodeGen/ARM/machine-outliner-unsafe-registers.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/machine-outliner-unsafe-registers.mir
@@ -0,0 +1,114 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=arm-- -run-pass=machine-outliner -verify-machineinstrs \
+# RUN: %s -o - | FileCheck %s
+
+--- |
+ define void @outline_cpsr_r12_ok() #0 { ret void }
+ define void @dont_outline_cpsr_r12_1() #0 { ret void }
+ define void @dont_outline_cpsr_r12_2() #0 { ret void }
+ declare void @z(i32, i32, i32, i32)
+
+ attributes #0 = { minsize optsize }
+...
+---
+
+name: outline_cpsr_r12_ok
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: outline_cpsr_r12_ok
+ ; CHECK: bb.0:
+ ; CHECK: BL @OUTLINED_FUNCTION_0
+ ; CHECK: $r3 = MOVr $r12, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK: bb.1:
+ ; CHECK: BL @OUTLINED_FUNCTION_0
+ ; CHECK: $r4 = MOVr $r12, 14 /* CC::al */, $noreg, $noreg
+ bb.0:
+ $r12 = MOVi 1, 14, $noreg, $noreg
+ CMPri $r12, 42, 14, $noreg, implicit-def $cpsr
+ $r0 = MOVi 1, 14, $noreg, $noreg
+ $r1 = MOVi 1, 14, $noreg, $noreg
+ $r2 = MOVi 1, 14, $noreg, $noreg
+ $r3 = MOVi 1, 14, $noreg, $noreg
+ BL @z
+ $r3 = MOVr $r12, 14, $noreg, $noreg
+ bb.1:
+ $r12 = MOVi 1, 14, $noreg, $noreg
+ CMPri $r12, 42, 14, $noreg, implicit-def $cpsr
+ $r0 = MOVi 1, 14, $noreg, $noreg
+ $r1 = MOVi 1, 14, $noreg, $noreg
+ $r2 = MOVi 1, 14, $noreg, $noreg
+ $r3 = MOVi 1, 14, $noreg, $noreg
+ BL @z
+ $r4 = MOVr $r12, 14, $noreg, $noreg
+ bb.2:
+ BX_RET 14, $noreg
+...
+---
+
+name: dont_outline_cpsr_r12_1
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: dont_outline_cpsr_r12_1
+ ; CHECK: bb.0:
+ ; CHECK: BL @OUTLINED_FUNCTION_1
+ ; CHECK: bb.1:
+ ; CHECK: BL @OUTLINED_FUNCTION_1
+ ; CHECK-LABEL: bb.2:
+ ; CHECK-NOT: BL @OUTLINED_FUNCTION_1
+ bb.0:
+ $r0 = MOVi 2, 14, $noreg, $noreg
+ $r1 = MOVi 2, 14, $noreg, $noreg
+ $r2 = MOVi 2, 14, $noreg, $noreg
+ $r3 = MOVi 2, 14, $noreg, $noreg
+ BL @z
+ bb.1:
+ $r0 = MOVi 2, 14, $noreg, $noreg
+ $r1 = MOVi 2, 14, $noreg, $noreg
+ $r2 = MOVi 2, 14, $noreg, $noreg
+ $r3 = MOVi 2, 14, $noreg, $noreg
+ BL @z
+ bb.2:
+ $r12 = MOVi 1, 14, $noreg, $noreg
+ CMPri $r12, 42, 14, $noreg, implicit-def $cpsr
+ $r0 = MOVi 2, 14, $noreg, $noreg
+ $r1 = MOVi 2, 14, $noreg, $noreg
+ $r2 = MOVi 2, 14, $noreg, $noreg
+ $r3 = MOVi 2, 14, $noreg, $noreg
+ BL @z
+ bb.3:
+ liveins: $cpsr, $r12
+ BX_RET 14, $noreg
+...
+---
+
+name: dont_outline_cpsr_r12_2
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: dont_outline_cpsr_r12_2
+ ; CHECK-NOT: BL @OUTLINED_FUNCTION
+ bb.0:
+ liveins: $r12
+ CMPri $r12, 42, 14, $noreg, implicit-def $cpsr
+ $r0 = MOVi 3, 14, $noreg, $noreg
+ $r1 = MOVi 3, 14, $noreg, $noreg
+ $r2 = MOVi 3, 14, $noreg, $noreg
+ $r3 = MOVi 3, 14, $noreg, $noreg
+ BL @z
+ bb.1:
+ liveins: $r12
+ CMPri $r12, 42, 14, $noreg, implicit-def $cpsr
+ $r0 = MOVi 3, 14, $noreg, $noreg
+ $r1 = MOVi 3, 14, $noreg, $noreg
+ $r2 = MOVi 3, 14, $noreg, $noreg
+ $r3 = MOVi 3, 14, $noreg, $noreg
+ BL @z
+ bb.2:
+ liveins: $r12
+ CMPri $r12, 42, 14, $noreg, implicit-def $cpsr
+ $r0 = MOVi 3, 14, $noreg, $noreg
+ $r1 = MOVi 3, 14, $noreg, $noreg
+ $r2 = MOVi 3, 14, $noreg, $noreg
+ $r3 = MOVi 3, 14, $noreg, $noreg
+ BL @z
+ bb.3:
+ BX_RET 14, $noreg
Index: llvm/test/CodeGen/ARM/machine-outliner-unoutlinable.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/machine-outliner-unoutlinable.mir
@@ -0,0 +1,167 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=thumbv7-- -run-pass=machine-outliner -verify-machineinstrs \
+# RUN: %s -o - | FileCheck %s
+
+--- |
+ define void @dont_outline_asm() #0 { ret void }
+ define void @dont_outline_lr() #0 { ret void }
+ define void @dont_outline_lr2() #0 { ret void }
+ define void @dont_outline_it() #0 { ret void }
+ define void @dont_outline_pic() #0 { ret void }
+ define void @dont_outline_mve() #0 { ret void }
+ declare void @z(i32, i32, i32, i32)
+
+ attributes #0 = { minsize optsize }
+...
+---
+
+name: dont_outline_asm
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: dont_outline_asm
+ ; CHECK: bb.0:
+ ; CHECK: INLINEASM &"movs r0, #42", 1
+ ; CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_0
+ ; CHECK: bb.1:
+ ; CHECK: INLINEASM &"movs r0, #42", 1
+ ; CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_0
+ bb.0:
+ INLINEASM &"movs r0, #42", 1
+ $r0, dead $cpsr = tMOVi8 1, 14, $noreg
+ $r1, dead $cpsr = tMOVi8 1, 14, $noreg
+ $r2, dead $cpsr = tMOVi8 1, 14, $noreg
+ $r3, dead $cpsr = tMOVi8 1, 14, $noreg
+ tBL 14, $noreg, @z
+ bb.1:
+ INLINEASM &"movs r0, #42", 1
+ $r0, dead $cpsr = tMOVi8 1, 14, $noreg
+ $r1, dead $cpsr = tMOVi8 1, 14, $noreg
+ $r2, dead $cpsr = tMOVi8 1, 14, $noreg
+ $r3, dead $cpsr = tMOVi8 1, 14, $noreg
+ tBL 14, $noreg, @z
+ bb.2:
+ tBX_RET 14, $noreg
+...
+---
+
+name: dont_outline_lr
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: dont_outline_lr
+ ; CHECK-NOT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION
+ bb.0:
+ liveins: $lr
+ $r0 = tMOVr $lr, 14, $noreg
+ $r1 = tMOVr $lr, 14, $noreg
+ $r2 = tMOVr $lr, 14, $noreg
+ $r3 = tMOVr $lr, 14, $noreg
+ tBL 14, $noreg, @z
+ bb.1:
+ liveins: $lr
+ $r0 = tMOVr $lr, 14, $noreg
+ $r1 = tMOVr $lr, 14, $noreg
+ $r2 = tMOVr $lr, 14, $noreg
+ $r3 = tMOVr $lr, 14, $noreg
+ tBL 14, $noreg, @z
+ bb.2:
+ tBX_RET 14, $noreg
+...
+---
+
+name: dont_outline_lr2
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: dont_outline_lr2
+ ; CHECK-NOT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION
+ bb.0:
+ liveins: $r0
+ $lr = tMOVr $r0, 14, $noreg
+ $r1 = tMOVr $r0, 14, $noreg
+ $r2 = tMOVr $r0, 14, $noreg
+ $r3 = tMOVr $r0, 14, $noreg
+ $r4 = tMOVr $r0, 14, $noreg
+ tBLXr 14, $lr, $noreg
+ bb.1:
+ liveins: $r0
+ $lr = tMOVr $r0, 14, $noreg
+ $r1 = tMOVr $r0, 14, $noreg
+ $r2 = tMOVr $r0, 14, $noreg
+ $r3 = tMOVr $r0, 14, $noreg
+ $r4 = tMOVr $r0, 14, $noreg
+ tBLXr 14, $lr, $noreg
+ bb.2:
+ tBX_RET 14, $noreg
+...
+---
+
+name: dont_outline_it
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: dont_outline_it
+ ; CHECK-NOT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION
+ bb.0:
+ t2IT 0, 1, implicit-def $itstate
+ $r0, dead $cpsr = tMOVi8 1, 0, $noreg, implicit $itstate
+ $r1, dead $cpsr = tMOVi8 1, 0, $noreg, implicit $itstate
+ $r2, dead $cpsr = tMOVi8 1, 0, $noreg, implicit $itstate
+ $r3, dead $cpsr = tMOVi8 1, 0, $noreg, implicit $itstate
+ tBL 14, $noreg, @z
+ bb.1:
+ t2IT 0, 1, implicit-def $itstate
+ $r0, dead $cpsr = tMOVi8 1, 0, $noreg, implicit $itstate
+ $r1, dead $cpsr = tMOVi8 1, 0, $noreg, implicit $itstate
+ $r2, dead $cpsr = tMOVi8 1, 0, $noreg, implicit $itstate
+ $r3, dead $cpsr = tMOVi8 1, 0, $noreg, implicit $itstate
+ tBL 14, $noreg, @z
+ bb.2:
+ tBX_RET 14, $noreg
+...
+---
+
+name: dont_outline_pic
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: dont_outline_pic
+ ; CHECK-NOT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION
+ bb.0:
+ $r0 = t2MOVi16_ga_pcrel target-flags(arm-lo16, arm-nonlazy) @z, 0
+ $r0 = t2MOVTi16_ga_pcrel $r0, target-flags(arm-lo16, arm-nonlazy) @z, 0
+ $r0 = PICADD $r0, 1, 14, $noreg
+ $r1 = PICLDR $r0, 2, 14, $noreg
+ PICSTR $r0, $r1, 3, 14, $noreg
+ tBL 14, $noreg, @z
+ bb.1:
+ $r0 = t2MOVi16_ga_pcrel target-flags(arm-lo16, arm-nonlazy) @z, 0
+ $r0 = t2MOVTi16_ga_pcrel $r0, target-flags(arm-lo16, arm-nonlazy) @z, 0
+ $r0 = PICADD $r0, 1, 14, $noreg
+ $r1 = PICLDR $r0, 2, 14, $noreg
+ PICSTR $r0, $r1, 3, 14, $noreg
+ tBL 14, $noreg, @z
+ bb.2:
+ tBX_RET 14, $noreg
+...
+---
+
+name: dont_outline_mve
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: dont_outline_mve
+ ; CHECK-NOT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION
+ bb.0:
+ liveins: $r3, $r4, $q0, $q3, $q4, $q5
+ $q5 = MVE_VDUP32 $r3, 0, $noreg, $q5
+ $q4 = MVE_VDUP32 $r4, 0, $noreg, $q4
+ $q0 = MVE_VADDf32 $q4, $q5, 0, $noreg, $q0
+ t2DoLoopStart $r4
+ $r0 = MVE_VMOV_from_lane_32 renamable $q0, 1, 14, $noreg
+ tBL 14, $noreg, @z
+ bb.1:
+ liveins: $r3, $r4, $q0, $q3, $q4, $q5
+ $q5 = MVE_VDUP32 $r3, 0, $noreg, $q5
+ $q4 = MVE_VDUP32 $r4, 0, $noreg, $q4
+ $q0 = MVE_VADDf32 $q4, $q5, 0, $noreg, $q0
+ t2DoLoopStart $r4
+ $r0 = MVE_VMOV_from_lane_32 renamable $q0, 1, 14, $noreg
+ tBL 14, $noreg, @z
+ bb.2:
+ tBX_RET 14, $noreg
Index: llvm/test/CodeGen/ARM/machine-outliner-thunk.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/machine-outliner-thunk.ll
@@ -0,0 +1,119 @@
+; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=armv7-- \
+; RUN: -stop-after=machine-outliner < %s | FileCheck %s --check-prefix=ARM
+; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=thumbv7-- \
+; RUN: -stop-after=machine-outliner < %s | FileCheck %s --check-prefix=THUMB
+; RUN: llc -enable-machine-outliner -verify-machineinstrs \
+; RUN: -mtriple=thumbv7-apple-darwin -stop-after=machine-outliner < %s \
+; RUN: | FileCheck %s --check-prefix=MACHO
+; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=thumbv5-- \
+; RUN: --stop-after=machine-outliner < %s | FileCheck %s --check-prefix=THUMB1
+
+declare i32 @thunk_called_fn(i32, i32, i32, i32)
+
+define i32 @a() {
+; ARM-LABEL: name: a
+; ARM: bb.0.entry:
+; ARM-NEXT: liveins: $r11, $lr
+; ARM: $sp = frame-setup STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r11, killed $lr
+; ARM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8
+; ARM-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4
+; ARM-NEXT: frame-setup CFI_INSTRUCTION offset $r11, -8
+; ARM-NEXT: BL @OUTLINED_FUNCTION_0{{.*}}
+; ARM-NEXT: renamable $r0 = ADDri killed renamable $r0, 8, 14 /* CC::al */, $noreg, $noreg
+; ARM-NEXT: $sp = frame-destroy LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r11, def $pc, implicit killed $r0
+
+; THUMB-LABEL: name: a
+; THUMB: bb.0.entry:
+; THUMB-NEXT: liveins: $r7, $lr
+; THUMB: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr
+; THUMB-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8
+; THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4
+; THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $r7, -8
+; THUMB-NEXT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_0{{.*}}
+; THUMB-NEXT: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 8, 14 /* CC::al */, $noreg
+; THUMB-NEXT: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
+
+; MACHO-LABEL: name: a
+; MACHO: bb.0.entry:
+; MACHO-NEXT: liveins: $lr
+; MACHO: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -4, 14 /* CC::al */, $noreg
+; MACHO-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 4
+; MACHO-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4
+; MACHO-NEXT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_0{{.*}}
+; MACHO-NEXT: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 8, 14 /* CC::al */, $noreg
+; MACHO-NEXT: $lr, $sp = frame-destroy t2LDR_POST $sp, 4, 14 /* CC::al */, $noreg
+; MACHO-NEXT: tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0
+
+; THUMB1-NOT: OUTLINED_FUNCTION_0
+
+entry:
+ %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4)
+ %cx = add i32 %call, 8
+ ret i32 %cx
+}
+
+define i32 @b() {
+; ARM-LABEL: name: b
+; ARM: bb.0.entry:
+; ARM-NEXT: liveins: $r11, $lr
+; ARM: $sp = frame-setup STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r11, killed $lr
+; ARM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8
+; ARM-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4
+; ARM-NEXT: frame-setup CFI_INSTRUCTION offset $r11, -8
+; ARM-NEXT: BL @OUTLINED_FUNCTION_0{{.*}}
+; ARM-NEXT: renamable $r0 = ADDri killed renamable $r0, 88, 14 /* CC::al */, $noreg, $noreg
+; ARM-NEXT: $sp = frame-destroy LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r11, def $pc, implicit killed $r0
+
+; THUMB-LABEL: name: b
+; THUMB: bb.0.entry:
+; THUMB-NEXT: liveins: $r7, $lr
+; THUMB: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr
+; THUMB-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8
+; THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4
+; THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $r7, -8
+; THUMB-NEXT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_0{{.*}}
+; THUMB-NEXT: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 88, 14 /* CC::al */, $noreg
+; THUMB-NEXT: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
+
+; MACHO-LABEL: name: b
+; MACHO: bb.0.entry:
+; MACHO-NEXT: liveins: $lr
+; MACHO: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -4, 14 /* CC::al */, $noreg
+; MACHO-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 4
+; MACHO-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4
+; MACHO-NEXT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_0{{.*}}
+; MACHO-NEXT: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 88, 14 /* CC::al */, $noreg
+; MACHO-NEXT: $lr, $sp = frame-destroy t2LDR_POST $sp, 4, 14 /* CC::al */, $noreg
+; MACHO-NEXT: tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0
+entry:
+ %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4)
+ %cx = add i32 %call, 88
+ ret i32 %cx
+}
+
+; ARM-LABEL: name: OUTLINED_FUNCTION_0
+; ARM: bb.0:
+; ARM-NEXT: liveins: $r10, $r9, $r8, $r7, $r6, $r5, $r4, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8
+; ARM: $r0 = MOVi 1, 14 /* CC::al */, $noreg, $noreg
+; ARM-NEXT: $r1 = MOVi 2, 14 /* CC::al */, $noreg, $noreg
+; ARM-NEXT: $r2 = MOVi 3, 14 /* CC::al */, $noreg, $noreg
+; ARM-NEXT: $r3 = MOVi 4, 14 /* CC::al */, $noreg, $noreg
+; ARM-NEXT: TAILJMPd @thunk_called_fn, implicit $sp
+
+; THUMB-LABEL: name: OUTLINED_FUNCTION_0
+; THUMB: bb.0:
+; THUMB-NEXT: liveins: $r11, $r10, $r9, $r8, $r6, $r5, $r4, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8
+; THUMB: $r0, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
+; THUMB-NEXT: $r1, dead $cpsr = tMOVi8 2, 14 /* CC::al */, $noreg
+; THUMB-NEXT: $r2, dead $cpsr = tMOVi8 3, 14 /* CC::al */, $noreg
+; THUMB-NEXT: $r3, dead $cpsr = tMOVi8 4, 14 /* CC::al */, $noreg
+; THUMB-NEXT: tTAILJMPdND @thunk_called_fn, 14 /* CC::al */, $noreg, implicit $sp
+
+; MACHO-LABEL: name: OUTLINED_FUNCTION_0
+; MACHO: bb.0:
+; MACHO-NEXT: liveins: $r7, $r6, $r5, $r4, $r11, $r10, $r8, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8
+; MACHO: $r0, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
+; MACHO-NEXT: $r1, dead $cpsr = tMOVi8 2, 14 /* CC::al */, $noreg
+; MACHO-NEXT: $r2, dead $cpsr = tMOVi8 3, 14 /* CC::al */, $noreg
+; MACHO-NEXT: $r3, dead $cpsr = tMOVi8 4, 14 /* CC::al */, $noreg
+; MACHO-NEXT: tTAILJMPd @thunk_called_fn, 14 /* CC::al */, $noreg, implicit $sp
Index: llvm/test/CodeGen/ARM/machine-outliner-tail.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/machine-outliner-tail.ll
@@ -0,0 +1,46 @@
+; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=arm-- \
+; RUN: --stop-after=machine-outliner < %s | FileCheck %s --check-prefix=ARM
+; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=thumbv7-- \
+; RUN: --stop-after=machine-outliner < %s | FileCheck %s --check-prefix=THUMB
+; RUN: llc -enable-machine-outliner -verify-machineinstrs \
+; RUN: -mtriple=thumbv7-apple-darwin --stop-after=machine-outliner < %s \
+; RUN: | FileCheck %s --check-prefix=MACHO
+; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=thumbv5-- \
+; RUN: --stop-after=machine-outliner < %s | FileCheck %s --check-prefix=THUMB1
+
+; ARM-LABEL: name: OUTLINED_FUNCTION_0
+; ARM: $r0 = MOVi 1, 14 /* CC::al */, $noreg, $noreg
+; ARM-NEXT: $r1 = MOVi 2, 14 /* CC::al */, $noreg, $noreg
+; ARM-NEXT: $r2 = MOVi 3, 14 /* CC::al */, $noreg, $noreg
+; ARM-NEXT: $r3 = MOVi 4, 14 /* CC::al */, $noreg, $noreg
+; ARM-NEXT: TAILJMPd @z
+
+; THUMB-LABEL: name: OUTLINED_FUNCTION_0
+; THUMB: $r0, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
+; THUMB-NEXT: $r1, dead $cpsr = tMOVi8 2, 14 /* CC::al */, $noreg
+; THUMB-NEXT: $r2, dead $cpsr = tMOVi8 3, 14 /* CC::al */, $noreg
+; THUMB-NEXT: $r3, dead $cpsr = tMOVi8 4, 14 /* CC::al */, $noreg
+; THUMB-NEXT: tTAILJMPdND @z, 14 /* CC::al */, $noreg
+
+; MACHO-LABEL: name: OUTLINED_FUNCTION_0
+; MACHO: $r0, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
+; MACHO-NEXT: $r1, dead $cpsr = tMOVi8 2, 14 /* CC::al */, $noreg
+; MACHO-NEXT: $r2, dead $cpsr = tMOVi8 3, 14 /* CC::al */, $noreg
+; MACHO-NEXT: $r3, dead $cpsr = tMOVi8 4, 14 /* CC::al */, $noreg
+; MACHO-NEXT: tTAILJMPd @z, 14 /* CC::al */, $noreg
+
+; THUMB1-NOT: OUTLINED_FUNCTION_0
+
+define void @a() {
+entry:
+ tail call void @z(i32 1, i32 2, i32 3, i32 4)
+ ret void
+}
+
+declare void @z(i32, i32, i32, i32)
+
+define dso_local void @b(i32* nocapture readnone %p) {
+entry:
+ tail call void @z(i32 1, i32 2, i32 3, i32 4)
+ ret void
+}
Index: llvm/lib/Target/ARM/ARMTargetMachine.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -248,6 +248,10 @@
setSupportsDebugEntryValues(true);
initAsmInfo();
+
+ // ARM supports the MachineOutliner.
+ setMachineOutliner(true);
+ setSupportsDefaultOutlining(false);
}
ARMBaseTargetMachine::~ARMBaseTargetMachine() = default;
Index: llvm/lib/Target/ARM/ARMBaseInstrInfo.h
===================================================================
--- llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -356,6 +356,22 @@
ArrayRef<std::pair<unsigned, const char *>>
getSerializableBitmaskMachineOperandTargetFlags() const override;
+ /// ARM supports the MachineOutliner.
+ bool isFunctionSafeToOutlineFrom(MachineFunction &MF,
+ bool OutlineFromLinkOnceODRs) const override;
+ outliner::OutlinedFunction getOutliningCandidateInfo(
+ std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
+ outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT,
+ unsigned Flags) const override;
+ bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
+ unsigned &Flags) const override;
+ void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF,
+ const outliner::OutlinedFunction &OF) const override;
+ MachineBasicBlock::iterator
+ insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &It, MachineFunction &MF,
+ const outliner::Candidate &C) const override;
+
private:
unsigned getInstBundleLength(const MachineInstr &MI) const;
Index: llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -32,6 +32,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
@@ -5517,3 +5518,372 @@
return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) <
ConstantMaterializationCost(Val2, Subtarget, !ForCodesize);
}
+
+/// Identifiers for the outlining strategies implemented for ARM. Each value
+/// selects both how an outlined function is called and what kind of frame is
+/// emitted for that outlined function.
+///
+/// \p MachineOutlinerTailCall implies that the function is being created from
+/// a sequence of instructions ending in a return.
+///
+/// That is,
+///
+///   I1                                OUTLINED_FUNCTION:
+///   I2    -->   B OUTLINED_FUNCTION   I1
+///   BX LR                             I2
+///                                     BX LR
+///
+/// +-------------------------+--------+-----+
+/// |                         | Thumb2 | ARM |
+/// +-------------------------+--------+-----+
+/// | Call overhead in Bytes  |      4 |   4 |
+/// | Frame overhead in Bytes |      0 |   0 |
+/// | Stack fixup required    |     No |  No |
+/// +-------------------------+--------+-----+
+///
+/// \p MachineOutlinerThunk implies that the function is being created from
+/// a sequence of instructions ending in a call. The outlined function is
+/// called with a BL instruction, and the outlined function tail-calls the
+/// original call destination.
+///
+/// That is,
+///
+///   I1                                OUTLINED_FUNCTION:
+///   I2    -->   BL OUTLINED_FUNCTION  I1
+///   BL f                              I2
+///                                     B f
+///
+/// +-------------------------+--------+-----+
+/// |                         | Thumb2 | ARM |
+/// +-------------------------+--------+-----+
+/// | Call overhead in Bytes  |      4 |   4 |
+/// | Frame overhead in Bytes |      0 |   0 |
+/// | Stack fixup required    |     No |  No |
+/// +-------------------------+--------+-----+
+
+enum MachineOutlinerClass {
+  MachineOutlinerTailCall, // Sequence ends in a return; call site is a branch.
+  MachineOutlinerThunk     // Sequence ends in a call; callee is tail-called.
+};
+
+/// Per-basic-block properties that isMBBSafeToOutlineFrom() records in its
+/// \p Flags out-parameter. Each enumerator is a distinct bit so that several
+/// of them can be OR'ed together.
+enum MachineOutlinerMBBFlags {
+  /// LR was not reported available after accumulating the block's
+  /// instructions and its live-outs.
+  LRUnavailableSomewhere = 1 << 1,
+  /// At least one instruction in the block is a call.
+  HasCalls = 1 << 2,
+  /// R12 and CPSR were both reported available after accumulating the block's
+  /// instructions (before live-outs are taken into account).
+  UnsafeRegsDead = 1 << 3
+};
+
+/// Byte overheads of the ARM outlining strategies. Every cost is chosen
+/// according to whether the subtarget is in Thumb mode; at present the Thumb
+/// and ARM values are identical.
+struct OutlinerCosts {
+  const int CallTailCall;  // Bytes needed to branch to a tail-call outline.
+  const int FrameTailCall; // Bytes of frame code in a tail-call outline.
+  const int CallThunk;     // Bytes needed to call a thunk outline.
+  const int FrameThunk;    // Bytes of frame code in a thunk outline.
+
+  OutlinerCosts(const ARMSubtarget &target)
+      : CallTailCall(pick(target, 4, 4)), FrameTailCall(pick(target, 0, 0)),
+        CallThunk(pick(target, 4, 4)), FrameThunk(pick(target, 0, 0)) {}
+
+private:
+  /// Returns \p ThumbCost for Thumb subtargets and \p ARMCost otherwise.
+  static int pick(const ARMSubtarget &ST, int ThumbCost, int ARMCost) {
+    return ST.isThumb() ? ThumbCost : ARMCost;
+  }
+};
+
+/// Decide whether, and with which strategy, a repeated instruction sequence
+/// can be outlined.
+///
+/// \param RepeatedSequenceLocs every occurrence of the sequence. Occurrences
+/// whose liveness information reports R12 or CPSR as unavailable are erased
+/// from this vector; the remaining ones get their call information set.
+/// \returns the description of the outlined function, or a default-constructed
+/// OutlinedFunction when fewer than two candidates remain or when the sequence
+/// ends in neither a terminator nor one of the recognised call opcodes.
+outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
+    std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
+  outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
+  unsigned SequenceSize =
+      std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
+                      [this](unsigned Sum, const MachineInstr &MI) {
+                        return Sum + getInstSizeInBytes(MI);
+                      });
+
+  // Properties about candidate MBBs that hold for all of them.
+  unsigned FlagsSetInAll = 0xF;
+  for (const outliner::Candidate &C : RepeatedSequenceLocs)
+    FlagsSetInAll &= C.Flags;
+
+  // According to the ARM Procedure Call Standard, the following are
+  // undefined on entry/exit from a function call:
+  //
+  // * Register R12(IP),
+  // * Condition codes (and thus the CPSR register)
+  //
+  // Since we control the instructions which are part of the outlined regions
+  // we don't need to be fully compliant with the AAPCS, but we have to
+  // guarantee that if a veneer is inserted at link time the code is still
+  // correct. Because of this, we can't outline any sequence of instructions
+  // where one of these registers is live into/across it. Thus, we need to
+  // delete those candidates.
+  const TargetRegisterInfo &TRI = getRegisterInfo();
+  auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
+    // If the unsafe registers in this block are all dead, then we don't need
+    // to compute liveness here.
+    if (C.Flags & UnsafeRegsDead)
+      return false;
+    C.initLRU(TRI);
+    // Query the candidate's liveness in place; no copy is needed.
+    return !C.LRU.available(ARM::R12) || !C.LRU.available(ARM::CPSR);
+  };
+
+  // Are there any candidates where those registers are live?
+  if (!(FlagsSetInAll & UnsafeRegsDead)) {
+    // Erase every candidate that violates the restrictions above. (It could be
+    // true that we have viable candidates, so it's not worth bailing out in
+    // the case that, say, 1 out of 20 candidates violate the restrictions.)
+    RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
+                                              RepeatedSequenceLocs.end(),
+                                              CantGuaranteeValueAcrossCall),
+                               RepeatedSequenceLocs.end());
+
+    // If the sequence doesn't have enough candidates left, then we're done.
+    if (RepeatedSequenceLocs.size() < 2)
+      return outliner::OutlinedFunction();
+  }
+
+  // At this point, we have only "safe" candidates to outline. Figure out
+  // frame + call instruction information.
+
+  unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
+
+  // Helper lambda which sets call information for every candidate.
+  auto SetCandidateCallInfo =
+      [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
+        for (outliner::Candidate &C : RepeatedSequenceLocs)
+          C.setCallInfo(CallID, NumBytesForCall);
+      };
+
+  // The cost table lives on the stack: it is only needed for the duration of
+  // this call, and a heap allocation here would never be released.
+  const OutlinerCosts Costs(Subtarget);
+  unsigned FrameID = 0;
+  unsigned NumBytesToCreateFrame = 0;
+
+  // If the last instruction in any candidate is a terminator, then we should
+  // tail call all of the candidates.
+  if (RepeatedSequenceLocs[0].back()->isTerminator()) {
+    FrameID = MachineOutlinerTailCall;
+    NumBytesToCreateFrame = Costs.FrameTailCall;
+    SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall);
+  } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX ||
+             LastInstrOpcode == ARM::tBL || LastInstrOpcode == ARM::tBLXr ||
+             LastInstrOpcode == ARM::tBLXi) {
+    // The sequence ends in a call: outline it as a thunk.
+    FrameID = MachineOutlinerThunk;
+    NumBytesToCreateFrame = Costs.FrameThunk;
+    SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk);
+  } else
+    return outliner::OutlinedFunction();
+
+  return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
+                                    NumBytesToCreateFrame, FrameID);
+}
+
+/// Returns true when instructions may be outlined from \p MF.
+///
+/// \param MF the machine function being considered.
+/// \param OutlineFromLinkOnceODRs when false, functions with linkonce_odr
+/// linkage are rejected.
+bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom(
+    MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
+  const Function &Fn = MF.getFunction();
+
+  // The reasons for rejecting a function are checked in this order:
+  //  * it can be deduplicated by the linker (linkonce_odr) and the caller did
+  //    not opt in to outlining from such functions;
+  //  * it carries a section marking; the program could expect that all the
+  //    code is in the named section.
+  //    FIXME: Allow outlining from multiple functions with the same section
+  //    marking.
+  //  * it is a Thumb1-only function.
+  //    FIXME: Thumb1 outlining is not handled
+  const bool Rejected =
+      (!OutlineFromLinkOnceODRs && Fn.hasLinkOnceODRLinkage()) ||
+      Fn.hasSection() ||
+      MF.getInfo<ARMFunctionInfo>()->isThumb1OnlyFunction();
+
+  return !Rejected;
+}
+
+/// Returns true when candidates may be taken from \p MBB, and records
+/// properties of the block in \p Flags.
+///
+/// \param MBB the block to inspect; its function must track liveness.
+/// \param Flags receives MachineOutlinerMBBFlags bits describing the block.
+/// Bits may already have been set when the function returns false.
+bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
+                                              unsigned &Flags) const {
+  assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
+         "Suitable Machine Function for outlining must track liveness");
+
+  // Gather every register unit touched by the block, walking it bottom-up.
+  LiveRegUnits Units(getRegisterInfo());
+  for (auto RI = MBB.rbegin(), RE = MBB.rend(); RI != RE; ++RI)
+    Units.accumulate(*RI);
+
+  // Snapshot the state of the unsafe registers before live-outs are added.
+  const bool R12FreeInBlock = Units.available(ARM::R12);
+  const bool CPSRFreeInBlock = Units.available(ARM::CPSR);
+
+  // When neither is touched inside the block, later per-candidate liveness
+  // checks on them can be skipped.
+  if (R12FreeInBlock && CPSRFreeInBlock)
+    Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
+
+  // From here on the set also contains the block's live-outs.
+  Units.addLiveOuts(MBB);
+
+  // A register that is untouched inside the block but live out of it is live
+  // across the whole block, so outlining from the block is unsafe.
+  const bool R12LiveAcross = R12FreeInBlock && !Units.available(ARM::R12);
+  if (R12LiveAcross)
+    return false;
+  const bool CPSRLiveAcross = CPSRFreeInBlock && !Units.available(ARM::CPSR);
+  if (CPSRLiveAcross)
+    return false;
+
+  // Note whether the block contains any call.
+  for (const MachineInstr &MI : MBB) {
+    if (MI.isCall()) {
+      Flags |= MachineOutlinerMBBFlags::HasCalls;
+      break;
+    }
+  }
+
+  // Note whether LR is unavailable somewhere in the block (or live out of it).
+  if (!Units.available(ARM::LR))
+    Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
+
+  return true;
+}
+
+/// Classify the instruction at \p MIT for the MachineOutliner.
+///
+/// \param MIT iterator to the instruction being classified.
+/// \param Flags block flags; not consulted by this implementation.
+/// \returns Invisible for debug, KILL and IMPLICIT_DEF instructions,
+/// LegalTerminator for the recognised call opcodes, Legal for instructions
+/// that may appear in an outlined sequence, and Illegal otherwise.
+outliner::InstrType
+ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
+                                   unsigned Flags) const {
+  using outliner::InstrType;
+
+  MachineInstr &MI = *MIT;
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+
+  // Be conservative with inline ASM
+  if (MI.isInlineAsm())
+    return InstrType::Illegal;
+
+  // Debug values, KILLs and IMPLICIT_DEFs carry no information that matters
+  // here, so they must not influence the outlining decision.
+  if (MI.isDebugInstr() || MI.isIndirectDebugValue() || MI.isKill() ||
+      MI.isImplicitDef())
+    return InstrType::Invisible;
+
+  const unsigned Opc = MI.getOpcode();
+  switch (Opc) {
+  // PIC instructions contain labels, outlining them would break offset
+  // computing.
+  case ARM::tPICADD:
+  case ARM::PICADD:
+  case ARM::PICSTR:
+  case ARM::PICSTRB:
+  case ARM::PICSTRH:
+  case ARM::PICLDR:
+  case ARM::PICLDRB:
+  case ARM::PICLDRH:
+  case ARM::PICLDRSB:
+  case ARM::PICLDRSH:
+  case ARM::t2LDRpci_pic:
+  case ARM::t2MOVi16_ga_pcrel:
+  case ARM::t2MOVTi16_ga_pcrel:
+  case ARM::t2MOV_ga_pcrel:
+  // Be conservative with ARMv8.1 MVE instructions.
+  case ARM::t2BF_LabelPseudo:
+  case ARM::t2DoLoopStart:
+  case ARM::t2WhileLoopStart:
+  case ARM::t2LoopDec:
+  case ARM::t2LoopEnd:
+    return InstrType::Illegal;
+  default:
+    break;
+  }
+
+  // Anything in the MVE domain is rejected as well.
+  const uint64_t TSFlags = MI.getDesc().TSFlags;
+  if ((TSFlags & ARMII::DomainMask) == ARMII::DomainMVE)
+    return InstrType::Illegal;
+
+  // A block terminator is only outlinable when it is unconditional and ends
+  // the function (its block has no successors).
+  if (MI.isTerminator()) {
+    const bool EndsFunction =
+        !isPredicated(MI) && MI.getParent()->succ_empty();
+    return EndsFunction ? InstrType::Legal : InstrType::Illegal;
+  }
+
+  // Make sure none of the operands are un-outlinable.
+  const bool HasUnoutlinableOperand =
+      any_of(MI.operands(), [](const MachineOperand &MOP) {
+        return MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
+               MOP.isTargetIndex();
+      });
+  if (HasUnoutlinableOperand)
+    return InstrType::Illegal;
+
+  // Don't outline if link register or program counter value are used.
+  if (MI.readsRegister(ARM::LR, TRI) || MI.readsRegister(ARM::PC, TRI))
+    return InstrType::Illegal;
+
+  if (MI.isCall()) {
+    // Nothing is known about the callee, so assume it depends on the stack
+    // layout of the caller; such a call may only end an outlined sequence.
+    // Only the call opcodes listed here are accepted, so that call
+    // pseudo-instructions cannot produce unexpected results.
+    switch (Opc) {
+    case ARM::BL:
+    case ARM::tBL:
+    case ARM::BLX:
+    case ARM::tBLXr:
+    case ARM::tBLXi:
+      return InstrType::LegalTerminator;
+    default:
+      return InstrType::Illegal;
+    }
+  }
+
+  // Since calls are handled, don't touch LR or PC
+  if (MI.modifiesRegister(ARM::LR, TRI) || MI.modifiesRegister(ARM::PC, TRI))
+    return InstrType::Illegal;
+
+  // Be conservative with IT blocks.
+  if (MI.readsRegister(ARM::ITSTATE, TRI) ||
+      MI.modifiesRegister(ARM::ITSTATE, TRI))
+    return InstrType::Illegal;
+
+  // Don't outline positions.
+  if (MI.isPosition())
+    return InstrType::Illegal;
+
+  return InstrType::Legal;
+}
+
+/// Finalise the body of an outlined function held in \p MBB.
+///
+/// Only thunk outlines need work: their trailing call is replaced by the
+/// matching tail-call instruction. Other frame kinds are left untouched.
+///
+/// \param MBB the block holding the outlined instructions.
+/// \param MF the outlined function (unused here).
+/// \param OF describes the outlined function; its FrameConstructionID selects
+/// the frame kind.
+void ARMBaseInstrInfo::buildOutlinedFrame(
+    MachineBasicBlock &MBB, MachineFunction &MF,
+    const outliner::OutlinedFunction &OF) const {
+  if (OF.FrameConstructionID != MachineOutlinerThunk)
+    return;
+
+  // The last instruction of a thunk outline is the original call.
+  MachineInstr &OrigCall = *std::prev(MBB.instr_end());
+  const bool IsThumb = Subtarget.isThumb();
+
+  // The callee is read from operand 2 in Thumb mode and operand 0 otherwise.
+  const MachineOperand &Callee = OrigCall.getOperand(IsThumb ? 2 : 0);
+  const bool IsIndirect = Callee.isReg();
+
+  // Pick the tail-call opcode matching the call kind and instruction set.
+  unsigned TailOpc;
+  if (IsIndirect)
+    TailOpc = IsThumb ? ARM::tTAILJMPr : ARM::TAILJMPr;
+  else if (!IsThumb)
+    TailOpc = ARM::TAILJMPd;
+  else
+    TailOpc = Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND;
+
+  MachineInstrBuilder TailCall =
+      BuildMI(MBB, MBB.end(), DebugLoc(), get(TailOpc)).add(Callee);
+
+  // Direct Thumb tail calls additionally carry an "always" predicate.
+  if (IsThumb && !IsIndirect)
+    TailCall.add(predOps(ARMCC::AL));
+
+  // The tail call now stands in for the original call.
+  OrigCall.eraseFromParent();
+}
+
+/// Insert, before \p It, the instruction that transfers control to an
+/// outlined function.
+///
+/// \param M the module in which the target global is looked up.
+/// \param MBB the block receiving the new instruction.
+/// \param It insertion point; updated to point at the inserted instruction.
+/// \param MF the function whose name identifies the target global in \p M.
+/// \param C the candidate being replaced; its CallConstructionID selects
+/// between a tail-call branch and a BL-style call.
+/// \returns an iterator to the inserted instruction (the new value of \p It).
+MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
+    Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
+    MachineFunction &MF, const outliner::Candidate &C) const {
+  const bool IsThumb = Subtarget.isThumb();
+
+  // Are we tail calling?
+  if (C.CallConstructionID == MachineOutlinerTailCall) {
+    // If yes, then we can just branch to the label.
+    const unsigned TailOpc =
+        !IsThumb ? ARM::TAILJMPd
+                 : Subtarget.isTargetMachO() ? ARM::tTAILJMPd
+                                             : ARM::tTAILJMPdND;
+    MachineInstrBuilder TailMIB =
+        BuildMI(MF, DebugLoc(), get(TailOpc))
+            .addGlobalAddress(M.getNamedValue(MF.getName()));
+    // Thumb tail calls take the predicate after the target.
+    if (IsThumb)
+      TailMIB.add(predOps(ARMCC::AL));
+    It = MBB.insert(It, TailMIB);
+    return It;
+  }
+
+  // Create the call instruction. Thumb calls take the predicate before the
+  // target.
+  MachineInstrBuilder CallMIB =
+      BuildMI(MF, DebugLoc(), get(IsThumb ? ARM::tBL : ARM::BL));
+  if (IsThumb)
+    CallMIB.add(predOps(ARMCC::AL));
+  CallMIB.addGlobalAddress(M.getNamedValue(MF.getName()));
+
+  // Insert the call.
+  It = MBB.insert(It, CallMIB);
+  return It;
+}
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -6149,11 +6149,12 @@
if (Arg *A = Args.getLastArg(options::OPT_moutline,
options::OPT_mno_outline)) {
if (A->getOption().matches(options::OPT_moutline)) {
- // We only support -moutline in AArch64 right now. If we're not compiling
- // for AArch64, emit a warning and ignore the flag. Otherwise, add the
- // proper mllvm flags.
- if (Triple.getArch() != llvm::Triple::aarch64 &&
- Triple.getArch() != llvm::Triple::aarch64_32) {
+ // We only support -moutline in AArch64 and ARM targets right now. If
+ // we're not compiling for these, emit a warning and ignore the flag.
+ // Otherwise, add the proper mllvm flags.
+ if (!(Triple.isARM() || Triple.isThumb() ||
+ Triple.getArch() == llvm::Triple::aarch64 ||
+ Triple.getArch() == llvm::Triple::aarch64_32)) {
D.Diag(diag::warn_drv_moutline_unsupported_opt) << Triple.getArchName();
} else {
CmdArgs.push_back("-mllvm");
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits