lenary updated this revision to Diff 428978.
lenary marked 3 inline comments as done.
lenary added a comment.
- Address comment nits
- Rebase
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D119720/new/
https://reviews.llvm.org/D119720
Files:
clang/include/clang/Driver/Options.td
clang/lib/Driver/ToolChains/Arch/ARM.cpp
clang/test/Driver/arm-fix-cortex-a57-aes-1742098.c
llvm/lib/Target/ARM/ARM.h
llvm/lib/Target/ARM/ARM.td
llvm/lib/Target/ARM/ARMFixCortexA57AES1742098Pass.cpp
llvm/lib/Target/ARM/ARMTargetMachine.cpp
llvm/lib/Target/ARM/CMakeLists.txt
llvm/test/CodeGen/ARM/O3-pipeline.ll
llvm/test/CodeGen/ARM/aes-erratum-fix.ll
Index: llvm/test/CodeGen/ARM/aes-erratum-fix.ll
===================================================================
--- llvm/test/CodeGen/ARM/aes-erratum-fix.ll
+++ llvm/test/CodeGen/ARM/aes-erratum-fix.ll
@@ -47,6 +47,7 @@
; CHECK-FIX-NEXT: push {r4, lr}
; CHECK-FIX-NEXT: mov r4, r0
; CHECK-FIX-NEXT: bl get_input
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: aese.8 q0, q8
; CHECK-FIX-NEXT: aesmc.8 q8, q0
@@ -67,6 +68,7 @@
; CHECK-FIX-NEXT: push {r4, lr}
; CHECK-FIX-NEXT: mov r4, r0
; CHECK-FIX-NEXT: bl get_inputf16
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: aese.8 q0, q8
; CHECK-FIX-NEXT: aesmc.8 q8, q0
@@ -87,6 +89,7 @@
; CHECK-FIX-NEXT: push {r4, lr}
; CHECK-FIX-NEXT: mov r4, r0
; CHECK-FIX-NEXT: bl get_inputf32
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: aese.8 q0, q8
; CHECK-FIX-NEXT: aesmc.8 q8, q0
@@ -120,6 +123,8 @@
define arm_aapcs_vfpcc <16 x i8> @aese_once_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
; CHECK-FIX-LABEL: aese_once_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q1, q1, q1
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: aese.8 q1, q0
; CHECK-FIX-NEXT: aesmc.8 q0, q1
; CHECK-FIX-NEXT: bx lr
@@ -156,6 +161,9 @@
define arm_aapcs_vfpcc <16 x i8> @aese_twice_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
; CHECK-FIX-LABEL: aese_twice_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q1, q1, q1
+; CHECK-FIX-NEXT: vorr q0, q0, q0
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: aese.8 q1, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q1
; CHECK-FIX-NEXT: aese.8 q8, q0
@@ -219,6 +227,8 @@
define arm_aapcs_vfpcc <16 x i8> @aese_loop_via_val(i32 %0, <16 x i8> %1, <16 x i8> %2) nounwind {
; CHECK-FIX-LABEL: aese_loop_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q1, q1, q1
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB9_2
; CHECK-FIX-NEXT: .LBB9_1: @ =>This Inner Loop Header: Depth=1
@@ -249,6 +259,7 @@
define arm_aapcs_vfpcc void @aese_set8_via_ptr(i8* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set8_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
+; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: ldrb r0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.8 d0[0], r0
@@ -260,6 +271,7 @@
;
; CHECK-CORTEX-FIX-LABEL: aese_set8_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
+; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: ldrb r0, [r0]
; CHECK-CORTEX-FIX-NEXT: vmov.8 d0[0], r0
@@ -281,6 +293,7 @@
define arm_aapcs_vfpcc void @aese_set8_via_val(i8 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aese_set8_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: vmov.8 d0[0], r0
; CHECK-FIX-NEXT: vmov.8 d16[0], r0
@@ -300,6 +313,7 @@
define arm_aapcs_vfpcc void @aese_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set8_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB12_2
; CHECK-FIX-NEXT: @ %bb.1:
@@ -351,6 +365,7 @@
define arm_aapcs_vfpcc void @aese_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set8_cond_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB13_2
@@ -380,6 +395,7 @@
define arm_aapcs_vfpcc void @aese_set8_loop_via_ptr(i32 %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set8_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldrb r1, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: strb r1, [r2]
@@ -426,6 +442,7 @@
define arm_aapcs_vfpcc void @aese_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set8_loop_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB15_1:
@@ -469,6 +486,7 @@
define arm_aapcs_vfpcc void @aese_set16_via_ptr(i16* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set16_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
+; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: ldrh r0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d0[0], r0
@@ -480,6 +498,7 @@
;
; CHECK-CORTEX-FIX-LABEL: aese_set16_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
+; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r0]
; CHECK-CORTEX-FIX-NEXT: vmov.16 d0[0], r0
@@ -505,6 +524,7 @@
define arm_aapcs_vfpcc void @aese_set16_via_val(i16 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aese_set16_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: vmov.16 d0[0], r0
; CHECK-FIX-NEXT: vmov.16 d16[0], r0
@@ -528,6 +548,7 @@
define arm_aapcs_vfpcc void @aese_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set16_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB18_2
; CHECK-FIX-NEXT: @ %bb.1:
@@ -588,6 +609,7 @@
define arm_aapcs_vfpcc void @aese_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set16_cond_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB19_2
@@ -621,6 +643,7 @@
define arm_aapcs_vfpcc void @aese_set16_loop_via_ptr(i32 %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set16_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldrh r1, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: strh r1, [r2]
@@ -669,6 +692,7 @@
define arm_aapcs_vfpcc void @aese_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set16_loop_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB21_1:
@@ -714,6 +738,7 @@
define arm_aapcs_vfpcc void @aese_set32_via_ptr(i32* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set32_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
+; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: ldr r0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d0[0], r0
@@ -725,6 +750,7 @@
;
; CHECK-CORTEX-FIX-LABEL: aese_set32_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
+; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: ldr r0, [r0]
; CHECK-CORTEX-FIX-NEXT: vmov.32 d0[0], r0
@@ -750,6 +776,7 @@
define arm_aapcs_vfpcc void @aese_set32_via_val(i32 %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aese_set32_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: vmov.32 d0[0], r0
; CHECK-FIX-NEXT: vmov.32 d16[0], r0
@@ -773,6 +800,7 @@
define arm_aapcs_vfpcc void @aese_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set32_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB24_2
; CHECK-FIX-NEXT: @ %bb.1:
@@ -833,6 +861,7 @@
define arm_aapcs_vfpcc void @aese_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set32_cond_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB25_2
@@ -866,6 +895,7 @@
define arm_aapcs_vfpcc void @aese_set32_loop_via_ptr(i32 %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set32_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldr r1, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: str r1, [r2]
@@ -914,6 +944,7 @@
define arm_aapcs_vfpcc void @aese_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set32_loop_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB27_1:
@@ -959,6 +990,7 @@
define arm_aapcs_vfpcc void @aese_set64_via_ptr(i64* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set64_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
+; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vldr d0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vorr d16, d0, d0
@@ -969,6 +1001,7 @@
;
; CHECK-CORTEX-FIX-LABEL: aese_set64_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
+; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vldr d0, [r0]
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: vorr d16, d0, d0
@@ -993,6 +1026,7 @@
define arm_aapcs_vfpcc void @aese_set64_via_val(i64 %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aese_set64_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: vmov.32 d0[0], r0
; CHECK-FIX-NEXT: vmov.32 d16[0], r0
@@ -1029,6 +1063,7 @@
; CHECK-FIX-NOSCHED-NEXT: .LBB30_3:
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: vldrne d0, [r1]
+; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
@@ -1048,6 +1083,7 @@
; CHECK-CORTEX-FIX-NEXT: .LBB30_3:
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: vldrne d0, [r1]
+; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
@@ -1093,6 +1129,7 @@
define arm_aapcs_vfpcc void @aese_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set64_cond_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldr r1, [sp]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
@@ -1129,6 +1166,7 @@
define arm_aapcs_vfpcc void @aese_set64_loop_via_ptr(i32 %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set64_loop_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
+; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r11, lr}
; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r11, lr}
; CHECK-FIX-NOSCHED-NEXT: ldrd r4, r5, [r1]
@@ -1150,6 +1188,7 @@
;
; CHECK-CORTEX-FIX-LABEL: aese_set64_loop_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
+; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r11, lr}
; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r11, lr}
; CHECK-CORTEX-FIX-NEXT: ldrd r4, r5, [r1]
@@ -1200,6 +1239,7 @@
define arm_aapcs_vfpcc void @aese_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set64_loop_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB33_1:
@@ -1248,6 +1288,7 @@
define arm_aapcs_vfpcc void @aese_setf16_via_ptr(half* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_setf16_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
+; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: ldrh r0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d0[0], r0
@@ -1259,6 +1300,7 @@
;
; CHECK-CORTEX-FIX-LABEL: aese_setf16_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
+; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r0]
; CHECK-CORTEX-FIX-NEXT: vmov.16 d0[0], r0
@@ -1285,6 +1327,7 @@
define arm_aapcs_vfpcc void @aese_setf16_via_val(half %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aese_setf16_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: vmov r1, s0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: vmov.16 d2[0], r1
@@ -1964,6 +2007,7 @@
define arm_aapcs_vfpcc void @aese_setf16_loop_via_ptr(i32 %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_setf16_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldrh r1, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: strh r1, [r2]
@@ -2013,6 +2057,7 @@
define arm_aapcs_vfpcc void @aese_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_setf16_loop_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB39_1:
@@ -2066,6 +2111,8 @@
; CHECK-FIX-NEXT: vldr s0, [r0]
; CHECK-FIX-NEXT: vld1.64 {d2, d3}, [r1]
; CHECK-FIX-NEXT: vmov.f32 s4, s0
+; CHECK-FIX-NEXT: vorr q1, q1, q1
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: aese.8 q1, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q1
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
@@ -2090,6 +2137,8 @@
; CHECK-FIX-NEXT: vmov.f32 s4, s0
; CHECK-FIX-NEXT: vld1.64 {d0, d1}, [r0]
; CHECK-FIX-NEXT: vmov.f32 s0, s4
+; CHECK-FIX-NEXT: vorr q0, q0, q0
+; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: aese.8 q0, q1
; CHECK-FIX-NEXT: aesmc.8 q8, q0
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
@@ -2110,6 +2159,7 @@
define arm_aapcs_vfpcc void @aese_setf32_cond_via_ptr(i1 zeroext %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_setf32_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB42_2
; CHECK-FIX-NEXT: @ %bb.1:
@@ -2173,8 +2223,10 @@
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1]
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: vmovne.f32 s8, s0
+; CHECK-FIX-NOSCHED-NEXT: vorr q2, q2, q2
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: vmovne.f32 s4, s0
+; CHECK-FIX-NOSCHED-NEXT: vorr q1, q1, q1
; CHECK-FIX-NOSCHED-NEXT: aese.8 q2, q1
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q2
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
@@ -2185,8 +2237,10 @@
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d4, d5}, [r1]
; CHECK-CORTEX-FIX-NEXT: vmovne.f32 s8, s0
+; CHECK-CORTEX-FIX-NEXT: vorr q2, q2, q2
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: vmovne.f32 s4, s0
+; CHECK-CORTEX-FIX-NEXT: vorr q1, q1, q1
; CHECK-CORTEX-FIX-NEXT: aese.8 q2, q1
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q2
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
@@ -2217,6 +2271,7 @@
; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s0, s4
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: .LBB44_2: @ =>This Inner Loop Header: Depth=1
+; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
@@ -2235,6 +2290,7 @@
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: vmov.f32 s0, s4
; CHECK-CORTEX-FIX-NEXT: .LBB44_2: @ =>This Inner Loop Header: Depth=1
+; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
@@ -2283,6 +2339,8 @@
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1]
; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s8, s0
+; CHECK-FIX-NOSCHED-NEXT: vorr q2, q2, q2
+; CHECK-FIX-NOSCHED-NEXT: vorr q1, q1, q1
; CHECK-FIX-NOSCHED-NEXT: aese.8 q2, q1
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q2
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
@@ -2299,7 +2357,9 @@
; CHECK-CORTEX-FIX-NEXT: .LBB45_2: @ =>This Inner Loop Header: Depth=1
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d4, d5}, [r1]
; CHECK-CORTEX-FIX-NEXT: vmov.f32 s8, s0
+; CHECK-CORTEX-FIX-NEXT: vorr q2, q2, q2
; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
+; CHECK-CORTEX-FIX-NEXT: vorr q1, q1, q1
; CHECK-CORTEX-FIX-NEXT: aese.8 q2, q1
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q2
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
@@ -2357,6 +2417,7 @@
; CHECK-FIX-NEXT: push {r4, lr}
; CHECK-FIX-NEXT: mov r4, r0
; CHECK-FIX-NEXT: bl get_input
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: aesd.8 q0, q8
; CHECK-FIX-NEXT: aesimc.8 q8, q0
@@ -2377,6 +2438,7 @@
; CHECK-FIX-NEXT: push {r4, lr}
; CHECK-FIX-NEXT: mov r4, r0
; CHECK-FIX-NEXT: bl get_inputf16
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: aesd.8 q0, q8
; CHECK-FIX-NEXT: aesimc.8 q8, q0
@@ -2397,6 +2459,7 @@
; CHECK-FIX-NEXT: push {r4, lr}
; CHECK-FIX-NEXT: mov r4, r0
; CHECK-FIX-NEXT: bl get_inputf32
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: aesd.8 q0, q8
; CHECK-FIX-NEXT: aesimc.8 q8, q0
@@ -2430,6 +2493,8 @@
define arm_aapcs_vfpcc <16 x i8> @aesd_once_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
; CHECK-FIX-LABEL: aesd_once_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q1, q1, q1
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: aesd.8 q1, q0
; CHECK-FIX-NEXT: aesimc.8 q0, q1
; CHECK-FIX-NEXT: bx lr
@@ -2466,6 +2531,9 @@
define arm_aapcs_vfpcc <16 x i8> @aesd_twice_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
; CHECK-FIX-LABEL: aesd_twice_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q1, q1, q1
+; CHECK-FIX-NEXT: vorr q0, q0, q0
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: aesd.8 q1, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q1
; CHECK-FIX-NEXT: aesd.8 q8, q0
@@ -2529,6 +2597,8 @@
define arm_aapcs_vfpcc <16 x i8> @aesd_loop_via_val(i32 %0, <16 x i8> %1, <16 x i8> %2) nounwind {
; CHECK-FIX-LABEL: aesd_loop_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q1, q1, q1
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB55_2
; CHECK-FIX-NEXT: .LBB55_1: @ =>This Inner Loop Header: Depth=1
@@ -2559,6 +2629,7 @@
define arm_aapcs_vfpcc void @aesd_set8_via_ptr(i8* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set8_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
+; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: ldrb r0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.8 d0[0], r0
@@ -2570,6 +2641,7 @@
;
; CHECK-CORTEX-FIX-LABEL: aesd_set8_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
+; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: ldrb r0, [r0]
; CHECK-CORTEX-FIX-NEXT: vmov.8 d0[0], r0
@@ -2591,6 +2663,7 @@
define arm_aapcs_vfpcc void @aesd_set8_via_val(i8 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aesd_set8_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: vmov.8 d0[0], r0
; CHECK-FIX-NEXT: vmov.8 d16[0], r0
@@ -2610,6 +2683,7 @@
define arm_aapcs_vfpcc void @aesd_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set8_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB58_2
; CHECK-FIX-NEXT: @ %bb.1:
@@ -2661,6 +2735,7 @@
define arm_aapcs_vfpcc void @aesd_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set8_cond_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB59_2
@@ -2690,6 +2765,7 @@
define arm_aapcs_vfpcc void @aesd_set8_loop_via_ptr(i32 %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set8_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldrb r1, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: strb r1, [r2]
@@ -2736,6 +2812,7 @@
define arm_aapcs_vfpcc void @aesd_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set8_loop_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB61_1:
@@ -2779,6 +2856,7 @@
define arm_aapcs_vfpcc void @aesd_set16_via_ptr(i16* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set16_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
+; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: ldrh r0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d0[0], r0
@@ -2790,6 +2868,7 @@
;
; CHECK-CORTEX-FIX-LABEL: aesd_set16_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
+; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r0]
; CHECK-CORTEX-FIX-NEXT: vmov.16 d0[0], r0
@@ -2815,6 +2894,7 @@
define arm_aapcs_vfpcc void @aesd_set16_via_val(i16 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aesd_set16_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: vmov.16 d0[0], r0
; CHECK-FIX-NEXT: vmov.16 d16[0], r0
@@ -2838,6 +2918,7 @@
define arm_aapcs_vfpcc void @aesd_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set16_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB64_2
; CHECK-FIX-NEXT: @ %bb.1:
@@ -2898,6 +2979,7 @@
define arm_aapcs_vfpcc void @aesd_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set16_cond_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB65_2
@@ -2931,6 +3013,7 @@
define arm_aapcs_vfpcc void @aesd_set16_loop_via_ptr(i32 %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set16_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldrh r1, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: strh r1, [r2]
@@ -2979,6 +3062,7 @@
define arm_aapcs_vfpcc void @aesd_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set16_loop_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB67_1:
@@ -3024,6 +3108,7 @@
define arm_aapcs_vfpcc void @aesd_set32_via_ptr(i32* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set32_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
+; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: ldr r0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d0[0], r0
@@ -3035,6 +3120,7 @@
;
; CHECK-CORTEX-FIX-LABEL: aesd_set32_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
+; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: ldr r0, [r0]
; CHECK-CORTEX-FIX-NEXT: vmov.32 d0[0], r0
@@ -3060,6 +3146,7 @@
define arm_aapcs_vfpcc void @aesd_set32_via_val(i32 %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aesd_set32_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: vmov.32 d0[0], r0
; CHECK-FIX-NEXT: vmov.32 d16[0], r0
@@ -3083,6 +3170,7 @@
define arm_aapcs_vfpcc void @aesd_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set32_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB70_2
; CHECK-FIX-NEXT: @ %bb.1:
@@ -3143,6 +3231,7 @@
define arm_aapcs_vfpcc void @aesd_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set32_cond_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB71_2
@@ -3176,6 +3265,7 @@
define arm_aapcs_vfpcc void @aesd_set32_loop_via_ptr(i32 %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set32_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldr r1, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: str r1, [r2]
@@ -3224,6 +3314,7 @@
define arm_aapcs_vfpcc void @aesd_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set32_loop_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB73_1:
@@ -3269,6 +3360,7 @@
define arm_aapcs_vfpcc void @aesd_set64_via_ptr(i64* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set64_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
+; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vldr d0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vorr d16, d0, d0
@@ -3279,6 +3371,7 @@
;
; CHECK-CORTEX-FIX-LABEL: aesd_set64_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
+; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vldr d0, [r0]
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: vorr d16, d0, d0
@@ -3303,6 +3396,7 @@
define arm_aapcs_vfpcc void @aesd_set64_via_val(i64 %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aesd_set64_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: vmov.32 d0[0], r0
; CHECK-FIX-NEXT: vmov.32 d16[0], r0
@@ -3339,6 +3433,7 @@
; CHECK-FIX-NOSCHED-NEXT: .LBB76_3:
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: vldrne d0, [r1]
+; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
@@ -3358,6 +3453,7 @@
; CHECK-CORTEX-FIX-NEXT: .LBB76_3:
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: vldrne d0, [r1]
+; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
@@ -3403,6 +3499,7 @@
define arm_aapcs_vfpcc void @aesd_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set64_cond_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldr r1, [sp]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
@@ -3439,6 +3536,7 @@
define arm_aapcs_vfpcc void @aesd_set64_loop_via_ptr(i32 %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set64_loop_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
+; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r11, lr}
; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r11, lr}
; CHECK-FIX-NOSCHED-NEXT: ldrd r4, r5, [r1]
@@ -3460,6 +3558,7 @@
;
; CHECK-CORTEX-FIX-LABEL: aesd_set64_loop_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
+; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r11, lr}
; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r11, lr}
; CHECK-CORTEX-FIX-NEXT: ldrd r4, r5, [r1]
@@ -3510,6 +3609,7 @@
define arm_aapcs_vfpcc void @aesd_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set64_loop_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB79_1:
@@ -3558,6 +3658,7 @@
define arm_aapcs_vfpcc void @aesd_setf16_via_ptr(half* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
+; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: ldrh r0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d0[0], r0
@@ -3569,6 +3670,7 @@
;
; CHECK-CORTEX-FIX-LABEL: aesd_setf16_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
+; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r0]
; CHECK-CORTEX-FIX-NEXT: vmov.16 d0[0], r0
@@ -3595,6 +3697,7 @@
define arm_aapcs_vfpcc void @aesd_setf16_via_val(half %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aesd_setf16_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: vmov r1, s0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: vmov.16 d2[0], r1
@@ -4274,6 +4377,7 @@
define arm_aapcs_vfpcc void @aesd_setf16_loop_via_ptr(i32 %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_setf16_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldrh r1, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: strh r1, [r2]
@@ -4323,6 +4427,7 @@
define arm_aapcs_vfpcc void @aesd_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_setf16_loop_via_val:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB85_1:
@@ -4376,6 +4481,8 @@
; CHECK-FIX-NEXT: vldr s0, [r0]
; CHECK-FIX-NEXT: vld1.64 {d2, d3}, [r1]
; CHECK-FIX-NEXT: vmov.f32 s4, s0
+; CHECK-FIX-NEXT: vorr q1, q1, q1
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: aesd.8 q1, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q1
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
@@ -4400,6 +4507,8 @@
; CHECK-FIX-NEXT: vmov.f32 s4, s0
; CHECK-FIX-NEXT: vld1.64 {d0, d1}, [r0]
; CHECK-FIX-NEXT: vmov.f32 s0, s4
+; CHECK-FIX-NEXT: vorr q0, q0, q0
+; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: aesd.8 q0, q1
; CHECK-FIX-NEXT: aesimc.8 q8, q0
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
@@ -4420,6 +4529,7 @@
define arm_aapcs_vfpcc void @aesd_setf32_cond_via_ptr(i1 zeroext %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_setf32_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
+; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB88_2
; CHECK-FIX-NEXT: @ %bb.1:
@@ -4483,8 +4593,10 @@
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1]
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: vmovne.f32 s8, s0
+; CHECK-FIX-NOSCHED-NEXT: vorr q2, q2, q2
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: vmovne.f32 s4, s0
+; CHECK-FIX-NOSCHED-NEXT: vorr q1, q1, q1
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q2, q1
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q2
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
@@ -4495,8 +4607,10 @@
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d4, d5}, [r1]
; CHECK-CORTEX-FIX-NEXT: vmovne.f32 s8, s0
+; CHECK-CORTEX-FIX-NEXT: vorr q2, q2, q2
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: vmovne.f32 s4, s0
+; CHECK-CORTEX-FIX-NEXT: vorr q1, q1, q1
; CHECK-CORTEX-FIX-NEXT: aesd.8 q2, q1
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q2
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
@@ -4527,6 +4641,7 @@
; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s0, s4
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: .LBB90_2: @ =>This Inner Loop Header: Depth=1
+; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
@@ -4545,6 +4660,7 @@
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: vmov.f32 s0, s4
; CHECK-CORTEX-FIX-NEXT: .LBB90_2: @ =>This Inner Loop Header: Depth=1
+; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
@@ -4593,6 +4709,8 @@
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1]
; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s8, s0
+; CHECK-FIX-NOSCHED-NEXT: vorr q2, q2, q2
+; CHECK-FIX-NOSCHED-NEXT: vorr q1, q1, q1
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q2, q1
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q2
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
@@ -4609,7 +4727,9 @@
; CHECK-CORTEX-FIX-NEXT: .LBB91_2: @ =>This Inner Loop Header: Depth=1
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d4, d5}, [r1]
; CHECK-CORTEX-FIX-NEXT: vmov.f32 s8, s0
+; CHECK-CORTEX-FIX-NEXT: vorr q2, q2, q2
; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
+; CHECK-CORTEX-FIX-NEXT: vorr q1, q1, q1
; CHECK-CORTEX-FIX-NEXT: aesd.8 q2, q1
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q2
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
Index: llvm/test/CodeGen/ARM/O3-pipeline.ll
===================================================================
--- llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -188,6 +188,8 @@
; CHECK-NEXT: Live DEBUG_VALUE analysis
; CHECK-NEXT: Machine Outliner
; CHECK-NEXT: FunctionPass Manager
+; CHECK-NEXT: ReachingDefAnalysis
+; CHECK-NEXT: ARM fix for Cortex-A57 AES Erratum 1742098
; CHECK-NEXT: ARM Branch Targets
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: ARM constant island placement and branch shortening pass
Index: llvm/lib/Target/ARM/CMakeLists.txt
===================================================================
--- llvm/lib/Target/ARM/CMakeLists.txt
+++ llvm/lib/Target/ARM/CMakeLists.txt
@@ -32,6 +32,7 @@
ARMConstantPoolValue.cpp
ARMExpandPseudoInsts.cpp
ARMFastISel.cpp
+ ARMFixCortexA57AES1742098Pass.cpp
ARMFrameLowering.cpp
ARMHazardRecognizer.cpp
ARMInstructionSelector.cpp
Index: llvm/lib/Target/ARM/ARMTargetMachine.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -107,6 +107,7 @@
initializeMVEGatherScatterLoweringPass(Registry);
initializeARMSLSHardeningPass(Registry);
initializeMVELaneInterleavingPass(Registry);
+ initializeARMFixCortexA57AES1742098Pass(Registry);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -580,8 +581,20 @@
}
void ARMPassConfig::addPreEmitPass2() {
+ // Inserts fixup instructions before unsafe AES operations. Instructions may
+ // be inserted at the start of blocks and within blocks so this pass has to
+ // come before those below.
+ addPass(createARMFixCortexA57AES1742098Pass());
+ // Inserts BTIs at the start of functions and indirectly-called basic blocks,
+ // so passes cannot add to the start of basic blocks once this has run.
addPass(createARMBranchTargetsPass());
+ // Inserts Constant Islands. Block sizes cannot be increased after this point,
+ // as this may push the branch ranges and load offsets of accessing constant
+ // pools out of range.
addPass(createARMConstantIslandPass());
+ // Finalises Low-Overhead Loops. This replaces pseudo instructions with real
+ // instructions, but the pseudos all have conservative sizes so that block
+ // sizes will only be decreased by this pass.
addPass(createARMLowOverheadLoopsPass());
if (TM->getTargetTriple().isOSWindows()) {
Index: llvm/lib/Target/ARM/ARMFixCortexA57AES1742098Pass.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Target/ARM/ARMFixCortexA57AES1742098Pass.cpp
@@ -0,0 +1,431 @@
+//===-- ARMFixCortexA57AES1742098Pass.cpp ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This pass works around a Cortex Core Fused AES erratum:
+// - Cortex-A57 Erratum 1742098
+// - Cortex-A72 Erratum 1655431
+//
+// The erratum may be triggered if an input vector register to AESE or AESD was
+// last written by an instruction that only updated 32 bits of it. This can
+// occur for either of the input registers.
+//
+// The workaround chosen is to update the input register using `r = VORRq r, r`,
+// as this updates all 128 bits of the register unconditionally, but does not
+// change the values observed in `r`, making the input safe.
+//
+// This pass has to be conservative in a few cases:
+// - an input vector register to the AES instruction is defined outside the
+// current function, where we have to assume the register was updated in an
+// unsafe way; and
+// - an input vector register to the AES instruction is updated along multiple
+// different control-flow paths, where we have to ensure all the register
+// updating instructions are safe.
+//
+// Both of these cases may apply to an input vector register. In either case,
+// we need to ensure that, when the pass is finished, there exists a safe
+// instruction between every unsafe register updating instruction and the AES
+// instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "Utils/ARMBaseInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundleIterator.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/ReachingDefAnalysis.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <assert.h>
+#include <stdint.h>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-fix-cortex-a57-aes-1742098"
+
+//===----------------------------------------------------------------------===//
+
+namespace {
+class ARMFixCortexA57AES1742098 : public MachineFunctionPass {
+public:
+ static char ID;
+ explicit ARMFixCortexA57AES1742098() : MachineFunctionPass(ID) {
+ initializeARMFixCortexA57AES1742098Pass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+ StringRef getPassName() const override {
+ return "ARM fix for Cortex-A57 AES Erratum 1742098";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ReachingDefAnalysis>();
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ // This is the information needed to insert the fixup in the right place.
+ struct AESFixupLocation {
+ MachineBasicBlock *Block;
+ // The fixup instruction will be inserted *before* InsertionPt.
+ MachineInstr *InsertionPt;
+ MachineOperand *MOp;
+ };
+
+ void analyzeMF(MachineFunction &MF, ReachingDefAnalysis &RDA,
+ const ARMBaseRegisterInfo *TRI,
+ SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const;
+
+ void insertAESFixup(AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII,
+ const ARMBaseRegisterInfo *TRI) const;
+
+ static bool isFirstAESPairInstr(MachineInstr &MI);
+ static bool isSafeAESInput(MachineInstr &MI);
+};
+char ARMFixCortexA57AES1742098::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(ARMFixCortexA57AES1742098, DEBUG_TYPE,
+ "ARM fix for Cortex-A57 AES Erratum 1742098", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis);
+INITIALIZE_PASS_END(ARMFixCortexA57AES1742098, DEBUG_TYPE,
+ "ARM fix for Cortex-A57 AES Erratum 1742098", false, false)
+
+//===----------------------------------------------------------------------===//
+
+bool ARMFixCortexA57AES1742098::isFirstAESPairInstr(MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ return Opc == ARM::AESD || Opc == ARM::AESE;
+}
+
+bool ARMFixCortexA57AES1742098::isSafeAESInput(MachineInstr &MI) {
+ auto CondCodeIsAL = [](MachineInstr &MI) -> bool {
+ int CCIdx = MI.findFirstPredOperandIdx();
+ if (CCIdx == -1)
+ return false;
+ return MI.getOperand(CCIdx).getImm() == (int64_t)ARMCC::AL;
+ };
+
+ switch (MI.getOpcode()) {
+ // Unknown: Assume not safe.
+ default:
+ return false;
+ // 128-bit wide AES instructions
+ case ARM::AESD:
+ case ARM::AESE:
+ case ARM::AESMC:
+ case ARM::AESIMC:
+ // No CondCode.
+ return true;
+ // 128-bit and 64-bit wide bitwise ops (when condition = al)
+ case ARM::VANDd:
+ case ARM::VANDq:
+ case ARM::VORRd:
+ case ARM::VORRq:
+ case ARM::VEORd:
+ case ARM::VEORq:
+ case ARM::VMVNd:
+ case ARM::VMVNq:
+ // VMOV of 64-bit value between D registers (when condition = al)
+ case ARM::VMOVD:
+ // VMOV of 64 bit value from GPRs (when condition = al)
+ case ARM::VMOVDRR:
+ // VMOV of immediate into D or Q registers (when condition = al)
+ case ARM::VMOVv2i64:
+ case ARM::VMOVv1i64:
+ case ARM::VMOVv2f32:
+ case ARM::VMOVv4f32:
+ case ARM::VMOVv2i32:
+ case ARM::VMOVv4i32:
+ case ARM::VMOVv4i16:
+ case ARM::VMOVv8i16:
+ case ARM::VMOVv8i8:
+ case ARM::VMOVv16i8:
+ // Loads (when condition = al)
+ // VLDR Dn, [Rn, #imm]
+ case ARM::VLDRD:
+ // VLDM
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDIA:
+ // VLDn to all lanes.
+ case ARM::VLD1d64:
+ case ARM::VLD1q64:
+ case ARM::VLD1d32:
+ case ARM::VLD1q32:
+ case ARM::VLD2b32:
+ case ARM::VLD2d32:
+ case ARM::VLD2q32:
+ case ARM::VLD1d16:
+ case ARM::VLD1q16:
+ case ARM::VLD2d16:
+ case ARM::VLD2q16:
+ case ARM::VLD1d8:
+ case ARM::VLD1q8:
+ case ARM::VLD2b8:
+ case ARM::VLD2d8:
+ case ARM::VLD2q8:
+ case ARM::VLD3d32:
+ case ARM::VLD3q32:
+ case ARM::VLD3d16:
+ case ARM::VLD3q16:
+ case ARM::VLD3d8:
+ case ARM::VLD3q8:
+ case ARM::VLD4d32:
+ case ARM::VLD4q32:
+ case ARM::VLD4d16:
+ case ARM::VLD4q16:
+ case ARM::VLD4d8:
+ case ARM::VLD4q8:
+ // VLD1 (single element to one lane)
+ case ARM::VLD1LNd32:
+ case ARM::VLD1LNd32_UPD:
+ case ARM::VLD1LNd8:
+ case ARM::VLD1LNd8_UPD:
+ case ARM::VLD1LNd16:
+ case ARM::VLD1LNd16_UPD:
+ // VLD1 (single element to all lanes)
+ case ARM::VLD1DUPd32:
+ case ARM::VLD1DUPd32wb_fixed:
+ case ARM::VLD1DUPd32wb_register:
+ case ARM::VLD1DUPd16:
+ case ARM::VLD1DUPd16wb_fixed:
+ case ARM::VLD1DUPd16wb_register:
+ case ARM::VLD1DUPd8:
+ case ARM::VLD1DUPd8wb_fixed:
+ case ARM::VLD1DUPd8wb_register:
+ case ARM::VLD1DUPq32:
+ case ARM::VLD1DUPq32wb_fixed:
+ case ARM::VLD1DUPq32wb_register:
+ case ARM::VLD1DUPq16:
+ case ARM::VLD1DUPq16wb_fixed:
+ case ARM::VLD1DUPq16wb_register:
+ case ARM::VLD1DUPq8:
+ case ARM::VLD1DUPq8wb_fixed:
+ case ARM::VLD1DUPq8wb_register:
+ // VMOV
+ case ARM::VSETLNi32:
+ case ARM::VSETLNi16:
+ case ARM::VSETLNi8:
+ return CondCodeIsAL(MI);
+ };
+
+ return false;
+}
+
+bool ARMFixCortexA57AES1742098::runOnMachineFunction(MachineFunction &F) {
+ LLVM_DEBUG(dbgs() << "***** ARMFixCortexA57AES1742098 *****\n");
+ auto &STI = F.getSubtarget<ARMSubtarget>();
+
+ // Fix not requested or AES instructions not present: skip pass.
+ if (!STI.hasAES() || !STI.fixCortexA57AES1742098())
+ return false;
+
+ const ARMBaseRegisterInfo *TRI = STI.getRegisterInfo();
+ const ARMBaseInstrInfo *TII = STI.getInstrInfo();
+
+ auto &RDA = getAnalysis<ReachingDefAnalysis>();
+
+ // Analyze whole function to find instructions which need fixing up...
+ SmallVector<AESFixupLocation> FixupLocsForFn{};
+ analyzeMF(F, RDA, TRI, FixupLocsForFn);
+
+ // ... and fix the instructions up all at the same time.
+ bool Changed = false;
+ LLVM_DEBUG(dbgs() << "Inserting " << FixupLocsForFn.size() << " fixup(s)\n");
+ for (AESFixupLocation &FixupLoc : FixupLocsForFn) {
+ insertAESFixup(FixupLoc, TII, TRI);
+ Changed |= true;
+ }
+
+ return Changed;
+}
+
+void ARMFixCortexA57AES1742098::analyzeMF(
+ MachineFunction &MF, ReachingDefAnalysis &RDA,
+ const ARMBaseRegisterInfo *TRI,
+ SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const {
+ unsigned MaxAllowedFixups = 0;
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (!isFirstAESPairInstr(MI))
+ continue;
+
+ // Found an instruction to check the operands of.
+ LLVM_DEBUG(dbgs() << "Found AES Pair starting: " << MI);
+ assert(MI.getNumExplicitOperands() == 3 && MI.getNumExplicitDefs() == 1 &&
+ "Unknown AES Instruction Format. Expected 1 def, 2 uses.");
+
+ // A maximum of two fixups should be inserted for each AES pair (one per
+ // register use).
+ MaxAllowedFixups += 2;
+
+ // Inspect all operands, choosing whether to insert a fixup.
+ for (MachineOperand &MOp : MI.uses()) {
+ SmallPtrSet<MachineInstr *, 1> AllDefs{};
+ RDA.getGlobalReachingDefs(&MI, MOp.getReg(), AllDefs);
+
+ // Planned Fixup: This should be added to FixupLocsForFn at most once.
+ AESFixupLocation NewLoc{&MBB, &MI, &MOp};
+
+ // In small functions with loops, this operand may be both a live-in and
+ // have definitions within the function itself. These will need a fixup.
+ bool IsLiveIn = MF.front().isLiveIn(MOp.getReg());
+
+ // If the register doesn't have defining instructions, and is not a
+ // live-in, then something is wrong and the fixup must always be
+ // inserted to be safe.
+ if (!IsLiveIn && AllDefs.size() == 0) {
+ LLVM_DEBUG(dbgs()
+ << "Fixup Planned: No Defining Instrs found, not live-in: "
+ << printReg(MOp.getReg(), TRI) << "\n");
+ FixupLocsForFn.emplace_back(NewLoc);
+ continue;
+ }
+
+ auto IsUnsafe = [](MachineInstr *MI) -> bool {
+ return !isSafeAESInput(*MI);
+ };
+ size_t UnsafeCount = llvm::count_if(AllDefs, IsUnsafe);
+
+ // If there are no unsafe definitions...
+ if (UnsafeCount == 0) {
+ // ... and the register is not live-in ...
+ if (!IsLiveIn) {
+ // ... then skip the fixup.
+ LLVM_DEBUG(dbgs() << "No Fixup: Defining instrs are all safe: "
+ << printReg(MOp.getReg(), TRI) << "\n");
+ continue;
+ }
+
+ // Otherwise, the only unsafe "definition" is a live-in, so insert the
+ // fixup at the start of the function.
+ LLVM_DEBUG(dbgs()
+ << "Fixup Planned: Live-In (with safe defining instrs): "
+ << printReg(MOp.getReg(), TRI) << "\n");
+ NewLoc.Block = &MF.front();
+ NewLoc.InsertionPt = &*NewLoc.Block->begin();
+ LLVM_DEBUG(dbgs() << "Moving Fixup for Live-In to immediately before "
+ << *NewLoc.InsertionPt);
+ FixupLocsForFn.emplace_back(NewLoc);
+ continue;
+ }
+
+ // If a fixup is needed in more than one place, then the best place to
+ // insert it is adjacent to the use rather than introducing a fixup
+ // adjacent to each def.
+ //
+ // FIXME: It might be better to hoist this to the start of the BB, if
+ // possible.
+ if (IsLiveIn || UnsafeCount > 1) {
+ LLVM_DEBUG(dbgs() << "Fixup Planned: Multiple unsafe defining instrs "
+ "(including live-ins): "
+ << printReg(MOp.getReg(), TRI) << "\n");
+ FixupLocsForFn.emplace_back(NewLoc);
+ continue;
+ }
+
+ assert(UnsafeCount == 1 && !IsLiveIn &&
+ "At this point, there should be one unsafe defining instrs "
+ "and the defined register should not be a live-in.");
+ SmallPtrSetIterator<MachineInstr *> It =
+ llvm::find_if(AllDefs, IsUnsafe);
+ assert(It != AllDefs.end() &&
+ "UnsafeCount == 1 but No Unsafe MachineInstr found.");
+ MachineInstr *DefMI = *It;
+
+ LLVM_DEBUG(
+ dbgs() << "Fixup Planned: Found single unsafe defining instrs for "
+ << printReg(MOp.getReg(), TRI) << ": " << *DefMI);
+
+ // There is one unsafe defining instruction, which needs a fixup. It is
+ // generally good to hoist the fixup to be adjacent to the defining
+ // instruction rather than the using instruction, as the using
+ // instruction may be inside a loop when the defining instruction is
+ // not.
+ MachineBasicBlock::iterator DefIt = DefMI;
+ ++DefIt;
+ if (DefIt != DefMI->getParent()->end()) {
+ LLVM_DEBUG(dbgs() << "Moving Fixup to immediately after " << *DefMI
+ << "And immediately before " << *DefIt);
+ NewLoc.Block = DefIt->getParent();
+ NewLoc.InsertionPt = &*DefIt;
+ }
+
+ FixupLocsForFn.emplace_back(NewLoc);
+ }
+ }
+ }
+
+ assert(FixupLocsForFn.size() <= MaxAllowedFixups &&
+ "Inserted too many fixups for this function.");
+}
+
+void ARMFixCortexA57AES1742098::insertAESFixup(
+ AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII,
+ const ARMBaseRegisterInfo *TRI) const {
+ MachineOperand *OperandToFixup = FixupLoc.MOp;
+
+ assert(OperandToFixup->isReg() && "OperandToFixup must be a register");
+ Register RegToFixup = OperandToFixup->getReg();
+
+ LLVM_DEBUG(dbgs() << "Inserting VORRq of " << printReg(RegToFixup, TRI)
+ << " before: " << *FixupLoc.InsertionPt);
+
+ // Insert the new `VORRq qN, qN, qN`. There are a few details here:
+ //
+ // The uses are marked as killed, even if the original use of OperandToFixup
+ // is not killed, as the new instruction is clobbering the register. This is
+ // safe even if there are other uses of `qN`, as the VORRq is value-wise a
+ // no-op (it is inserted for microarchitectural reasons).
+ //
+ // The def and the uses are still marked as Renamable if the original register
+ // was, to avoid having to rummage through all the other uses and defs and
+ // unset their renamable bits.
+ unsigned Renamable = OperandToFixup->isRenamable() ? RegState::Renamable : 0;
+ BuildMI(*FixupLoc.Block, FixupLoc.InsertionPt, DebugLoc(),
+ TII->get(ARM::VORRq))
+ .addReg(RegToFixup, RegState::Define | Renamable)
+ .addReg(RegToFixup, RegState::Kill | Renamable)
+ .addReg(RegToFixup, RegState::Kill | Renamable)
+ .addImm((uint64_t)ARMCC::AL)
+ .addReg(ARM::NoRegister);
+}
+
+// Factory function used by ARMTargetMachine to add the pass to
+// the pass manager.
+FunctionPass *llvm::createARMFixCortexA57AES1742098Pass() {
+ return new ARMFixCortexA57AES1742098();
+}
Index: llvm/lib/Target/ARM/ARM.td
===================================================================
--- llvm/lib/Target/ARM/ARM.td
+++ llvm/lib/Target/ARM/ARM.td
@@ -542,6 +542,10 @@
"Don't place a BTI instruction "
"after a return-twice">;
+def FeatureFixCortexA57AES1742098 : SubtargetFeature<"fix-cortex-a57-aes-1742098",
+ "FixCortexA57AES1742098", "true",
+ "Work around Cortex-A57 Erratum 1742098 / Cortex-A72 Erratum 1655431 (AES)">;
+
//===----------------------------------------------------------------------===//
// ARM architecture class
//
@@ -1157,7 +1161,7 @@
// ARM processors
//
// Dummy CPU, used to target architectures
-def : ProcessorModel<"generic", CortexA8Model, []>;
+def : ProcessorModel<"generic", CortexA8Model, [FeatureFixCortexA57AES1742098]>;
// FIXME: Several processors below are not using their own scheduler
// model, but one of similar/previous processor. These should be fixed.
@@ -1467,13 +1471,15 @@
FeatureCRC,
FeatureFPAO,
FeatureAvoidPartialCPSR,
- FeatureCheapPredicableCPSR]>;
+ FeatureCheapPredicableCPSR,
+ FeatureFixCortexA57AES1742098]>;
def : ProcessorModel<"cortex-a72", CortexA57Model, [ARMv8a, ProcA72,
FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
- FeatureCRC]>;
+ FeatureCRC,
+ FeatureFixCortexA57AES1742098]>;
def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73,
FeatureHWDivThumb,
Index: llvm/lib/Target/ARM/ARM.h
===================================================================
--- llvm/lib/Target/ARM/ARM.h
+++ llvm/lib/Target/ARM/ARM.h
@@ -57,6 +57,7 @@
FunctionPass *createARMSLSHardeningPass();
FunctionPass *createARMIndirectThunks();
Pass *createMVELaneInterleavingPass();
+FunctionPass *createARMFixCortexA57AES1742098Pass();
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
@@ -77,6 +78,7 @@
void initializeMVEGatherScatterLoweringPass(PassRegistry &);
void initializeARMSLSHardeningPass(PassRegistry &);
void initializeMVELaneInterleavingPass(PassRegistry &);
+void initializeARMFixCortexA57AES1742098Pass(PassRegistry &);
} // end namespace llvm
Index: clang/test/Driver/arm-fix-cortex-a57-aes-1742098.c
===================================================================
--- /dev/null
+++ clang/test/Driver/arm-fix-cortex-a57-aes-1742098.c
@@ -0,0 +1,25 @@
+// RUN: %clang -### %s -target arm-none-none-eabi -march=armv8a -mfix-cortex-a57-aes-1742098 2>&1 | FileCheck %s --check-prefix=FIX
+// RUN: %clang -### %s -target arm-none-none-eabi -march=armv8a -mno-fix-cortex-a57-aes-1742098 2>&1 | FileCheck %s --check-prefix=NO-FIX
+
+// RUN: %clang -### %s -target arm-none-none-eabi -march=armv8a -mfix-cortex-a72-aes-1655431 2>&1 | FileCheck %s --check-prefix=FIX
+// RUN: %clang -### %s -target arm-none-none-eabi -march=armv8a -mno-fix-cortex-a72-aes-1655431 2>&1 | FileCheck %s --check-prefix=NO-FIX
+
+// RUN: %clang -### %s -target arm-none-none-eabi -march=armv8a 2>&1 | FileCheck %s --check-prefix=UNSPEC
+// RUN: %clang -### %s -target arm-none-none-eabi -march=armv8a 2>&1 | FileCheck %s --check-prefix=UNSPEC
+
+// This test checks that "-m(no-)fix-cortex-a57-aes-1742098" and
+// "-m(no-)fix-cortex-a72-aes-1655431" cause the "fix-cortex-a57-aes-1742098"
+// target feature to be passed to `clang -cc1`.
+//
+// This feature is also enabled in the backend for the two affected CPUs and the
+// "generic" cpu (used when only specifying -march), but that won't show up on
+// the `clang -cc1` command line.
+//
+// We do not check whether this option is correctly specified for the CPU: users
+// can specify the "-mfix-cortex-a57-aes-1742098" option with "-mcpu=cortex-a72"
+// and vice-versa, and will still get the fix, as the target feature and the fix
+// are the same in both cases.
+
+// FIX: "-target-feature" "+fix-cortex-a57-aes-1742098"
+// NO-FIX: "-target-feature" "-fix-cortex-a57-aes-1742098"
+// UNSPEC-NOT: "-target-feature" "{[+-]}fix-cortex-a57-aes-1742098"
Index: clang/lib/Driver/ToolChains/Arch/ARM.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Arch/ARM.cpp
+++ clang/lib/Driver/ToolChains/Arch/ARM.cpp
@@ -733,6 +733,16 @@
Features.push_back("-fix-cmse-cve-2021-35465");
}
+ // This also handles the -m(no-)fix-cortex-a72-aes-1655431 arguments via aliases.
+ if (Arg *A = Args.getLastArg(options::OPT_mfix_cortex_a57_aes_1742098,
+ options::OPT_mno_fix_cortex_a57_aes_1742098)) {
+ if (A->getOption().matches(options::OPT_mfix_cortex_a57_aes_1742098)) {
+ Features.push_back("+fix-cortex-a57-aes-1742098");
+ } else {
+ Features.push_back("-fix-cortex-a57-aes-1742098");
+ }
+ }
+
// Look for the last occurrence of -mlong-calls or -mno-long-calls. If
// neither options are specified, see if we are compiling for kernel/kext and
// decide whether to pass "+long-calls" based on the OS and its version.
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -3451,6 +3451,20 @@
def mno_fix_cmse_cve_2021_35465 : Flag<["-"], "mno-fix-cmse-cve-2021-35465">,
Group<m_arm_Features_Group>,
HelpText<"Don't work around VLLDM erratum CVE-2021-35465 (ARM only)">;
+def mfix_cortex_a57_aes_1742098 : Flag<["-"], "mfix-cortex-a57-aes-1742098">,
+ Group<m_arm_Features_Group>,
+ HelpText<"Work around Cortex-A57 Erratum 1742098 (ARM only)">;
+def mno_fix_cortex_a57_aes_1742098 : Flag<["-"], "mno-fix-cortex-a57-aes-1742098">,
+ Group<m_arm_Features_Group>,
+ HelpText<"Don't work around Cortex-A57 Erratum 1742098 (ARM only)">;
+def mfix_cortex_a72_aes_1655431 : Flag<["-"], "mfix-cortex-a72-aes-1655431">,
+ Group<m_arm_Features_Group>,
+ HelpText<"Work around Cortex-A72 Erratum 1655431 (ARM only)">,
+ Alias<mfix_cortex_a57_aes_1742098>;
+def mno_fix_cortex_a72_aes_1655431 : Flag<["-"], "mno-fix-cortex-a72-aes-1655431">,
+ Group<m_arm_Features_Group>,
+ HelpText<"Don't work around Cortex-A72 Erratum 1655431 (ARM only)">,
+ Alias<mno_fix_cortex_a57_aes_1742098>;
def mfix_cortex_a53_835769 : Flag<["-"], "mfix-cortex-a53-835769">,
Group<m_aarch64_Features_Group>,
HelpText<"Workaround Cortex-A53 erratum 835769 (AArch64 only)">;
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits