This revision was automatically updated to reflect the committed changes.
Closed by commit rGe00f189d392d: [InstCombine] Revert rL226781 "Teach InstCombine to canonicalize loads which… (authored by lebedev.ri).
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D88789/new/
https://reviews.llvm.org/D88789
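
For readers skimming the diff below, here is a minimal before/after sketch of the transform this patch reverts, adapted from the @pr27490a test in llvm/test/Transforms/InstCombine/atomic.ll (simplified to the relevant lines; it is illustrative, not the full test):

  ; With the reverted canonicalization, a load whose only users are stores
  ; was rewritten to operate on an equally sized legal integer type:
  %1 = bitcast i8** %p1 to i64*
  %l1 = load i64, i64* %1, align 8
  %2 = bitcast i8** %p2 to i64*
  store volatile i64 %l1, i64* %2, align 8

  ; After this revert, the load and store keep their original pointer type:
  %l = load i8*, i8** %p1, align 8
  store volatile i8* %l, i8** %p2, align 8
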
Files:
clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
llvm/test/Transforms/InstCombine/atomic.ll
llvm/test/Transforms/InstCombine/load.ll
llvm/test/Transforms/InstCombine/loadstore-metadata.ll
llvm/test/Transforms/InstCombine/non-integral-pointers.ll
llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll
Index: llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll
===================================================================
--- llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll
+++ llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll
@@ -50,10 +50,10 @@
define dso_local void @_Z3gen1S(%0* noalias sret align 8 %arg, %0* byval(%0) align 8 %arg1) {
; CHECK-LABEL: @_Z3gen1S(
; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast %0* [[ARG1:%.*]] to i64*
-; CHECK-NEXT: [[I21:%.*]] = load i64, i64* [[TMP0]], align 8
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast %0* [[ARG:%.*]] to i64*
-; CHECK-NEXT: store i64 [[I21]], i64* [[TMP1]], align 8
+; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds [[TMP0:%.*]], %0* [[ARG1:%.*]], i64 0, i32 0
+; CHECK-NEXT: [[I2:%.*]] = load i32*, i32** [[I]], align 8
+; CHECK-NEXT: [[I3:%.*]] = getelementptr inbounds [[TMP0]], %0* [[ARG:%.*]], i64 0, i32 0
+; CHECK-NEXT: store i32* [[I2]], i32** [[I3]], align 8
; CHECK-NEXT: ret void
;
bb:
@@ -68,13 +68,12 @@
; CHECK-LABEL: @_Z3foo1S(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[I2:%.*]] = alloca [[TMP0:%.*]], align 8
-; CHECK-NEXT: [[I1_SROA_0_0_I5_SROA_CAST:%.*]] = bitcast %0* [[ARG:%.*]] to i64*
-; CHECK-NEXT: [[I1_SROA_0_0_COPYLOAD:%.*]] = load i64, i64* [[I1_SROA_0_0_I5_SROA_CAST]], align 8
-; CHECK-NEXT: [[I_SROA_0_0_I6_SROA_CAST:%.*]] = bitcast %0* [[I2]] to i64*
-; CHECK-NEXT: store i64 [[I1_SROA_0_0_COPYLOAD]], i64* [[I_SROA_0_0_I6_SROA_CAST]], align 8
+; CHECK-NEXT: [[I1_SROA_0_0_I5_SROA_IDX:%.*]] = getelementptr inbounds [[TMP0]], %0* [[ARG:%.*]], i64 0, i32 0
+; CHECK-NEXT: [[I1_SROA_0_0_COPYLOAD:%.*]] = load i32*, i32** [[I1_SROA_0_0_I5_SROA_IDX]], align 8
+; CHECK-NEXT: [[I_SROA_0_0_I6_SROA_IDX:%.*]] = getelementptr inbounds [[TMP0]], %0* [[I2]], i64 0, i32 0
+; CHECK-NEXT: store i32* [[I1_SROA_0_0_COPYLOAD]], i32** [[I_SROA_0_0_I6_SROA_IDX]], align 8
; CHECK-NEXT: tail call void @_Z7escape01S(%0* nonnull byval(%0) align 8 [[I2]])
-; CHECK-NEXT: [[TMP0]] = inttoptr i64 [[I1_SROA_0_0_COPYLOAD]] to i32*
-; CHECK-NEXT: ret i32* [[TMP0]]
+; CHECK-NEXT: ret i32* [[I1_SROA_0_0_COPYLOAD]]
;
bb:
%i = alloca %0, align 8
@@ -108,24 +107,21 @@
define dso_local i32* @_Z3bar1S(%0* byval(%0) align 8 %arg) {
; CHECK-LABEL: @_Z3bar1S(
; CHECK-NEXT: bb:
-; CHECK-NEXT: [[I1_SROA_0_0_I4_SROA_CAST:%.*]] = bitcast %0* [[ARG:%.*]] to i64*
-; CHECK-NEXT: [[I1_SROA_0_0_COPYLOAD:%.*]] = load i64, i64* [[I1_SROA_0_0_I4_SROA_CAST]], align 8
+; CHECK-NEXT: [[I1_SROA_0_0_I4_SROA_IDX:%.*]] = getelementptr inbounds [[TMP0:%.*]], %0* [[ARG:%.*]], i64 0, i32 0
+; CHECK-NEXT: [[I1_SROA_0_0_COPYLOAD:%.*]] = load i32*, i32** [[I1_SROA_0_0_I4_SROA_IDX]], align 8
; CHECK-NEXT: [[I5:%.*]] = tail call i32 @_Z4condv()
; CHECK-NEXT: [[I6_NOT:%.*]] = icmp eq i32 [[I5]], 0
; CHECK-NEXT: br i1 [[I6_NOT]], label [[BB10:%.*]], label [[BB7:%.*]]
; CHECK: bb7:
; CHECK-NEXT: tail call void @_Z5sync0v()
-; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i64 [[I1_SROA_0_0_COPYLOAD]] to i32*
-; CHECK-NEXT: tail call void @_Z7escape0Pi(i32* [[TMP0]])
+; CHECK-NEXT: tail call void @_Z7escape0Pi(i32* [[I1_SROA_0_0_COPYLOAD]])
; CHECK-NEXT: br label [[BB13:%.*]]
; CHECK: bb10:
; CHECK-NEXT: tail call void @_Z5sync1v()
-; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[I1_SROA_0_0_COPYLOAD]] to i32*
-; CHECK-NEXT: tail call void @_Z7escape1Pi(i32* [[TMP1]])
+; CHECK-NEXT: tail call void @_Z7escape1Pi(i32* [[I1_SROA_0_0_COPYLOAD]])
; CHECK-NEXT: br label [[BB13]]
; CHECK: bb13:
-; CHECK-NEXT: [[DOTPRE_PHI:%.*]] = phi i32* [ [[TMP1]], [[BB10]] ], [ [[TMP0]], [[BB7]] ]
-; CHECK-NEXT: ret i32* [[DOTPRE_PHI]]
+; CHECK-NEXT: ret i32* [[I1_SROA_0_0_COPYLOAD]]
;
bb:
%i = alloca %0, align 8
Index: llvm/test/Transforms/InstCombine/non-integral-pointers.ll
===================================================================
--- llvm/test/Transforms/InstCombine/non-integral-pointers.ll
+++ llvm/test/Transforms/InstCombine/non-integral-pointers.ll
@@ -41,10 +41,8 @@
; integers, since pointers in address space 3 are integral.
; CHECK-LABEL: @f_3(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8 addrspace(3)** [[PTR0:%.*]] to i64*
-; CHECK-NEXT: [[VAL1:%.*]] = load i64, i64* [[TMP0]], align 8
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(3)** [[PTR1:%.*]] to i64*
-; CHECK-NEXT: store i64 [[VAL1]], i64* [[TMP1]], align 8
+; CHECK-NEXT: [[VAL:%.*]] = load i8 addrspace(3)*, i8 addrspace(3)** [[PTR0:%.*]], align 8
+; CHECK-NEXT: store i8 addrspace(3)* [[VAL]], i8 addrspace(3)** [[PTR1:%.*]], align 8
; CHECK-NEXT: ret void
;
entry:
@@ -79,13 +77,13 @@
define i64 @g2(i8* addrspace(4)* %gp) {
; CHECK-LABEL: @g2(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* addrspace(4)* [[GP:%.*]] to i64 addrspace(4)*
-; CHECK-NEXT: [[DOTPRE1:%.*]] = load i64, i64 addrspace(4)* [[TMP1]], align 8
+; CHECK-NEXT: [[DOTPRE:%.*]] = load i8*, i8* addrspace(4)* [[GP:%.*]], align 8
; CHECK-NEXT: [[V74:%.*]] = call i8 addrspace(4)* @alloc()
; CHECK-NEXT: [[V77:%.*]] = getelementptr i8, i8 addrspace(4)* [[V74]], i64 -8
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(4)* [[V77]] to i64 addrspace(4)*
-; CHECK-NEXT: store i64 [[DOTPRE1]], i64 addrspace(4)* [[TMP2]], align 8
-; CHECK-NEXT: ret i64 [[DOTPRE1]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(4)* [[V77]] to i8* addrspace(4)*
+; CHECK-NEXT: store i8* [[DOTPRE]], i8* addrspace(4)* [[TMP1]], align 8
+; CHECK-NEXT: [[V81_CAST:%.*]] = ptrtoint i8* [[DOTPRE]] to i64
+; CHECK-NEXT: ret i64 [[V81_CAST]]
;
%.pre = load i8*, i8* addrspace(4)* %gp, align 8
%v74 = call i8 addrspace(4)* @alloc()
Index: llvm/test/Transforms/InstCombine/loadstore-metadata.ll
===================================================================
--- llvm/test/Transforms/InstCombine/loadstore-metadata.ll
+++ llvm/test/Transforms/InstCombine/loadstore-metadata.ll
@@ -161,24 +161,11 @@
}
define void @test_load_cast_combine_nonnull(float** %ptr) {
-; We can't preserve nonnull metadata when converting a load of a pointer to
-; a load of an integer. Instead, we translate it to range metadata.
-; FIXME: We should also transform range metadata back into nonnull metadata.
-; FIXME: This test is very fragile. If any LABEL lines are added after
-; this point, the test will fail, because this test depends on a metadata tuple,
-; which is always emitted at the end of the file. At some point, we should
-; consider an option to the IR printer to emit MD tuples after the function
-; that first uses them--this will allow us to refer to them like this and not
-; have the tests break. For now, this function must always come last in this
-; file, and no LABEL lines are to be added after this point.
-;
; CHECK-LABEL: @test_load_cast_combine_nonnull(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast float** [[PTR:%.*]] to i64*
-; CHECK-NEXT: [[P1:%.*]] = load i64, i64* [[TMP0]], align 8, !range ![[MD:[0-9]+]]
+; CHECK-NEXT: [[P:%.*]] = load float*, float** [[PTR:%.*]], align 8, !nonnull !7
; CHECK-NEXT: [[GEP:%.*]] = getelementptr float*, float** [[PTR]], i64 42
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast float** [[GEP]] to i64*
-; CHECK-NEXT: store i64 [[P1]], i64* [[TMP1]], align 8
+; CHECK-NEXT: store float* [[P]], float** [[GEP]], align 8
; CHECK-NEXT: ret void
;
entry:
@@ -188,8 +175,6 @@
ret void
}
-; This is the metadata tuple that we reference above:
-; CHECK: ![[MD]] = !{i64 1, i64 0}
!0 = !{!1, !1, i64 0}
!1 = !{!"scalar type", !2}
!2 = !{!"root"}
Index: llvm/test/Transforms/InstCombine/load.ll
===================================================================
--- llvm/test/Transforms/InstCombine/load.ll
+++ llvm/test/Transforms/InstCombine/load.ll
@@ -205,18 +205,16 @@
define void @test16(i8* %x, i8* %a, i8* %b, i8* %c) {
; CHECK-LABEL: @test16(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; CHECK-NEXT: [[X11:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[A:%.*]] to i32*
-; CHECK-NEXT: store i32 [[X11]], i32* [[TMP1]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[B:%.*]] to i32*
-; CHECK-NEXT: store i32 [[X11]], i32* [[TMP2]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[X]] to i32*
-; CHECK-NEXT: [[X22:%.*]] = load i32, i32* [[TMP3]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[B]] to i32*
-; CHECK-NEXT: store i32 [[X22]], i32* [[TMP4]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[C:%.*]] to i32*
-; CHECK-NEXT: store i32 [[X22]], i32* [[TMP5]], align 4
+; CHECK-NEXT: [[X_CAST:%.*]] = bitcast i8* [[X:%.*]] to float*
+; CHECK-NEXT: [[A_CAST:%.*]] = bitcast i8* [[A:%.*]] to float*
+; CHECK-NEXT: [[B_CAST:%.*]] = bitcast i8* [[B:%.*]] to float*
+; CHECK-NEXT: [[X1:%.*]] = load float, float* [[X_CAST]], align 4
+; CHECK-NEXT: store float [[X1]], float* [[A_CAST]], align 4
+; CHECK-NEXT: store float [[X1]], float* [[B_CAST]], align 4
+; CHECK-NEXT: [[X2:%.*]] = load float, float* [[X_CAST]], align 4
+; CHECK-NEXT: store float [[X2]], float* [[B_CAST]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[C:%.*]] to float*
+; CHECK-NEXT: store float [[X2]], float* [[TMP0]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -240,18 +238,16 @@
define void @test16-vect(i8* %x, i8* %a, i8* %b, i8* %c) {
; CHECK-LABEL: @test16-vect(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; CHECK-NEXT: [[X11:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[A:%.*]] to i32*
-; CHECK-NEXT: store i32 [[X11]], i32* [[TMP1]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[B:%.*]] to i32*
-; CHECK-NEXT: store i32 [[X11]], i32* [[TMP2]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[X]] to i32*
-; CHECK-NEXT: [[X22:%.*]] = load i32, i32* [[TMP3]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[B]] to i32*
-; CHECK-NEXT: store i32 [[X22]], i32* [[TMP4]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[C:%.*]] to i32*
-; CHECK-NEXT: store i32 [[X22]], i32* [[TMP5]], align 4
+; CHECK-NEXT: [[X_CAST:%.*]] = bitcast i8* [[X:%.*]] to <4 x i8>*
+; CHECK-NEXT: [[A_CAST:%.*]] = bitcast i8* [[A:%.*]] to <4 x i8>*
+; CHECK-NEXT: [[B_CAST:%.*]] = bitcast i8* [[B:%.*]] to <4 x i8>*
+; CHECK-NEXT: [[X1:%.*]] = load <4 x i8>, <4 x i8>* [[X_CAST]], align 4
+; CHECK-NEXT: store <4 x i8> [[X1]], <4 x i8>* [[A_CAST]], align 4
+; CHECK-NEXT: store <4 x i8> [[X1]], <4 x i8>* [[B_CAST]], align 4
+; CHECK-NEXT: [[X2:%.*]] = load <4 x i8>, <4 x i8>* [[X_CAST]], align 4
+; CHECK-NEXT: store <4 x i8> [[X2]], <4 x i8>* [[B_CAST]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[C:%.*]] to <4 x i8>*
+; CHECK-NEXT: store <4 x i8> [[X2]], <4 x i8>* [[TMP0]], align 4
; CHECK-NEXT: ret void
;
entry:
Index: llvm/test/Transforms/InstCombine/atomic.ll
===================================================================
--- llvm/test/Transforms/InstCombine/atomic.ll
+++ llvm/test/Transforms/InstCombine/atomic.ll
@@ -325,11 +325,9 @@
define i32 @test18(float* %p) {
; CHECK-LABEL: @test18(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to i32*
-; CHECK-NEXT: [[X1:%.*]] = load atomic i32, i32* [[TMP1]] unordered, align 4
+; CHECK-NEXT: [[X:%.*]] = load atomic float, float* [[P:%.*]] unordered, align 4
; CHECK-NEXT: call void @clobber()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[P]] to i32*
-; CHECK-NEXT: store atomic i32 [[X1]], i32* [[TMP2]] unordered, align 4
+; CHECK-NEXT: store atomic float [[X]], float* [[P]] unordered, align 4
; CHECK-NEXT: ret i32 0
;
%x = load atomic float, float* %p unordered, align 4
@@ -376,10 +374,8 @@
define void @pr27490a(i8** %p1, i8** %p2) {
; CHECK-LABEL: @pr27490a(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8** [[P1:%.*]] to i64*
-; CHECK-NEXT: [[L1:%.*]] = load i64, i64* [[TMP1]], align 8
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8** [[P2:%.*]] to i64*
-; CHECK-NEXT: store volatile i64 [[L1]], i64* [[TMP2]], align 8
+; CHECK-NEXT: [[L:%.*]] = load i8*, i8** [[P1:%.*]], align 8
+; CHECK-NEXT: store volatile i8* [[L]], i8** [[P2:%.*]], align 8
; CHECK-NEXT: ret void
;
%l = load i8*, i8** %p1
@@ -389,10 +385,8 @@
define void @pr27490b(i8** %p1, i8** %p2) {
; CHECK-LABEL: @pr27490b(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8** [[P1:%.*]] to i64*
-; CHECK-NEXT: [[L1:%.*]] = load i64, i64* [[TMP1]], align 8
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8** [[P2:%.*]] to i64*
-; CHECK-NEXT: store atomic i64 [[L1]], i64* [[TMP2]] seq_cst, align 8
+; CHECK-NEXT: [[L:%.*]] = load i8*, i8** [[P1:%.*]], align 8
+; CHECK-NEXT: store atomic i8* [[L]], i8** [[P2:%.*]] seq_cst, align 8
; CHECK-NEXT: ret void
;
%l = load i8*, i8** %p1
Index: llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -554,42 +554,8 @@
if (LI.getPointerOperand()->isSwiftError())
return nullptr;
- Type *Ty = LI.getType();
const DataLayout &DL = IC.getDataLayout();
- // Try to canonicalize loads which are only ever stored to operate over
- // integers instead of any other type. We only do this when the loaded type
- // is sized and has a size exactly the same as its store size and the store
- // size is a legal integer type.
- // Do not perform canonicalization if minmax pattern is found (to avoid
- // infinite loop).
- Type *Dummy;
- if (!Ty->isIntegerTy() && Ty->isSized() && !isa<ScalableVectorType>(Ty) &&
- DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
- DL.typeSizeEqualsStoreSize(Ty) && !DL.isNonIntegralPointerType(Ty) &&
- !isMinMaxWithLoads(InstCombiner::peekThroughBitcast(
- LI.getPointerOperand(), /*OneUseOnly=*/true),
- Dummy)) {
- if (all_of(LI.users(), [&LI](User *U) {
- auto *SI = dyn_cast<StoreInst>(U);
- return SI && SI->getPointerOperand() != &LI &&
- !SI->getPointerOperand()->isSwiftError();
- })) {
- LoadInst *NewLoad = IC.combineLoadToNewType(
- LI, Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
- // Replace all the stores with stores of the newly loaded value.
- for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
- auto *SI = cast<StoreInst>(*UI++);
- IC.Builder.SetInsertPoint(SI);
- combineStoreToNewValue(IC, *SI, NewLoad);
- IC.eraseInstFromFunction(*SI);
- }
- assert(LI.use_empty() && "Failed to remove all users of the load!");
- // Return the old load so the combiner can delete it safely.
- return &LI;
- }
- }
-
// Fold away bit casts of the loaded value by loading the desired type.
// We can do this for BitCastInsts as well as casts from and to pointer types,
// as long as those are noops (i.e., the source or dest type have the same
Index: clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
===================================================================
--- clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
+++ clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
@@ -72,9 +72,9 @@
// CHECK-512-NEXT: entry:
// CHECK-512-NEXT: [[V_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-512-NEXT: store <vscale x 16 x i1> [[V:%.*]], <vscale x 16 x i1>* [[V_ADDR]], align 16, [[TBAA13:!tbaa !.*]]
-// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[V_ADDR]] to i64*
-// CHECK-512-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 16, [[TBAA10]]
-// CHECK-512-NEXT: store i64 [[TMP1]], i64* bitcast (<8 x i8>* @global_bool to i64*), align 2, [[TBAA10]]
+// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[V_ADDR]] to <8 x i8>*
+// CHECK-512-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 16, [[TBAA10]]
+// CHECK-512-NEXT: store <8 x i8> [[TMP1]], <8 x i8>* @global_bool, align 2, [[TBAA10]]
// CHECK-512-NEXT: ret void
//
void write_global_bool(svbool_t v) { global_bool = v; }
Index: clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
===================================================================
--- clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
+++ clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
@@ -81,13 +81,11 @@
// CHECK-NEXT: [[TYPE_ADDR:%.*]] = alloca <8 x i8>, align 16
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8>* [[TYPE]] to <vscale x 16 x i1>*
// CHECK-NEXT: store <vscale x 16 x i1> [[TYPE_COERCE:%.*]], <vscale x 16 x i1>* [[TMP0]], align 16
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[TYPE]] to i64*
-// CHECK-NEXT: [[TYPE12:%.*]] = load i64, i64* [[TMP1]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8>* [[TYPE_ADDR]] to i64*
-// CHECK-NEXT: store i64 [[TYPE12]], i64* [[TMP2]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8>* [[TYPE_ADDR]] to <vscale x 16 x i1>*
-// CHECK-NEXT: [[TMP4:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP3]], align 16, [[TBAA6]]
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP4]]
+// CHECK-NEXT: [[TYPE1:%.*]] = load <8 x i8>, <8 x i8>* [[TYPE]], align 16, [[TBAA6]]
+// CHECK-NEXT: store <8 x i8> [[TYPE1]], <8 x i8>* [[TYPE_ADDR]], align 16, [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[TYPE_ADDR]] to <vscale x 16 x i1>*
+// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP1]], align 16, [[TBAA6]]
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
//
svbool_t to_svbool_t(fixed_bool_t type) {
return type;
@@ -98,12 +96,12 @@
// CHECK-NEXT: [[TYPE_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-NEXT: store <vscale x 16 x i1> [[TYPE:%.*]], <vscale x 16 x i1>* [[TYPE_ADDR]], align 16, [[TBAA13:!tbaa !.*]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[TYPE_ADDR]] to i64*
-// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to i64*
-// CHECK-NEXT: store i64 [[TMP1]], i64* [[TMP2]], align 16
-// CHECK-NEXT: [[TMP3:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[TYPE_ADDR]] to <8 x i8>*
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 16, [[TBAA6]]
+// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to <8 x i8>*
+// CHECK-NEXT: store <8 x i8> [[TMP1]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 16
+// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
//
fixed_bool_t from_svbool_t(svbool_t type) {
return type;
Index: clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
===================================================================
--- clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
+++ clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
@@ -169,28 +169,24 @@
// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8>* [[OP1]] to <vscale x 16 x i1>*
// CHECK-NEXT: store <vscale x 16 x i1> [[OP1_COERCE:%.*]], <vscale x 16 x i1>* [[TMP0]], align 16
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[OP1]] to i64*
-// CHECK-NEXT: [[OP113:%.*]] = load i64, i64* [[TMP1]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8>* [[OP2]] to <vscale x 16 x i1>*
-// CHECK-NEXT: store <vscale x 16 x i1> [[OP2_COERCE:%.*]], <vscale x 16 x i1>* [[TMP2]], align 16
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8>* [[OP2]] to i64*
-// CHECK-NEXT: [[OP224:%.*]] = load i64, i64* [[TMP3]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to i64*
-// CHECK-NEXT: store i64 [[OP113]], i64* [[TMP4]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8>* [[OP2_ADDR]] to i64*
-// CHECK-NEXT: store i64 [[OP224]], i64* [[TMP5]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to <vscale x 16 x i1>*
-// CHECK-NEXT: [[TMP7:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP6]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8>* [[OP2_ADDR]] to <vscale x 16 x i1>*
-// CHECK-NEXT: [[TMP9:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP8]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> [[TMP9]])
-// CHECK-NEXT: store <vscale x 16 x i1> [[TMP10]], <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]], align 16, [[TBAA13:!tbaa !.*]]
-// CHECK-NEXT: [[TMP11:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]] to i64*
-// CHECK-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP13:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to i64*
-// CHECK-NEXT: store i64 [[TMP12]], i64* [[TMP13]], align 16
-// CHECK-NEXT: [[TMP14:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP14]]
+// CHECK-NEXT: [[OP11:%.*]] = load <8 x i8>, <8 x i8>* [[OP1]], align 16, [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[OP2]] to <vscale x 16 x i1>*
+// CHECK-NEXT: store <vscale x 16 x i1> [[OP2_COERCE:%.*]], <vscale x 16 x i1>* [[TMP1]], align 16
+// CHECK-NEXT: [[OP22:%.*]] = load <8 x i8>, <8 x i8>* [[OP2]], align 16, [[TBAA6]]
+// CHECK-NEXT: store <8 x i8> [[OP11]], <8 x i8>* [[OP1_ADDR]], align 16, [[TBAA6]]
+// CHECK-NEXT: store <8 x i8> [[OP22]], <8 x i8>* [[OP2_ADDR]], align 16, [[TBAA6]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to <vscale x 16 x i1>*
+// CHECK-NEXT: [[TMP3:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP2]], align 16, [[TBAA6]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8>* [[OP2_ADDR]] to <vscale x 16 x i1>*
+// CHECK-NEXT: [[TMP5:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP4]], align 16, [[TBAA6]]
+// CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]])
+// CHECK-NEXT: store <vscale x 16 x i1> [[TMP6]], <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]], align 16, [[TBAA13:!tbaa !.*]]
+// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]] to <8 x i8>*
+// CHECK-NEXT: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 16, [[TBAA6]]
+// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to <8 x i8>*
+// CHECK-NEXT: store <8 x i8> [[TMP7]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 16
+// CHECK-NEXT: [[TMP8:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP8]]
//
fixed_bool_t call_bool_ff(svbool_t pg, fixed_bool_t op1, fixed_bool_t op2) {
return svsel(pg, op1, op2);
@@ -260,20 +256,18 @@
// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8>* [[OP1]] to <vscale x 16 x i1>*
// CHECK-NEXT: store <vscale x 16 x i1> [[OP1_COERCE:%.*]], <vscale x 16 x i1>* [[TMP0]], align 16
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[OP1]] to i64*
-// CHECK-NEXT: [[OP112:%.*]] = load i64, i64* [[TMP1]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to i64*
-// CHECK-NEXT: store i64 [[OP112]], i64* [[TMP2]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to <vscale x 16 x i1>*
-// CHECK-NEXT: [[TMP4:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP3]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1> [[OP2:%.*]])
-// CHECK-NEXT: store <vscale x 16 x i1> [[TMP5]], <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]], align 16, [[TBAA13]]
-// CHECK-NEXT: [[TMP6:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]] to i64*
-// CHECK-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP8:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to i64*
-// CHECK-NEXT: store i64 [[TMP7]], i64* [[TMP8]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP9]]
+// CHECK-NEXT: [[OP11:%.*]] = load <8 x i8>, <8 x i8>* [[OP1]], align 16, [[TBAA6]]
+// CHECK-NEXT: store <8 x i8> [[OP11]], <8 x i8>* [[OP1_ADDR]], align 16, [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to <vscale x 16 x i1>*
+// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP1]], align 16, [[TBAA6]]
+// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[TMP2]], <vscale x 16 x i1> [[OP2:%.*]])
+// CHECK-NEXT: store <vscale x 16 x i1> [[TMP3]], <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]], align 16, [[TBAA13]]
+// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]] to <8 x i8>*
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 16, [[TBAA6]]
+// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to <8 x i8>*
+// CHECK-NEXT: store <8 x i8> [[TMP4]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP5]]
//
fixed_bool_t call_bool_fs(svbool_t pg, fixed_bool_t op1, svbool_t op2) {
return svsel(pg, op1, op2);
@@ -325,12 +319,12 @@
// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
// CHECK-NEXT: store <vscale x 16 x i1> [[TMP0]], <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]], align 16, [[TBAA13]]
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]] to i64*
-// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to i64*
-// CHECK-NEXT: store i64 [[TMP2]], i64* [[TMP3]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP4]]
+// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]] to <8 x i8>*
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 16, [[TBAA6]]
+// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to <8 x i8>*
+// CHECK-NEXT: store <8 x i8> [[TMP1]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 16
+// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
//
fixed_bool_t call_bool_ss(svbool_t pg, svbool_t op1, svbool_t op2) {
return svsel(pg, op1, op2);
Index: clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
===================================================================
--- clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
+++ clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
@@ -255,22 +255,20 @@
// CHECK-256-NEXT: entry:
// CHECK-256-NEXT: [[X_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-256-NEXT: store <vscale x 16 x i1> [[X:%.*]], <vscale x 16 x i1>* [[X_ADDR]], align 16, [[TBAA15:!tbaa !.*]]
-// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[X_ADDR]] to i32*
-// CHECK-256-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 16, [[TBAA6]]
-// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1
-// CHECK-256-NEXT: [[TMP2:%.*]] = bitcast [3 x <4 x i8>]* [[Y]] to i32*
-// CHECK-256-NEXT: store i32 [[TMP1]], i32* [[TMP2]], align 2, [[TBAA6]]
+// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[X_ADDR]] to <4 x i8>*
+// CHECK-256-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP0]], align 16, [[TBAA6]]
+// CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
+// CHECK-256-NEXT: store <4 x i8> [[TMP1]], <4 x i8>* [[ARRAYIDX]], align 2, [[TBAA6]]
// CHECK-256-NEXT: ret void
//
// CHECK-512-LABEL: @write_bool(
// CHECK-512-NEXT: entry:
// CHECK-512-NEXT: [[X_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-512-NEXT: store <vscale x 16 x i1> [[X:%.*]], <vscale x 16 x i1>* [[X_ADDR]], align 16, [[TBAA15:!tbaa !.*]]
-// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[X_ADDR]] to i64*
-// CHECK-512-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 16, [[TBAA6]]
-// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1
-// CHECK-512-NEXT: [[TMP2:%.*]] = bitcast [3 x <8 x i8>]* [[Y]] to i64*
-// CHECK-512-NEXT: store i64 [[TMP1]], i64* [[TMP2]], align 2, [[TBAA6]]
+// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[X_ADDR]] to <8 x i8>*
+// CHECK-512-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 16, [[TBAA6]]
+// CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
+// CHECK-512-NEXT: store <8 x i8> [[TMP1]], <8 x i8>* [[ARRAYIDX]], align 2, [[TBAA6]]
// CHECK-512-NEXT: ret void
//
void write_bool(struct struct_bool *s, svbool_t x) {