Author: Roman Lebedev
Date: 2022-11-27T00:00:06+03:00
New Revision: 25f01d593ce296078f57e872778b77d074ae5888
URL: https://github.com/llvm/llvm-project/commit/25f01d593ce296078f57e872778b77d074ae5888
DIFF: https://github.com/llvm/llvm-project/commit/25f01d593ce296078f57e872778b77d074ae5888.diff

LOG: Revert "[SROA] `isVectorPromotionViable()`: memory intrinsics operate on vectors of bytes (take 2)"

TableGen is still getting miscompiled on PPC buildbots.
Sent a mail with request for help.

This reverts commit 3c4d2a03968ccf5889bacffe02d6fa2443b0260f.

Added:


Modified:
    clang/test/CodeGenOpenCL/amdgpu-nullptr.cl
    llvm/lib/Transforms/Scalar/SROA.cpp
    llvm/test/CodeGen/AMDGPU/v1024.ll
    llvm/test/DebugInfo/X86/sroasplit-1.ll
    llvm/test/DebugInfo/X86/sroasplit-4.ll
    llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll
    llvm/test/Transforms/SROA/address-spaces.ll
    llvm/test/Transforms/SROA/alignment.ll
    llvm/test/Transforms/SROA/alloca-address-space.ll
    llvm/test/Transforms/SROA/basictest.ll
    llvm/test/Transforms/SROA/pointer-offset-size.ll
    llvm/test/Transforms/SROA/slice-width.ll
    llvm/test/Transforms/SROA/tbaa-struct.ll
    llvm/test/Transforms/SROA/tbaa-struct2.ll
    llvm/test/Transforms/SROA/vector-promotion.ll

Removed:


################################################################################
diff --git a/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl b/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl
index 859e81f08d6bd..65f6f2e7d8c24 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl
@@ -515,17 +515,13 @@ typedef struct {
   private char *p;
 } StructTy3;
 
-// CHECK-LABEL: @test_memset_private(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast [[STRUCT_STRUCTTY3:%.*]] addrspace(5)* [[PTR:%.*]] to i8 addrspace(5)*
-// CHECK-NEXT:    [[S3_SROA_0_SROA_0_0_S3_SROA_0_0__SROA_CAST2_SROA_CAST:%.*]] = bitcast [[STRUCT_STRUCTTY3]] addrspace(5)* [[PTR]] to <32 x i8> addrspace(5)*
-// CHECK-NEXT:    store <32 x i8> zeroinitializer, <32 x i8> addrspace(5)* [[S3_SROA_0_SROA_0_0_S3_SROA_0_0__SROA_CAST2_SROA_CAST]], align 8, !tbaa.struct !9
-// CHECK-NEXT:    [[S3_SROA_4_0__SROA_IDX6:%.*]] = getelementptr inbounds [[STRUCT_STRUCTTY3]], [[STRUCT_STRUCTTY3]] addrspace(5)* [[PTR]], i32 0, i32 4
-// CHECK-NEXT:    store i8 addrspace(5)* addrspacecast (i8* null to i8 addrspace(5)*), i8 addrspace(5)* addrspace(5)* [[S3_SROA_4_0__SROA_IDX6]], align 8, !tbaa.struct !12
-// CHECK-NEXT:    [[S3_SROA_5_0__SROA_IDX:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP0]], i32 36
-// CHECK-NEXT:    [[S3_SROA_5_0__SROA_CAST8:%.*]] = bitcast i8 addrspace(5)* [[S3_SROA_5_0__SROA_IDX]] to i32 addrspace(5)*
-// CHECK-NEXT:    store i32 0, i32 addrspace(5)* [[S3_SROA_5_0__SROA_CAST8]], align 4, !tbaa.struct !13
-// CHECK-NEXT:    ret void
+// CHECK-LABEL: test_memset_private
+// CHECK: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* noundef align 8 {{.*}}, i8 0, i64 32, i1 false)
+// CHECK: [[GEP:%.*]] = getelementptr inbounds %struct.StructTy3, %struct.StructTy3 addrspace(5)* %ptr, i32 0, i32 4
+// CHECK: store i8 addrspace(5)* addrspacecast (i8* null to i8 addrspace(5)*), i8 addrspace(5)* addrspace(5)* [[GEP]]
+// CHECK: [[GEP1:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* {{.*}}, i32 36
+// CHECK: [[GEP1_CAST:%.*]] = bitcast i8 addrspace(5)* [[GEP1]] to i32 addrspace(5)*
+// CHECK: store i32 0, i32 addrspace(5)* [[GEP1_CAST]], align 4
 void test_memset_private(private StructTy3 *ptr) {
   StructTy3 S3 = {0, 0, 0, 0, 0};
   *ptr = S3;
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index a2d7dc234333f..6dcdd630b6bae 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -1806,10 +1806,8 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
           ? Ty->getElementType()
           : FixedVectorType::get(Ty->getElementType(), NumElements);
 
-  Type *SplitIntTy = nullptr;
-  if (uint64_t Bitwidth = NumElements * ElementSize * 8;
-      Bitwidth <= IntegerType::MAX_INT_BITS)
-    SplitIntTy = Type::getIntNTy(Ty->getContext(), Bitwidth);
+  Type *SplitIntTy =
+      Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);
 
   Use *U = S.getUse();
 
@@ -1828,8 +1826,7 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
     // Disable vector promotion when there are loads or stores of an FCA.
     if (LTy->isStructTy())
       return false;
-    if (SplitIntTy &&
-        (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset())) {
+    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
       assert(LTy->isIntegerTy());
       LTy = SplitIntTy;
     }
@@ -1842,8 +1839,7 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
     // Disable vector promotion when there are loads or stores of an FCA.
     if (STy->isStructTy())
      return false;
-    if (SplitIntTy &&
-        (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset())) {
+    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
       assert(STy->isIntegerTy());
       STy = SplitIntTy;
     }
@@ -1893,8 +1889,7 @@ static bool checkVectorTypeForPromotion(Partition &P, VectorType *VTy,
 /// SSA value. We only can ensure this for a limited set of operations, and we
 /// don't want to do the rewrites unless we are confident that the result will
 /// be promotable, so we have an early test here.
-static VectorType *isVectorPromotionViable(Partition &P, LLVMContext &Ctx,
-                                           const DataLayout &DL) {
+static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
   // Collect the candidate types for vector-based promotion. Also track whether
   // we have different element types.
   SmallVector<VectorType *, 4> CandidateTys;
@@ -1931,7 +1926,6 @@ static VectorType *isVectorPromotionViable(Partition &P, LLVMContext &Ctx,
       }
     }
   };
-  bool SeenMemTransferInst = false;
   // Consider any loads or stores that are the exact size of the slice.
   for (const Slice &S : P)
     if (S.beginOffset() == P.beginOffset() &&
@@ -1940,29 +1934,8 @@ static VectorType *isVectorPromotionViable(Partition &P, LLVMContext &Ctx,
         CheckCandidateType(LI->getType());
       else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
         CheckCandidateType(SI->getValueOperand()->getType());
-      else if (isa<MemTransferInst>(S.getUse()->getUser()))
-        SeenMemTransferInst = true;
     }
 
-  // If we have seen mem transfer intrinsic,
-  // and the partition is small-enough,
-  // enqueue appropriate byte vector.
-  //
-  // The "small-enough" threshold is somewhat arbitrary,
-  // and is mostly dictated by compile-time concerns,
-  // but, at the same time, SDAG SDNode can't handle
-  // more then 65535 operands, so we should not
-  // produce vectors with more than ~32768 elements.
-  //
-  // Perhaps, we should also take into account the TTI:
-  // `getNumberOfRegisters() * getRegisterBitWidth() / 8` ?
-  //
-  // FIXME: byte type is sticky. If we had any op with byte-typed elements,
-  // then we should choose that type.
-  if (SeenMemTransferInst && P.size() <= 32)
-    CheckCandidateType(
-        FixedVectorType::get(IntegerType::getInt8Ty(Ctx), P.size()));
-
   // If we didn't find a vector type, nothing to do here.
   if (CandidateTys.empty())
     return nullptr;
 
@@ -2019,6 +1992,13 @@ static VectorType *isVectorPromotionViable(Partition &P, LLVMContext &Ctx,
     CandidateTys.resize(1);
   }
 
+  // FIXME: hack. Do we have a named constant for this?
+  // SDAG SDNode can't have more than 65535 operands.
+  llvm::erase_if(CandidateTys, [](VectorType *VTy) {
+    return cast<FixedVectorType>(VTy)->getNumElements() >
+           std::numeric_limits<unsigned short>::max();
+  });
+
   for (VectorType *VTy : CandidateTys)
     if (checkVectorTypeForPromotion(P, VTy, DL))
       return VTy;
 
@@ -4343,9 +4323,8 @@ AllocaInst *SROAPass::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
 
   bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL);
 
-  VectorType *VecTy = IsIntegerPromotable
-                          ? nullptr
-                          : isVectorPromotionViable(P, AI.getContext(), DL);
+  VectorType *VecTy =
+      IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL);
   if (VecTy)
     SliceTy = VecTy;
 
diff --git a/llvm/test/CodeGen/AMDGPU/v1024.ll b/llvm/test/CodeGen/AMDGPU/v1024.ll
index 6dbb9443fd5cf..1326ba437f94f 100644
--- a/llvm/test/CodeGen/AMDGPU/v1024.ll
+++ b/llvm/test/CodeGen/AMDGPU/v1024.ll
@@ -4,7 +4,7 @@
 
 ; GCN-LABEL: {{^}}test_v1024:
 ; GCN-NOT: v_accvgpr
-; GCN-COUNT-10: v_mov_b32_e32
+; GCN-COUNT-32: v_mov_b32_e32
 ; GCN-NOT: v_accvgpr
 define amdgpu_kernel void @test_v1024() {
 entry:
diff --git a/llvm/test/DebugInfo/X86/sroasplit-1.ll b/llvm/test/DebugInfo/X86/sroasplit-1.ll
index 5a80b56950122..0ec368130da28 100644
--- a/llvm/test/DebugInfo/X86/sroasplit-1.ll
+++ b/llvm/test/DebugInfo/X86/sroasplit-1.ll
@@ -20,8 +20,10 @@
 ;
 ; Verify that SROA creates a variable piece when splitting i1.
-; CHECK: %[[I1:.*]] = load <12 x i8>,
-; CHECK: call void @llvm.dbg.value(metadata <12 x i8> %[[I1]], metadata ![[VAR:.*]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 96))
+; CHECK: %[[I1:.*]] = alloca [12 x i8], align 4
+; CHECK: call void @llvm.dbg.declare(metadata [12 x i8]* %[[I1]], metadata ![[VAR:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 96))
+; CHECK: call void @llvm.dbg.value(metadata i32 %[[A:.*]], metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32))
+; CHECK: ret i32 %[[A]]
 ; Read Var and Piece:
 ; CHECK: ![[VAR]] = !DILocalVariable(name: "i1",{{.*}} line: 11,
 
diff --git a/llvm/test/DebugInfo/X86/sroasplit-4.ll b/llvm/test/DebugInfo/X86/sroasplit-4.ll
index a3b35b820a96c..0d5594ef867dd 100644
--- a/llvm/test/DebugInfo/X86/sroasplit-4.ll
+++ b/llvm/test/DebugInfo/X86/sroasplit-4.ll
@@ -1,28 +1,28 @@
 ; RUN: opt -sroa < %s -S -o - | FileCheck %s
 ;
 ; Test that recursively splitting an alloca updates the debug info correctly.
-; CHECK: call void @llvm.dbg.value(metadata <16 x i8> %[[Y_VEC:.*]], metadata ![[Y:.*]], metadata !DIExpression()) -; CHECK: call void @llvm.dbg.value(metadata <16 x i8> %[[Y_VEC1:.*]], metadata ![[Y]], metadata !DIExpression()) -; CHECK: call void @llvm.dbg.value(metadata i32 0, metadata ![[R:.*]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32)) -; CHECK: call void @llvm.dbg.value(metadata i64 0, metadata ![[R]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)) -; CHECK: call void @llvm.dbg.value(metadata i64 0, metadata ![[R]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 64)) -; CHECK: call void @llvm.dbg.value(metadata <16 x i8> %[[Y_VEC1]], metadata ![[R]], metadata !DIExpression(DW_OP_LLVM_fragment, 192, 128)) -; +; CHECK: %[[T:.*]] = load i64, i64* @t, align 8 +; CHECK: call void @llvm.dbg.value(metadata i64 %[[T]], metadata ![[Y:.*]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)) +; CHECK: %[[T1:.*]] = load i64, i64* @t, align 8 +; CHECK: call void @llvm.dbg.value(metadata i64 %[[T1]], metadata ![[Y]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)) +; CHECK: call void @llvm.dbg.value(metadata i64 %[[T]], metadata ![[R:.*]], metadata !DIExpression(DW_OP_LLVM_fragment, 192, 64)) +; CHECK: call void @llvm.dbg.value(metadata i64 %[[T1]], metadata ![[R]], metadata !DIExpression(DW_OP_LLVM_fragment, 256, 64)) +; ; struct p { ; __SIZE_TYPE__ s; ; __SIZE_TYPE__ t; ; }; -; +; ; struct r { ; int i; ; struct p x; ; struct p y; ; }; -; +; ; extern int call_me(struct r); ; extern int maybe(); ; extern __SIZE_TYPE__ t; -; +; ; int test() { ; if (maybe()) ; return 0; diff --git a/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll b/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll index 3361ad11c244b..886bde2686bd9 100644 --- a/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll +++ b/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll @@ -68,13 +68,12 @@ define dso_local i32* @_Z3foo1S(%0* byval(%0) align 8 %arg) { ; CHECK-LABEL: @_Z3foo1S( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[I2:%.*]] = alloca [[TMP0:%.*]], align 8 -; CHECK-NEXT: [[TMP0]] = bitcast %0* [[ARG:%.*]] to i64* -; CHECK-NEXT: [[I11_SROA_0_0_VEC_EXTRACT_EXTRACT:%.*]] = load i64, i64* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[I11_SROA_0_0_VEC_EXTRACT_EXTRACT]] to i32* +; CHECK-NEXT: [[I1_SROA_0_0_I5_SROA_IDX:%.*]] = getelementptr inbounds [[TMP0]], %0* [[ARG:%.*]], i64 0, i32 0 +; CHECK-NEXT: [[I1_SROA_0_0_COPYLOAD:%.*]] = load i32*, i32** [[I1_SROA_0_0_I5_SROA_IDX]], align 8 ; CHECK-NEXT: [[I_SROA_0_0_I6_SROA_IDX:%.*]] = getelementptr inbounds [[TMP0]], %0* [[I2]], i64 0, i32 0 -; CHECK-NEXT: store i32* [[TMP1]], i32** [[I_SROA_0_0_I6_SROA_IDX]], align 8 +; CHECK-NEXT: store i32* [[I1_SROA_0_0_COPYLOAD]], i32** [[I_SROA_0_0_I6_SROA_IDX]], align 8 ; CHECK-NEXT: tail call void @_Z7escape01S(%0* nonnull byval([[TMP0]]) align 8 [[I2]]) -; CHECK-NEXT: ret i32* [[TMP1]] +; CHECK-NEXT: ret i32* [[I1_SROA_0_0_COPYLOAD]] ; bb: %i = alloca %0, align 8 @@ -108,22 +107,21 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) define dso_local i32* @_Z3bar1S(%0* byval(%0) align 8 %arg) { ; CHECK-LABEL: @_Z3bar1S( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast %0* [[ARG:%.*]] to i64* -; CHECK-NEXT: [[I13_SROA_0_0_VEC_EXTRACT_EXTRACT:%.*]] = load i64, i64* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[I13_SROA_0_0_VEC_EXTRACT_EXTRACT]] to i32* +; CHECK-NEXT: [[I1_SROA_0_0_I4_SROA_IDX:%.*]] = getelementptr inbounds 
[[TMP0:%.*]], %0* [[ARG:%.*]], i64 0, i32 0 +; CHECK-NEXT: [[I1_SROA_0_0_COPYLOAD:%.*]] = load i32*, i32** [[I1_SROA_0_0_I4_SROA_IDX]], align 8 ; CHECK-NEXT: [[I5:%.*]] = tail call i32 @_Z4condv() ; CHECK-NEXT: [[I6_NOT:%.*]] = icmp eq i32 [[I5]], 0 ; CHECK-NEXT: br i1 [[I6_NOT]], label [[BB10:%.*]], label [[BB7:%.*]] ; CHECK: bb7: ; CHECK-NEXT: tail call void @_Z5sync0v() -; CHECK-NEXT: tail call void @_Z7escape0Pi(i32* [[TMP1]]) +; CHECK-NEXT: tail call void @_Z7escape0Pi(i32* [[I1_SROA_0_0_COPYLOAD]]) ; CHECK-NEXT: br label [[BB13:%.*]] ; CHECK: bb10: ; CHECK-NEXT: tail call void @_Z5sync1v() -; CHECK-NEXT: tail call void @_Z7escape1Pi(i32* [[TMP1]]) +; CHECK-NEXT: tail call void @_Z7escape1Pi(i32* [[I1_SROA_0_0_COPYLOAD]]) ; CHECK-NEXT: br label [[BB13]] ; CHECK: bb13: -; CHECK-NEXT: ret i32* [[TMP1]] +; CHECK-NEXT: ret i32* [[I1_SROA_0_0_COPYLOAD]] ; bb: %i = alloca %0, align 8 diff --git a/llvm/test/Transforms/SROA/address-spaces.ll b/llvm/test/Transforms/SROA/address-spaces.ll index 0300e99f9a217..70e1a682d7bfc 100644 --- a/llvm/test/Transforms/SROA/address-spaces.ll +++ b/llvm/test/Transforms/SROA/address-spaces.ll @@ -11,8 +11,8 @@ declare void @llvm.memcpy.p1.p1.i32(ptr addrspace(1) nocapture, ptr addrspace(1) ; Make sure an illegal bitcast isn't introduced define void @test_address_space_1_1(ptr addrspace(1) %a, ptr addrspace(1) %b) { ; CHECK-LABEL: @test_address_space_1_1( -; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr addrspace(1) [[A:%.*]], align 2 -; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2 +; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr addrspace(1) [[A:%.*]], align 2 +; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2 ; CHECK-NEXT: ret void ; %aa = alloca <2 x i64>, align 16 @@ -23,8 +23,8 @@ define void @test_address_space_1_1(ptr addrspace(1) %a, ptr addrspace(1) %b) { define void @test_address_space_1_0(ptr addrspace(1) %a, ptr %b) { ; CHECK-LABEL: @test_address_space_1_0( -; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr addrspace(1) [[A:%.*]], align 2 -; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr [[B:%.*]], align 2 +; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr addrspace(1) [[A:%.*]], align 2 +; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr [[B:%.*]], align 2 ; CHECK-NEXT: ret void ; %aa = alloca <2 x i64>, align 16 @@ -35,8 +35,8 @@ define void @test_address_space_1_0(ptr addrspace(1) %a, ptr %b) { define void @test_address_space_0_1(ptr %a, ptr addrspace(1) %b) { ; CHECK-LABEL: @test_address_space_0_1( -; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr [[A:%.*]], align 2 -; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2 +; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 2 +; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2 ; CHECK-NEXT: ret void ; %aa = alloca <2 x i64>, align 16 diff --git a/llvm/test/Transforms/SROA/alignment.ll b/llvm/test/Transforms/SROA/alignment.ll index ba673b1d16465..66da09cd52d27 100644 --- a/llvm/test/Transforms/SROA/alignment.ll +++ b/llvm/test/Transforms/SROA/alignment.ll @@ -92,15 +92,15 @@ define void @PR13920(ptr %a, ptr %b) { ; Test that alignments on memcpy intrinsics get propagated to loads and stores. 
; CHECK-LABEL: @PR13920( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr [[A:%.*]], align 2 -; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr [[B:%.*]], align 2 +; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 2 +; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr [[B:%.*]], align 2 ; CHECK-NEXT: ret void ; ; DEBUGLOC-LABEL: @PR13920( ; DEBUGLOC-NEXT: entry: ; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META37:![0-9]+]], metadata !DIExpression()), !dbg [[DBG38:![0-9]+]] -; DEBUGLOC-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr [[A:%.*]], align 2, !dbg [[DBG39:![0-9]+]] -; DEBUGLOC-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr [[B:%.*]], align 2, !dbg [[DBG40:![0-9]+]] +; DEBUGLOC-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 2, !dbg [[DBG39:![0-9]+]] +; DEBUGLOC-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr [[B:%.*]], align 2, !dbg [[DBG40:![0-9]+]] ; DEBUGLOC-NEXT: ret void, !dbg [[DBG41:![0-9]+]] ; @@ -118,17 +118,21 @@ define void @test3(ptr %x) { ; reduce the alignment. ; CHECK-LABEL: @test3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load <22 x i8>, ptr [[X:%.*]], align 8 -; CHECK-NEXT: [[B_SROA_0_6_COPYLOAD:%.*]] = load <18 x i8>, ptr [[X]], align 2 +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [22 x i8], align 8 +; CHECK-NEXT: [[B_SROA_0:%.*]] = alloca [18 x i8], align 2 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[A_SROA_0]], ptr align 8 [[X:%.*]], i32 22, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[B_SROA_0]], ptr align 2 [[X]], i32 18, i1 false) ; CHECK-NEXT: ret void ; ; DEBUGLOC-LABEL: @test3( ; DEBUGLOC-NEXT: entry: -; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META44:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47:![0-9]+]] -; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META45:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48:![0-9]+]] -; DEBUGLOC-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load <22 x i8>, ptr [[X:%.*]], align 8, !dbg [[DBG49:![0-9]+]] +; DEBUGLOC-NEXT: [[A_SROA_0:%.*]] = alloca [22 x i8], align 8, !dbg [[DBG47:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META44:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47]] +; DEBUGLOC-NEXT: [[B_SROA_0:%.*]] = alloca [18 x i8], align 2, !dbg [[DBG48:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META45:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48]] +; DEBUGLOC-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[A_SROA_0]], ptr align 8 [[X:%.*]], i32 22, i1 false), !dbg [[DBG49:![0-9]+]] ; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META46:![0-9]+]], metadata !DIExpression()), !dbg [[DBG50:![0-9]+]] -; DEBUGLOC-NEXT: [[B_SROA_0_6_COPYLOAD:%.*]] = load <18 x i8>, ptr [[X]], align 2, !dbg [[DBG51:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[B_SROA_0]], ptr align 2 [[X]], i32 18, i1 false), !dbg [[DBG51:![0-9]+]] ; DEBUGLOC-NEXT: ret void, !dbg [[DBG52:![0-9]+]] ; diff --git a/llvm/test/Transforms/SROA/alloca-address-space.ll b/llvm/test/Transforms/SROA/alloca-address-space.ll index b06f269d806a5..d4f305c39c8fe 100644 --- a/llvm/test/Transforms/SROA/alloca-address-space.ll +++ b/llvm/test/Transforms/SROA/alloca-address-space.ll @@ -10,8 +10,8 @@ declare void @llvm.memcpy.p1.p1.i32(ptr addrspace(1) nocapture, ptr addrspace(1) 
define void @test_address_space_1_1(ptr addrspace(1) %a, ptr addrspace(1) %b) { ; CHECK-LABEL: @test_address_space_1_1( -; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr addrspace(1) [[A:%.*]], align 2 -; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2 +; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr addrspace(1) [[A:%.*]], align 2 +; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2 ; CHECK-NEXT: ret void ; %aa = alloca <2 x i64>, align 16, addrspace(2) @@ -22,8 +22,8 @@ define void @test_address_space_1_1(ptr addrspace(1) %a, ptr addrspace(1) %b) { define void @test_address_space_1_0(ptr addrspace(1) %a, ptr addrspace(2) %b) { ; CHECK-LABEL: @test_address_space_1_0( -; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr addrspace(1) [[A:%.*]], align 2 -; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr addrspace(2) [[B:%.*]], align 2 +; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr addrspace(1) [[A:%.*]], align 2 +; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr addrspace(2) [[B:%.*]], align 2 ; CHECK-NEXT: ret void ; %aa = alloca <2 x i64>, align 16, addrspace(2) @@ -34,8 +34,8 @@ define void @test_address_space_1_0(ptr addrspace(1) %a, ptr addrspace(2) %b) { define void @test_address_space_0_1(ptr addrspace(2) %a, ptr addrspace(1) %b) { ; CHECK-LABEL: @test_address_space_0_1( -; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr addrspace(2) [[A:%.*]], align 2 -; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2 +; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr addrspace(2) [[A:%.*]], align 2 +; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2 ; CHECK-NEXT: ret void ; %aa = alloca <2 x i64>, align 16, addrspace(2) diff --git a/llvm/test/Transforms/SROA/basictest.ll b/llvm/test/Transforms/SROA/basictest.ll index a57cd049b01aa..5ac8ed8c6e6a3 100644 --- a/llvm/test/Transforms/SROA/basictest.ll +++ b/llvm/test/Transforms/SROA/basictest.ll @@ -141,7 +141,10 @@ define void @test3(ptr %dst, ptr align 8 %src) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [42 x i8], align 1 ; CHECK-NEXT: [[A_SROA_3:%.*]] = alloca [99 x i8], align 1 +; CHECK-NEXT: [[A_SROA_32:%.*]] = alloca [16 x i8], align 1 ; CHECK-NEXT: [[A_SROA_15:%.*]] = alloca [42 x i8], align 1 +; CHECK-NEXT: [[A_SROA_16:%.*]] = alloca [7 x i8], align 1 +; CHECK-NEXT: [[A_SROA_235:%.*]] = alloca [7 x i8], align 1 ; CHECK-NEXT: [[A_SROA_31:%.*]] = alloca [85 x i8], align 1 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_0]], ptr align 8 [[SRC:%.*]], i32 42, i1 false), !tbaa [[TBAA0:![0-9]+]] ; CHECK-NEXT: [[A_SROA_2_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 42 @@ -149,74 +152,87 @@ define void @test3(ptr %dst, ptr align 8 %src) { ; CHECK-NEXT: [[A_SROA_3_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 43 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_3]], ptr align 1 [[A_SROA_3_0_SRC_SROA_IDX]], i32 99, i1 false), !tbaa [[TBAA0]] ; CHECK-NEXT: [[A_SROA_32_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 142 -; CHECK-NEXT: [[A_SROA_32_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr [[A_SROA_32_0_SRC_SROA_IDX]], align 2, !tbaa [[TBAA0]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_32]], ptr align 2 [[A_SROA_32_0_SRC_SROA_IDX]], i32 16, i1 false), !tbaa [[TBAA0]] ; CHECK-NEXT: 
[[A_SROA_15_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 158 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_15]], ptr align 2 [[A_SROA_15_0_SRC_SROA_IDX]], i32 42, i1 false), !tbaa [[TBAA0]] ; CHECK-NEXT: [[A_SROA_16_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 200 -; CHECK-NEXT: [[A_SROA_16_SROA_0_0_COPYLOAD:%.*]] = load <7 x i8>, ptr [[A_SROA_16_0_SRC_SROA_IDX]], align 8, !tbaa [[TBAA0]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_16]], ptr align 8 [[A_SROA_16_0_SRC_SROA_IDX]], i32 7, i1 false), !tbaa [[TBAA0]] ; CHECK-NEXT: [[A_SROA_23_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 207 ; CHECK-NEXT: [[A_SROA_23_0_COPYLOAD:%.*]] = load i8, ptr [[A_SROA_23_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] ; CHECK-NEXT: [[A_SROA_235_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 208 -; CHECK-NEXT: [[A_SROA_235_SROA_0_0_COPYLOAD:%.*]] = load <7 x i8>, ptr [[A_SROA_235_0_SRC_SROA_IDX]], align 8, !tbaa [[TBAA0]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_235]], ptr align 8 [[A_SROA_235_0_SRC_SROA_IDX]], i32 7, i1 false), !tbaa [[TBAA0]] ; CHECK-NEXT: [[A_SROA_31_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 215 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31]], ptr align 1 [[A_SROA_31_0_SRC_SROA_IDX]], i32 85, i1 false), !tbaa [[TBAA0]] -; CHECK-NEXT: [[A_SROA_32_SROA_0_0_VEC_INSERT:%.*]] = insertelement <16 x i8> [[A_SROA_32_SROA_0_0_COPYLOAD]], i8 1, i32 0 -; CHECK-NEXT: [[A_SROA_32_SROA_0_0_VECBLEND27:%.*]] = select <16 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x i8> <i8 extractelement (<2 x i8> bitcast (<1 x i16> <i16 1> to <2 x i8>), i32 0), i8 extractelement (<2 x i8> bitcast (<1 x i16> <i16 1> to <2 x i8>), i32 1), i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_0_VEC_INSERT]] -; CHECK-NEXT: [[A_SROA_32_SROA_0_0_VECBLEND25:%.*]] = select <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x i8> <i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), i32 3), i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_0_VECBLEND27]] -; CHECK-NEXT: [[A_SROA_32_SROA_0_0_VECBLEND:%.*]] = select <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x i8> <i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 1> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 1> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 1> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 1> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 1> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 1> to <8 x 
i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 1> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 1> to <8 x i8>), i32 7), i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_0_VECBLEND25]] -; CHECK-NEXT: [[A_SROA_32_SROA_0_1_VECBLEND:%.*]] = select <16 x i1> <i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x i8> <i8 undef, i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 2> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 2> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 2> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 2> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 2> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 2> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 2> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 2> to <8 x i8>), i32 7), i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_0_VECBLEND]] -; CHECK-NEXT: [[A_SROA_32_SROA_0_2_VECBLEND:%.*]] = select <16 x i1> <i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x i8> <i8 undef, i8 undef, i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 3> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 3> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 3> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 3> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 3> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 3> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 3> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 3> to <8 x i8>), i32 7), i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_1_VECBLEND]] -; CHECK-NEXT: [[A_SROA_32_SROA_0_3_VECBLEND:%.*]] = select <16 x i1> <i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x i8> <i8 undef, i8 undef, i8 undef, i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 4> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 4> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 4> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 4> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 4> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 4> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 4> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 4> to <8 x i8>), i32 7), i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_2_VECBLEND]] -; CHECK-NEXT: [[A_SROA_32_SROA_0_4_VECBLEND:%.*]] = select <16 x i1> <i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 5> to <8 x 
i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 5> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 5> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 5> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 5> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 5> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 5> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 5> to <8 x i8>), i32 7), i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_3_VECBLEND]] -; CHECK-NEXT: [[A_SROA_32_SROA_0_5_VECBLEND:%.*]] = select <16 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false>, <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 6> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 6> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 6> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 6> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 6> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 6> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 6> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 6> to <8 x i8>), i32 7), i8 undef, i8 undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_4_VECBLEND]] -; CHECK-NEXT: [[A_SROA_32_SROA_0_6_VECBLEND:%.*]] = select <16 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false>, <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 7> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 7> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 7> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 7> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 7> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 7> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 7> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 7> to <8 x i8>), i32 7), i8 undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_5_VECBLEND]] -; CHECK-NEXT: [[A_SROA_32_SROA_0_7_VECBLEND:%.*]] = select <16 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false>, <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 8> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 8> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 8> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 8> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 8> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 8> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 8> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 8> to <8 x i8>), i32 7), i8 undef>, <16 x i8> 
[[A_SROA_32_SROA_0_6_VECBLEND]] -; CHECK-NEXT: [[A_SROA_32_SROA_0_8_VECBLEND:%.*]] = select <16 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 9> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 9> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 9> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 9> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 9> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 9> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 9> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> <i64 9> to <8 x i8>), i32 7)>, <16 x i8> [[A_SROA_32_SROA_0_7_VECBLEND]] -; CHECK-NEXT: [[A_SROA_16_SROA_0_0_VEC_INSERT:%.*]] = insertelement <7 x i8> [[A_SROA_16_SROA_0_0_COPYLOAD]], i8 1, i32 0 -; CHECK-NEXT: [[A_SROA_16_SROA_0_0_VECBLEND18:%.*]] = select <7 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, <7 x i8> <i8 extractelement (<2 x i8> bitcast (<1 x i16> <i16 1> to <2 x i8>), i32 0), i8 extractelement (<2 x i8> bitcast (<1 x i16> <i16 1> to <2 x i8>), i32 1), i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <7 x i8> [[A_SROA_16_SROA_0_0_VEC_INSERT]] -; CHECK-NEXT: [[A_SROA_16_SROA_0_0_VECBLEND:%.*]] = select <7 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false>, <7 x i8> <i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), i32 3), i8 undef, i8 undef, i8 undef>, <7 x i8> [[A_SROA_16_SROA_0_0_VECBLEND18]] -; CHECK-NEXT: [[A_SROA_16_SROA_0_1_VECBLEND:%.*]] = select <7 x i1> <i1 false, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false>, <7 x i8> <i8 undef, i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 2> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 2> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 2> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 2> to <4 x i8>), i32 3), i8 undef, i8 undef>, <7 x i8> [[A_SROA_16_SROA_0_0_VECBLEND]] -; CHECK-NEXT: [[A_SROA_16_SROA_0_2_VECBLEND:%.*]] = select <7 x i1> <i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false>, <7 x i8> <i8 undef, i8 undef, i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 3> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 3> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 3> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 3> to <4 x i8>), i32 3), i8 undef>, <7 x i8> [[A_SROA_16_SROA_0_1_VECBLEND]] -; CHECK-NEXT: [[A_SROA_16_SROA_0_3_VECBLEND:%.*]] = select <7 x i1> <i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <7 x i8> <i8 undef, i8 undef, i8 undef, i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 4> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 4> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 4> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast 
(<1 x i32> <i32 4> to <4 x i8>), i32 3)>, <7 x i8> [[A_SROA_16_SROA_0_2_VECBLEND]] -; CHECK-NEXT: [[A_SROA_235_SROA_0_0_VECBLEND:%.*]] = select <7 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false>, <7 x i8> <i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), i32 3), i8 undef, i8 undef, i8 undef>, <7 x i8> [[A_SROA_235_SROA_0_0_COPYLOAD]] -; CHECK-NEXT: [[A_SROA_235_SROA_0_1_VEC_INSERT:%.*]] = insertelement <7 x i8> [[A_SROA_235_SROA_0_0_VECBLEND]], i8 1, i32 1 -; CHECK-NEXT: [[A_SROA_235_SROA_0_1_VECBLEND13:%.*]] = select <7 x i1> <i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <7 x i8> <i8 undef, i8 extractelement (<2 x i8> bitcast (<1 x i16> <i16 1> to <2 x i8>), i32 0), i8 extractelement (<2 x i8> bitcast (<1 x i16> <i16 1> to <2 x i8>), i32 1), i8 undef, i8 undef, i8 undef, i8 undef>, <7 x i8> [[A_SROA_235_SROA_0_1_VEC_INSERT]] -; CHECK-NEXT: [[A_SROA_235_SROA_0_1_VECBLEND:%.*]] = select <7 x i1> <i1 false, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false>, <7 x i8> <i8 undef, i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 1> to <4 x i8>), i32 3), i8 undef, i8 undef>, <7 x i8> [[A_SROA_235_SROA_0_1_VECBLEND13]] -; CHECK-NEXT: [[A_SROA_235_SROA_0_2_VECBLEND:%.*]] = select <7 x i1> <i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false>, <7 x i8> <i8 undef, i8 undef, i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 3> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 3> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 3> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 3> to <4 x i8>), i32 3), i8 undef>, <7 x i8> [[A_SROA_235_SROA_0_1_VECBLEND]] -; CHECK-NEXT: [[A_SROA_235_SROA_0_3_VECBLEND:%.*]] = select <7 x i1> <i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <7 x i8> <i8 undef, i8 undef, i8 undef, i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 4> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 4> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 4> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> <i32 4> to <4 x i8>), i32 3)>, <7 x i8> [[A_SROA_235_SROA_0_2_VECBLEND]] +; CHECK-NEXT: store i8 1, ptr [[A_SROA_32]], align 1, !tbaa [[TBAA3:![0-9]+]] +; CHECK-NEXT: store i16 1, ptr [[A_SROA_32]], align 1, !tbaa [[TBAA5:![0-9]+]] +; CHECK-NEXT: store i32 1, ptr [[A_SROA_32]], align 1, !tbaa [[TBAA7:![0-9]+]] +; CHECK-NEXT: store i64 1, ptr [[A_SROA_32]], align 1, !tbaa [[TBAA9:![0-9]+]] +; CHECK-NEXT: [[A_SROA_32_1_OVERLAP_2_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_32]], i64 1 +; CHECK-NEXT: store i64 2, ptr [[A_SROA_32_1_OVERLAP_2_I8_SROA_IDX]], align 1, !tbaa [[TBAA11:![0-9]+]] +; CHECK-NEXT: [[A_SROA_32_2_OVERLAP_3_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_32]], i64 2 +; CHECK-NEXT: store i64 3, ptr [[A_SROA_32_2_OVERLAP_3_I8_SROA_IDX]], align 1, !tbaa [[TBAA13:![0-9]+]] +; CHECK-NEXT: [[A_SROA_32_3_OVERLAP_4_I8_SROA_IDX:%.*]] = getelementptr 
inbounds i8, ptr [[A_SROA_32]], i64 3 +; CHECK-NEXT: store i64 4, ptr [[A_SROA_32_3_OVERLAP_4_I8_SROA_IDX]], align 1, !tbaa [[TBAA15:![0-9]+]] +; CHECK-NEXT: [[A_SROA_32_4_OVERLAP_5_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_32]], i64 4 +; CHECK-NEXT: store i64 5, ptr [[A_SROA_32_4_OVERLAP_5_I8_SROA_IDX]], align 1, !tbaa [[TBAA17:![0-9]+]] +; CHECK-NEXT: [[A_SROA_32_5_OVERLAP_6_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_32]], i64 5 +; CHECK-NEXT: store i64 6, ptr [[A_SROA_32_5_OVERLAP_6_I8_SROA_IDX]], align 1, !tbaa [[TBAA19:![0-9]+]] +; CHECK-NEXT: [[A_SROA_32_6_OVERLAP_7_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_32]], i64 6 +; CHECK-NEXT: store i64 7, ptr [[A_SROA_32_6_OVERLAP_7_I8_SROA_IDX]], align 1, !tbaa [[TBAA21:![0-9]+]] +; CHECK-NEXT: [[A_SROA_32_7_OVERLAP_8_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_32]], i64 7 +; CHECK-NEXT: store i64 8, ptr [[A_SROA_32_7_OVERLAP_8_I8_SROA_IDX]], align 1, !tbaa [[TBAA23:![0-9]+]] +; CHECK-NEXT: [[A_SROA_32_8_OVERLAP_9_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_32]], i64 8 +; CHECK-NEXT: store i64 9, ptr [[A_SROA_32_8_OVERLAP_9_I8_SROA_IDX]], align 1, !tbaa [[TBAA25:![0-9]+]] +; CHECK-NEXT: store i8 1, ptr [[A_SROA_16]], align 1, !tbaa [[TBAA27:![0-9]+]] +; CHECK-NEXT: store i16 1, ptr [[A_SROA_16]], align 1, !tbaa [[TBAA29:![0-9]+]] +; CHECK-NEXT: store i32 1, ptr [[A_SROA_16]], align 1, !tbaa [[TBAA31:![0-9]+]] +; CHECK-NEXT: [[A_SROA_16_1_OVERLAP2_1_1_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_16]], i64 1 +; CHECK-NEXT: store i32 2, ptr [[A_SROA_16_1_OVERLAP2_1_1_I8_SROA_IDX]], align 1, !tbaa [[TBAA33:![0-9]+]] +; CHECK-NEXT: [[A_SROA_16_2_OVERLAP2_1_2_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_16]], i64 2 +; CHECK-NEXT: store i32 3, ptr [[A_SROA_16_2_OVERLAP2_1_2_I8_SROA_IDX]], align 1, !tbaa [[TBAA35:![0-9]+]] +; CHECK-NEXT: [[A_SROA_16_3_OVERLAP2_1_3_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_16]], i64 3 +; CHECK-NEXT: store i32 4, ptr [[A_SROA_16_3_OVERLAP2_1_3_I8_SROA_IDX]], align 1, !tbaa [[TBAA37:![0-9]+]] +; CHECK-NEXT: store i32 1, ptr [[A_SROA_235]], align 1, !tbaa [[TBAA39:![0-9]+]] +; CHECK-NEXT: [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX11:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 1 +; CHECK-NEXT: store i8 1, ptr [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX11]], align 1, !tbaa [[TBAA41:![0-9]+]] +; CHECK-NEXT: [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX10:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 1 +; CHECK-NEXT: store i16 1, ptr [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX10]], align 1, !tbaa [[TBAA43:![0-9]+]] +; CHECK-NEXT: [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 1 +; CHECK-NEXT: store i32 1, ptr [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX]], align 1, !tbaa [[TBAA45:![0-9]+]] +; CHECK-NEXT: [[A_SROA_235_2_OVERLAP2_2_2_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 2 +; CHECK-NEXT: store i32 3, ptr [[A_SROA_235_2_OVERLAP2_2_2_I8_SROA_IDX]], align 1, !tbaa [[TBAA47:![0-9]+]] +; CHECK-NEXT: [[A_SROA_235_3_OVERLAP2_2_3_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 3 +; CHECK-NEXT: store i32 4, ptr [[A_SROA_235_3_OVERLAP2_2_3_I8_SROA_IDX]], align 1, !tbaa [[TBAA49:![0-9]+]] ; CHECK-NEXT: [[A_SROA_15_197_OVERLAP2_PREFIX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_15]], i64 39 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 
[[A_SROA_15_197_OVERLAP2_PREFIX_SROA_IDX]], ptr align 1 [[SRC]], i32 3, i1 false), !tbaa [[TBAA3:![0-9]+]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_15_197_OVERLAP2_PREFIX_SROA_IDX]], ptr align 1 [[SRC]], i32 3, i1 false), !tbaa [[TBAA51:![0-9]+]] ; CHECK-NEXT: [[A_SROA_16_197_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 3 -; CHECK-NEXT: [[A_SROA_16_SROA_0_0_COPYLOAD21:%.*]] = load <5 x i8>, ptr [[A_SROA_16_197_SRC_SROA_IDX]], align 1, !tbaa [[TBAA3]] -; CHECK-NEXT: [[A_SROA_16_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <5 x i8> [[A_SROA_16_SROA_0_0_COPYLOAD21]], <5 x i8> poison, <7 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 undef, i32 undef> -; CHECK-NEXT: [[A_SROA_16_SROA_0_0_VECBLEND22:%.*]] = select <7 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false>, <7 x i8> [[A_SROA_16_SROA_0_0_VEC_EXPAND]], <7 x i8> [[A_SROA_16_SROA_0_3_VECBLEND]] -; CHECK-NEXT: [[A_SROA_16_SROA_0_2_VECBLEND23:%.*]] = select <7 x i1> <i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true>, <7 x i8> <i8 undef, i8 undef, i8 42, i8 42, i8 42, i8 42, i8 42>, <7 x i8> [[A_SROA_16_SROA_0_0_VECBLEND22]] -; CHECK-NEXT: [[A_SROA_235_SROA_0_0_VECBLEND11:%.*]] = select <7 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, <7 x i8> <i8 42, i8 42, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <7 x i8> [[A_SROA_235_SROA_0_3_VECBLEND]] -; CHECK-NEXT: [[A_SROA_235_SROA_0_1_COPYLOAD:%.*]] = load <5 x i8>, ptr [[SRC]], align 1, !tbaa [[TBAA5:![0-9]+]] -; CHECK-NEXT: [[A_SROA_235_SROA_0_1_VEC_EXPAND:%.*]] = shufflevector <5 x i8> [[A_SROA_235_SROA_0_1_COPYLOAD]], <5 x i8> poison, <7 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 undef> -; CHECK-NEXT: [[A_SROA_235_SROA_0_1_VECBLEND15:%.*]] = select <7 x i1> <i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false>, <7 x i8> [[A_SROA_235_SROA_0_1_VEC_EXPAND]], <7 x i8> [[A_SROA_235_SROA_0_0_VECBLEND11]] -; CHECK-NEXT: [[A_SROA_235_SROA_0_2_COPYLOAD:%.*]] = load <5 x i8>, ptr [[SRC]], align 1, !tbaa [[TBAA7:![0-9]+]] -; CHECK-NEXT: [[A_SROA_235_SROA_0_2_VEC_EXPAND:%.*]] = shufflevector <5 x i8> [[A_SROA_235_SROA_0_2_COPYLOAD]], <5 x i8> poison, <7 x i32> <i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4> -; CHECK-NEXT: [[A_SROA_235_SROA_0_2_VECBLEND16:%.*]] = select <7 x i1> <i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true>, <7 x i8> [[A_SROA_235_SROA_0_2_VEC_EXPAND]], <7 x i8> [[A_SROA_235_SROA_0_1_VECBLEND15]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_16]], ptr align 1 [[A_SROA_16_197_SRC_SROA_IDX]], i32 5, i1 false), !tbaa [[TBAA51]] +; CHECK-NEXT: [[A_SROA_16_2_OVERLAP2_1_2_I8_SROA_IDX12:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_16]], i64 2 +; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_16_2_OVERLAP2_1_2_I8_SROA_IDX12]], i8 42, i32 5, i1 false), !tbaa [[TBAA53:![0-9]+]] +; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_235]], i8 42, i32 2, i1 false), !tbaa [[TBAA53]] +; CHECK-NEXT: [[A_SROA_235_209_OVERLAP2_2_1_I8_SROA_IDX8:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_235_209_OVERLAP2_2_1_I8_SROA_IDX8]], ptr align 1 [[SRC]], i32 5, i1 false), !tbaa [[TBAA55:![0-9]+]] +; CHECK-NEXT: [[A_SROA_235_210_OVERLAP2_2_2_I8_SROA_IDX9:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 2 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 
[[A_SROA_235_210_OVERLAP2_2_2_I8_SROA_IDX9]], ptr align 1 [[SRC]], i32 5, i1 false), !tbaa [[TBAA57:![0-9]+]] ; CHECK-NEXT: [[A_SROA_31_210_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 5 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31]], ptr align 1 [[A_SROA_31_210_SRC_SROA_IDX]], i32 3, i1 false), !tbaa [[TBAA7]] -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST:%.*]], ptr align 1 [[A_SROA_0]], i32 42, i1 false), !tbaa [[TBAA9:![0-9]+]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31]], ptr align 1 [[A_SROA_31_210_SRC_SROA_IDX]], i32 3, i1 false), !tbaa [[TBAA57]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST:%.*]], ptr align 1 [[A_SROA_0]], i32 42, i1 false), !tbaa [[TBAA59:![0-9]+]] ; CHECK-NEXT: [[A_SROA_2_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 42 -; CHECK-NEXT: store i8 0, ptr [[A_SROA_2_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]] +; CHECK-NEXT: store i8 0, ptr [[A_SROA_2_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA59]] ; CHECK-NEXT: [[A_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 43 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_3_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_3]], i32 99, i1 false), !tbaa [[TBAA9]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_3_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_3]], i32 99, i1 false), !tbaa [[TBAA59]] ; CHECK-NEXT: [[A_SROA_32_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 142 -; CHECK-NEXT: store <16 x i8> [[A_SROA_32_SROA_0_8_VECBLEND]], ptr [[A_SROA_32_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_32_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_32]], i32 16, i1 false), !tbaa [[TBAA59]] ; CHECK-NEXT: [[A_SROA_15_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 158 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_15_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_15]], i32 42, i1 false), !tbaa [[TBAA9]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_15_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_15]], i32 42, i1 false), !tbaa [[TBAA59]] ; CHECK-NEXT: [[A_SROA_16_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 200 -; CHECK-NEXT: store <7 x i8> [[A_SROA_16_SROA_0_2_VECBLEND23]], ptr [[A_SROA_16_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_16_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_16]], i32 7, i1 false), !tbaa [[TBAA59]] ; CHECK-NEXT: [[A_SROA_23_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 207 -; CHECK-NEXT: store i8 42, ptr [[A_SROA_23_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]] +; CHECK-NEXT: store i8 42, ptr [[A_SROA_23_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA59]] ; CHECK-NEXT: [[A_SROA_235_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 208 -; CHECK-NEXT: store <7 x i8> [[A_SROA_235_SROA_0_2_VECBLEND16]], ptr [[A_SROA_235_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_235_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_235]], i32 7, i1 false), !tbaa [[TBAA59]] ; CHECK-NEXT: [[A_SROA_31_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 215 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_31]], i32 85, i1 false), !tbaa [[TBAA9]] +; CHECK-NEXT: call void 
@llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_31]], i32 85, i1 false), !tbaa [[TBAA59]]
 ; CHECK-NEXT: ret void
 ;
@@ -299,30 +315,60 @@ entry:
 define void @test4(ptr %dst, ptr %src) {
 ; CHECK-LABEL: @test4(
 ; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [20 x i8], align 1
+; CHECK-NEXT: [[A_SROA_2_SROA_4:%.*]] = alloca [7 x i8], align 1
+; CHECK-NEXT: [[A_SROA_3:%.*]] = alloca [10 x i8], align 1
+; CHECK-NEXT: [[A_SROA_31_SROA_5:%.*]] = alloca [7 x i8], align 1
+; CHECK-NEXT: [[A_SROA_6_SROA_4:%.*]] = alloca [7 x i8], align 1
 ; CHECK-NEXT: [[A_SROA_7:%.*]] = alloca [40 x i8], align 1
-; CHECK-NEXT: [[A_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <20 x i8>, ptr [[SRC:%.*]], align 1, !tbaa [[TBAA0]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_0]], ptr align 1 [[SRC:%.*]], i32 20, i1 false), !tbaa [[TBAA0]]
 ; CHECK-NEXT: [[A_SROA_2_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 20
-; CHECK-NEXT: [[A_SROA_2_SROA_0_0_COPYLOAD:%.*]] = load <10 x i8>, ptr [[A_SROA_2_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[A_SROA_2_SROA_0_0_COPYLOAD:%.*]] = load i16, ptr [[A_SROA_2_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[A_SROA_2_SROA_3_0_A_SROA_2_0_SRC_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_2_0_SRC_SROA_IDX]], i64 2
+; CHECK-NEXT: [[A_SROA_2_SROA_3_0_COPYLOAD:%.*]] = load i8, ptr [[A_SROA_2_SROA_3_0_A_SROA_2_0_SRC_SROA_IDX_SROA_IDX]], align 1, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[A_SROA_2_SROA_4_0_A_SROA_2_0_SRC_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_2_0_SRC_SROA_IDX]], i64 3
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_2_SROA_4]], ptr align 1 [[A_SROA_2_SROA_4_0_A_SROA_2_0_SRC_SROA_IDX_SROA_IDX]], i32 7, i1 false), !tbaa [[TBAA0]]
 ; CHECK-NEXT: [[A_SROA_3_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 30
-; CHECK-NEXT: [[A_SROA_3_SROA_0_0_COPYLOAD:%.*]] = load <10 x i8>, ptr [[A_SROA_3_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_3]], ptr align 1 [[A_SROA_3_0_SRC_SROA_IDX]], i32 10, i1 false), !tbaa [[TBAA0]]
 ; CHECK-NEXT: [[A_SROA_31_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 40
-; CHECK-NEXT: [[A_SROA_31_SROA_0_0_COPYLOAD:%.*]] = load <10 x i8>, ptr [[A_SROA_31_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[A_SROA_31_SROA_0_0_COPYLOAD:%.*]] = load i16, ptr [[A_SROA_31_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[A_SROA_31_SROA_4_0_A_SROA_31_0_SRC_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_31_0_SRC_SROA_IDX]], i64 2
+; CHECK-NEXT: [[A_SROA_31_SROA_4_0_COPYLOAD:%.*]] = load i8, ptr [[A_SROA_31_SROA_4_0_A_SROA_31_0_SRC_SROA_IDX_SROA_IDX]], align 1, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[A_SROA_31_SROA_5_0_A_SROA_31_0_SRC_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_31_0_SRC_SROA_IDX]], i64 3
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31_SROA_5]], ptr align 1 [[A_SROA_31_SROA_5_0_A_SROA_31_0_SRC_SROA_IDX_SROA_IDX]], i32 7, i1 false), !tbaa [[TBAA0]]
 ; CHECK-NEXT: [[A_SROA_6_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 50
-; CHECK-NEXT: [[A_SROA_6_SROA_0_0_COPYLOAD:%.*]] = load <10 x i8>, ptr [[A_SROA_6_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[A_SROA_6_SROA_0_0_COPYLOAD:%.*]] = load i16, ptr [[A_SROA_6_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[A_SROA_6_SROA_3_0_A_SROA_6_0_SRC_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_6_0_SRC_SROA_IDX]], i64 2
+; CHECK-NEXT: [[A_SROA_6_SROA_3_0_COPYLOAD:%.*]] = load i8, ptr [[A_SROA_6_SROA_3_0_A_SROA_6_0_SRC_SROA_IDX_SROA_IDX]], align 1, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[A_SROA_6_SROA_4_0_A_SROA_6_0_SRC_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_6_0_SRC_SROA_IDX]], i64 3
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_6_SROA_4]], ptr align 1 [[A_SROA_6_SROA_4_0_A_SROA_6_0_SRC_SROA_IDX_SROA_IDX]], i32 7, i1 false), !tbaa [[TBAA0]]
 ; CHECK-NEXT: [[A_SROA_7_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 60
 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_7]], ptr align 1 [[A_SROA_7_0_SRC_SROA_IDX]], i32 40, i1 false), !tbaa [[TBAA0]]
-; CHECK-NEXT: [[A_SROA_31_SROA_0_2_VEC_INSERT:%.*]] = insertelement <10 x i8> [[A_SROA_2_SROA_0_0_COPYLOAD]], i8 0, i32 2
-; CHECK-NEXT: store <20 x i8> [[A_SROA_0_SROA_0_0_COPYLOAD]], ptr [[DST:%.*]], align 1, !tbaa [[TBAA11:![0-9]+]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31_SROA_5]], ptr align 1 [[A_SROA_2_SROA_4]], i32 7, i1 false), !tbaa [[TBAA3]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31_SROA_5]], ptr align 1 [[A_SROA_6_SROA_4]], i32 7, i1 false), !tbaa [[TBAA5]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST:%.*]], ptr align 1 [[A_SROA_0]], i32 20, i1 false), !tbaa [[TBAA7]]
 ; CHECK-NEXT: [[A_SROA_2_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 20
-; CHECK-NEXT: store <10 x i8> [[A_SROA_2_SROA_0_0_COPYLOAD]], ptr [[A_SROA_2_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA11]]
+; CHECK-NEXT: store i16 [[A_SROA_2_SROA_0_0_COPYLOAD]], ptr [[A_SROA_2_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA7]]
+; CHECK-NEXT: [[A_SROA_2_SROA_3_0_A_SROA_2_0_DST_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_2_0_DST_SROA_IDX]], i64 2
+; CHECK-NEXT: store i8 [[A_SROA_2_SROA_3_0_COPYLOAD]], ptr [[A_SROA_2_SROA_3_0_A_SROA_2_0_DST_SROA_IDX_SROA_IDX]], align 1, !tbaa [[TBAA7]]
+; CHECK-NEXT: [[A_SROA_2_SROA_4_0_A_SROA_2_0_DST_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_2_0_DST_SROA_IDX]], i64 3
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_2_SROA_4_0_A_SROA_2_0_DST_SROA_IDX_SROA_IDX]], ptr align 1 [[A_SROA_2_SROA_4]], i32 7, i1 false), !tbaa [[TBAA7]]
 ; CHECK-NEXT: [[A_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 30
-; CHECK-NEXT: store <10 x i8> [[A_SROA_3_SROA_0_0_COPYLOAD]], ptr [[A_SROA_3_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA11]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_3_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_3]], i32 10, i1 false), !tbaa [[TBAA7]]
 ; CHECK-NEXT: [[A_SROA_31_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 40
-; CHECK-NEXT: store <10 x i8> [[A_SROA_6_SROA_0_0_COPYLOAD]], ptr [[A_SROA_31_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA11]]
+; CHECK-NEXT: store i16 [[A_SROA_6_SROA_0_0_COPYLOAD]], ptr [[A_SROA_31_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA7]]
+; CHECK-NEXT: [[A_SROA_31_SROA_4_0_A_SROA_31_0_DST_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_31_0_DST_SROA_IDX]], i64 2
+; CHECK-NEXT: store i8 [[A_SROA_6_SROA_3_0_COPYLOAD]], ptr [[A_SROA_31_SROA_4_0_A_SROA_31_0_DST_SROA_IDX_SROA_IDX]], align 1, !tbaa [[TBAA7]]
+; CHECK-NEXT: [[A_SROA_31_SROA_5_0_A_SROA_31_0_DST_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_31_0_DST_SROA_IDX]], i64 3
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31_SROA_5_0_A_SROA_31_0_DST_SROA_IDX_SROA_IDX]], ptr align 1 [[A_SROA_31_SROA_5]], i32 7, i1 false), !tbaa [[TBAA7]]
 ; CHECK-NEXT: [[A_SROA_6_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 50
-; CHECK-NEXT: store <10 x i8> [[A_SROA_6_SROA_0_0_COPYLOAD]], ptr [[A_SROA_6_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA11]]
+; CHECK-NEXT: store i16 [[A_SROA_6_SROA_0_0_COPYLOAD]], ptr [[A_SROA_6_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA7]]
+; CHECK-NEXT: [[A_SROA_6_SROA_3_0_A_SROA_6_0_DST_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_6_0_DST_SROA_IDX]], i64 2
+; CHECK-NEXT: store i8 [[A_SROA_6_SROA_3_0_COPYLOAD]], ptr [[A_SROA_6_SROA_3_0_A_SROA_6_0_DST_SROA_IDX_SROA_IDX]], align 1, !tbaa [[TBAA7]]
+; CHECK-NEXT: [[A_SROA_6_SROA_4_0_A_SROA_6_0_DST_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_6_0_DST_SROA_IDX]], i64 3
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_6_SROA_4_0_A_SROA_6_0_DST_SROA_IDX_SROA_IDX]], ptr align 1 [[A_SROA_6_SROA_4]], i32 7, i1 false), !tbaa [[TBAA7]]
 ; CHECK-NEXT: [[A_SROA_7_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 60
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_7_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_7]], i32 40, i1 false), !tbaa [[TBAA11]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_7_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_7]], i32 40, i1 false), !tbaa [[TBAA7]]
 ; CHECK-NEXT: ret void
 ;
@@ -409,8 +455,8 @@ define void @test7(ptr %src, ptr %dst) {
 ; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32, align 4
 ; CHECK-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load volatile i32, ptr [[SRC:%.*]], align 1, !tbaa [[TBAA0]]
 ; CHECK-NEXT: store volatile i32 [[A_SROA_0_0_COPYLOAD]], ptr [[A_SROA_0]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_COPYLOAD1:%.*]] = load volatile i32, ptr [[A_SROA_0]], align 4, !tbaa [[TBAA13:![0-9]+]]
-; CHECK-NEXT: store volatile i32 [[A_SROA_0_0_A_SROA_0_0_COPYLOAD1]], ptr [[DST:%.*]], align 1, !tbaa [[TBAA13]]
+; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_COPYLOAD1:%.*]] = load volatile i32, ptr [[A_SROA_0]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT: store volatile i32 [[A_SROA_0_0_A_SROA_0_0_COPYLOAD1]], ptr [[DST:%.*]], align 1, !tbaa [[TBAA3]]
 ; CHECK-NEXT: ret void
 ;
@@ -430,9 +476,9 @@ define %S2 @test8(ptr %arg) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[S2_NEXT_PTR:%.*]] = getelementptr [[S2:%.*]], ptr [[ARG:%.*]], i64 0, i32 1
 ; CHECK-NEXT: [[S2_NEXT:%.*]] = load ptr, ptr [[S2_NEXT_PTR]], align 8, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[S2_NEXT_S1:%.*]] = load ptr, ptr [[S2_NEXT]], align 8, !tbaa [[TBAA13]]
+; CHECK-NEXT: [[S2_NEXT_S1:%.*]] = load ptr, ptr [[S2_NEXT]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT: [[S2_NEXT_NEXT_PTR:%.*]] = getelementptr [[S2]], ptr [[S2_NEXT]], i64 0, i32 1
-; CHECK-NEXT: [[S2_NEXT_NEXT:%.*]] = load ptr, ptr [[S2_NEXT_NEXT_PTR]], align 8, !tbaa [[TBAA11]]
+; CHECK-NEXT: [[S2_NEXT_NEXT:%.*]] = load ptr, ptr [[S2_NEXT_NEXT_PTR]], align 8, !tbaa [[TBAA7]]
 ; CHECK-NEXT: [[RESULT1:%.*]] = insertvalue [[S2]] poison, ptr [[S2_NEXT_S1]], 0
 ; CHECK-NEXT: [[RESULT2:%.*]] = insertvalue [[S2]] [[RESULT1]], ptr [[S2_NEXT_NEXT]], 1
 ; CHECK-NEXT: ret [[S2]] [[RESULT2]]
@@ -679,7 +725,7 @@ define void @test16(ptr %src, ptr %dst) {
 ; CHECK-LABEL: @test16(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load i24, ptr [[SRC:%.*]], align 1, !tbaa [[TBAA0]]
-; CHECK-NEXT: store i24 0, ptr [[DST:%.*]], align 1, !tbaa [[TBAA15:![0-9]+]]
+; CHECK-NEXT: store i24 0, ptr [[DST:%.*]], align 1, !tbaa [[TBAA5]]
 ; CHECK-NEXT: ret void
 ;
@@ -698,7 +744,7 @@ define void @test17(ptr %src, ptr %dst) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[A:%.*]] = alloca [3 x i8], align 1
 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[A]], ptr [[SRC:%.*]], i32 4, i1 true), !tbaa [[TBAA0]]
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[DST:%.*]], ptr [[A]], i32 4, i1 true), !tbaa [[TBAA13]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[DST:%.*]], ptr [[A]], i32 4, i1 true), !tbaa [[TBAA3]]
 ; CHECK-NEXT: ret void
 ;
@@ -719,12 +765,12 @@ define void @test18(ptr %src, ptr %dst, i32 %size) {
 ; CHECK-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load i32, ptr [[SRC:%.*]], align 1, !tbaa [[TBAA0]]
 ; CHECK-NEXT: [[A_SROA_3_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 4
 ; CHECK-NEXT: [[A_SROA_3_0_COPYLOAD:%.*]] = load i32, ptr [[A_SROA_3_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]]
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_33]], ptr [[SRC]], i32 [[SIZE:%.*]], i1 false), !tbaa [[TBAA13]]
-; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_33]], i8 42, i32 [[SIZE]], i1 false), !tbaa [[TBAA15]]
-; CHECK-NEXT: store i32 42, ptr [[DST:%.*]], align 1, !tbaa [[TBAA17:![0-9]+]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_33]], ptr [[SRC]], i32 [[SIZE:%.*]], i1 false), !tbaa [[TBAA3]]
+; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_33]], i8 42, i32 [[SIZE]], i1 false), !tbaa [[TBAA5]]
+; CHECK-NEXT: store i32 42, ptr [[DST:%.*]], align 1, !tbaa [[TBAA9]]
 ; CHECK-NEXT: [[A_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 4
-; CHECK-NEXT: store i32 [[A_SROA_3_0_COPYLOAD]], ptr [[A_SROA_3_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA17]]
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[DST]], ptr align 1 [[A_SROA_33]], i32 [[SIZE]], i1 false), !tbaa [[TBAA19:![0-9]+]]
+; CHECK-NEXT: store i32 [[A_SROA_3_0_COPYLOAD]], ptr [[A_SROA_3_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[DST]], ptr align 1 [[A_SROA_33]], i32 [[SIZE]], i1 false), !tbaa [[TBAA11]]
 ; CHECK-NEXT: ret void
 ;
@@ -959,7 +1005,8 @@ define void @PR14034(ptr %ptr, ptr %ptr2) {
 ; thing is to handle empty structs gracefully.
 ; CHECK-LABEL: @PR14034(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: store <12 x i8> undef, ptr [[PTR2:%.*]], align 1
+; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [12 x i8], align 8
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[PTR2:%.*]], ptr align 8 [[A_SROA_0]], i32 12, i1 false)
 ; CHECK-NEXT: ret void
 ;
@@ -1500,8 +1547,8 @@ define void @test24(ptr %src, ptr %dst) {
 ; CHECK-NEXT: [[A:%.*]] = alloca i64, align 16
 ; CHECK-NEXT: [[A_0_COPYLOAD:%.*]] = load volatile i64, ptr [[SRC:%.*]], align 1, !tbaa [[TBAA0]]
 ; CHECK-NEXT: store volatile i64 [[A_0_COPYLOAD]], ptr [[A]], align 16, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[A_0_COPYLOAD1:%.*]] = load volatile i64, ptr [[A]], align 16, !tbaa [[TBAA13]]
-; CHECK-NEXT: store volatile i64 [[A_0_COPYLOAD1]], ptr [[DST:%.*]], align 1, !tbaa [[TBAA13]]
+; CHECK-NEXT: [[A_0_COPYLOAD1:%.*]] = load volatile i64, ptr [[A]], align 16, !tbaa [[TBAA3]]
+; CHECK-NEXT: store volatile i64 [[A_0_COPYLOAD1]], ptr [[DST:%.*]], align 1, !tbaa [[TBAA3]]
 ; CHECK-NEXT: ret void
 ;
diff --git a/llvm/test/Transforms/SROA/pointer-offset-size.ll b/llvm/test/Transforms/SROA/pointer-offset-size.ll
index bf3c63c1ae7a3..76b52098a7e62 100644
--- a/llvm/test/Transforms/SROA/pointer-offset-size.ll
+++ b/llvm/test/Transforms/SROA/pointer-offset-size.ll
@@ -8,7 +8,8 @@ target datalayout = "e-p:64:64:64:32"
 define i16 @test(ptr %ts2.i) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: store <3 x i8> undef, ptr [[TS2_I:%.*]], align 1
+; CHECK-NEXT: [[S_SROA_0:%.*]] = alloca [3 x i8], align 8
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TS2_I:%.*]], ptr align 8 [[S_SROA_0]], i32 3, i1 false)
 ; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[TS2_I]], align 2
 ; CHECK-NEXT: ret i16 [[TMP0]]
 ;
diff --git a/llvm/test/Transforms/SROA/slice-width.ll b/llvm/test/Transforms/SROA/slice-width.ll
index 74ddf7573ad4c..7d2aeaaff57bc 100644
--- a/llvm/test/Transforms/SROA/slice-width.ll
+++ b/llvm/test/Transforms/SROA/slice-width.ll
@@ -46,7 +46,8 @@ load_i1:
 define void @memcpy_fp80_padding() {
 ; CHECK-LABEL: @memcpy_fp80_padding(
-; CHECK-NEXT: [[X_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr @foo_copy_source, align 16
+; CHECK-NEXT: [[X_SROA_0:%.*]] = alloca x86_fp80, align 16
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[X_SROA_0]], ptr align 16 @foo_copy_source, i32 16, i1 false)
 ; CHECK-NEXT: [[X_SROA_1_0_COPYLOAD:%.*]] = load i64, ptr getelementptr inbounds (i8, ptr @foo_copy_source, i64 16), align 16
 ; CHECK-NEXT: [[X_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr getelementptr inbounds (i8, ptr @foo_copy_source, i64 24), align 8
 ; CHECK-NEXT: store i64 [[X_SROA_1_0_COPYLOAD]], ptr @i64_sink, align 4
diff --git a/llvm/test/Transforms/SROA/tbaa-struct.ll b/llvm/test/Transforms/SROA/tbaa-struct.ll
index 3d55b72e91a60..3e9332c5b11c0 100644
--- a/llvm/test/Transforms/SROA/tbaa-struct.ll
+++ b/llvm/test/Transforms/SROA/tbaa-struct.ll
@@ -10,8 +10,7 @@ declare <2 x float> @foo(ptr %0)
 define void @bar(ptr %y2) {
 ; CHECK-LABEL: @bar(
 ; CHECK-NEXT: [[X14:%.*]] = call <2 x float> @foo(ptr [[Y2:%.*]])
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[X14]] to <2 x i32>
-; CHECK-NEXT: store <2 x i32> [[TMP1]], ptr [[Y2]], align 4, !tbaa.struct !0
+; CHECK-NEXT: store <2 x float> [[X14]], ptr [[Y2]], align 4, !tbaa.struct !0
 ; CHECK-NEXT: ret void
 ;
 %x7 = alloca %vector
diff --git a/llvm/test/Transforms/SROA/tbaa-struct2.ll b/llvm/test/Transforms/SROA/tbaa-struct2.ll
index e7d5f4e74de52..1c81fc6163bbc 100644
--- a/llvm/test/Transforms/SROA/tbaa-struct2.ll
+++ b/llvm/test/Transforms/SROA/tbaa-struct2.ll
@@ -9,11 +9,12 @@ declare double @subcall(double %g, i32 %m)
 define double @bar(ptr %wishart) {
 ; CHECK-LABEL: @bar(
+; CHECK-NEXT: [[TMP_SROA_3:%.*]] = alloca [4 x i8], align 4
 ; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load double, ptr [[WISHART:%.*]], align 8, !tbaa.struct !0
 ; CHECK-NEXT: [[TMP_SROA_2_0_WISHART_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[WISHART]], i64 8
 ; CHECK-NEXT: [[TMP_SROA_2_0_COPYLOAD:%.*]] = load i32, ptr [[TMP_SROA_2_0_WISHART_SROA_IDX]], align 8, !tbaa.struct !7
 ; CHECK-NEXT: [[TMP_SROA_3_0_WISHART_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[WISHART]], i64 12
-; CHECK-NEXT: [[TMP_SROA_3_SROA_0_0_COPYLOAD:%.*]] = load <4 x i8>, ptr [[TMP_SROA_3_0_WISHART_SROA_IDX]], align 4, !tbaa.struct !8
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_SROA_3]], ptr align 4 [[TMP_SROA_3_0_WISHART_SROA_IDX]], i64 4, i1 false), !tbaa.struct !8
 ; CHECK-NEXT: [[CALL:%.*]] = call double @subcall(double [[TMP_SROA_0_0_COPYLOAD]], i32 [[TMP_SROA_2_0_COPYLOAD]])
 ; CHECK-NEXT: ret double [[CALL]]
 ;
diff --git a/llvm/test/Transforms/SROA/vector-promotion.ll b/llvm/test/Transforms/SROA/vector-promotion.ll
index 569dd05918332..bdf50ea39af33 100644
--- a/llvm/test/Transforms/SROA/vector-promotion.ll
+++ b/llvm/test/Transforms/SROA/vector-promotion.ll
@@ -567,9 +567,9 @@ define <4 x float> @test12(<4 x i32> %val) {
 define void @swap-8bytes(ptr %x, ptr %y) {
 ; CHECK-LABEL: @swap-8bytes(
-; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load <8 x i8>, ptr [[X:%.*]], align 1
+; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load i64, ptr [[X:%.*]], align 1
 ; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[X]], ptr [[Y:%.*]], i64 8, i1 false)
-; CHECK-NEXT: store <8 x i8> [[TMP_SROA_0_0_COPYLOAD]], ptr [[Y]], align 1
+; CHECK-NEXT: store i64 [[TMP_SROA_0_0_COPYLOAD]], ptr [[Y]], align 1
 ; CHECK-NEXT: ret void
 ;
 %tmp = alloca [2 x i32]
@@ -581,9 +581,10 @@ define void @swap-8bytes(ptr %x, ptr %y) {
 define void @swap-7bytes(ptr %x, ptr %y) {
 ; CHECK-LABEL: @swap-7bytes(
-; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load <7 x i8>, ptr [[X:%.*]], align 1
+; CHECK-NEXT: [[TMP:%.*]] = alloca [7 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[TMP]], ptr [[X:%.*]], i64 7, i1 false)
 ; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[X]], ptr [[Y:%.*]], i64 7, i1 false)
-; CHECK-NEXT: store <7 x i8> [[TMP_SROA_0_0_COPYLOAD]], ptr [[Y]], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[Y]], ptr [[TMP]], i64 7, i1 false)
 ; CHECK-NEXT: ret void
 ;
 %tmp = alloca [7 x i8]
@@ -595,9 +596,10 @@ define void @swap-7bytes(ptr %x, ptr %y) {
 define void @swap-16bytes(ptr %x, ptr %y) {
 ; CHECK-LABEL: @swap-16bytes(
-; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr [[X:%.*]], align 1
+; CHECK-NEXT: [[TMP:%.*]] = alloca [2 x i64], align 8
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[TMP]], ptr [[X:%.*]], i64 16, i1 false)
 ; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[X]], ptr [[Y:%.*]], i64 16, i1 false)
-; CHECK-NEXT: store <16 x i8> [[TMP_SROA_0_0_COPYLOAD]], ptr [[Y]], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[Y]], ptr [[TMP]], i64 16, i1 false)
 ; CHECK-NEXT: ret void
 ;
 %tmp = alloca [2 x i64]
@@ -609,9 +611,10 @@ define void @swap-16bytes(ptr %x, ptr %y) {
 define void @swap-15bytes(ptr %x, ptr %y) {
 ; CHECK-LABEL: @swap-15bytes(
-; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load <15 x i8>, ptr [[X:%.*]], align 1
+; CHECK-NEXT: [[TMP:%.*]] = alloca [15 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[TMP]], ptr [[X:%.*]], i64 15, i1 false)
 ; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[X]], ptr [[Y:%.*]], i64 15, i1 false)
-; CHECK-NEXT: store <15 x i8> [[TMP_SROA_0_0_COPYLOAD]], ptr [[Y]], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[Y]], ptr [[TMP]], i64 15, i1 false)
 ; CHECK-NEXT: ret void
 ;
 %tmp = alloca [15 x i8]
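
For readers skimming the test churn, here is a minimal standalone sketch in the spirit of the swap-7bytes test above (the function and value names are illustrative, not the exact test body). The temporary alloca is only touched by whole-object memcpys, and its size is not a whole number of a legal vector element count times element size for the reverted heuristic's purposes; with the patch reverted, SROA keeps the alloca and rewrites the copies through it as @llvm.memcpy calls, instead of promoting the slice to a single <7 x i8> load/store pair as the reverted change did.

declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)

define void @swap_7bytes_sketch(ptr %x, ptr %y) {
  ; 7-byte temporary, accessed only via whole-object memcpys.
  %tmp = alloca [7 x i8]
  call void @llvm.memcpy.p0.p0.i64(ptr %tmp, ptr %x, i64 7, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr %x, ptr %y, i64 7, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr %y, ptr %tmp, i64 7, i1 false)
  ret void
}

Running this through `opt -passes=sroa` should roughly reproduce the before/after CHECK lines in vector-promotion.ll: with the reverted patch the first and last memcpy became a <7 x i8> load and store, while after the revert the alloca and both memcpys remain.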