llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-llvm-transforms Author: Florian Hahn (fhahn) <details> <summary>Changes</summary> If a split memory access introduced by SROA accesses precisely a single field of the original operation's !tbaa.struct, use the !tbaa tag for the accessed field directly instead of the full !tbaa.struct. InstCombine already has similar logic. Motivation for this and follow-on patches is to improve codegen for libc++, where using memcpy limits optimizations, like vectorization of code iterating over std::vector<std::complex<float>>: https://godbolt.org/z/f3vqYos3c Depends on https://github.com/llvm/llvm-project/pull/81285. --- Full diff: https://github.com/llvm/llvm-project/pull/81289.diff 5 Files Affected: - (modified) llvm/include/llvm/IR/Metadata.h (+2) - (modified) llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp (+13) - (modified) llvm/lib/Transforms/Scalar/SROA.cpp (+34-14) - (modified) llvm/test/Transforms/SROA/tbaa-struct2.ll (+10-11) - (modified) llvm/test/Transforms/SROA/tbaa-struct3.ll (+8-8) ``````````diff diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h index 6f23ac44dee968..33363a271d4823 100644 --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -849,6 +849,8 @@ struct AAMDNodes { /// If his AAMDNode has !tbaa.struct and \p AccessSize matches the size of the /// field at offset 0, get the TBAA tag describing the accessed field. AAMDNodes adjustForAccess(unsigned AccessSize); + AAMDNodes adjustForAccess(size_t Offset, Type *AccessTy, + const DataLayout &DL); }; // Specialize DenseMapInfo for AAMDNodes. 
diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index bfd70414c0340c..b2dc451d581939 100644 --- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -833,3 +833,16 @@ AAMDNodes AAMDNodes::adjustForAccess(unsigned AccessSize) { } return New; } + +AAMDNodes AAMDNodes::adjustForAccess(size_t Offset, Type *AccessTy, + const DataLayout &DL) { + + AAMDNodes New = shift(Offset); + if (!DL.typeSizeEqualsStoreSize(AccessTy)) + return New; + TypeSize Size = DL.getTypeStoreSize(AccessTy); + if (Size.isScalable()) + return New; + + return New.adjustForAccess(Size.getKnownMinValue()); +} diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 138dc38b5c14ce..f24cbbc1fe0591 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -2914,7 +2914,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> { // Do this after copyMetadataForLoad() to preserve the TBAA shift. 
if (AATags) - NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + NewLI->setAAMetadata(AATags.adjustForAccess( + NewBeginOffset - BeginOffset, NewLI->getType(), DL)); // Try to preserve nonnull metadata V = NewLI; @@ -2936,7 +2937,9 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> { IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy), getSliceAlign(), LI.isVolatile(), LI.getName()); if (AATags) - NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + NewLI->setAAMetadata(AATags.adjustForAccess( + NewBeginOffset - BeginOffset, NewLI->getType(), DL)); + if (LI.isVolatile()) NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access, @@ -3011,7 +3014,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> { Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); if (AATags) - Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, + V->getType(), DL)); Pass.DeadInsts.push_back(&SI); // NOTE: Careful to use OrigV rather than V. 
@@ -3038,7 +3042,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> { Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); if (AATags) - Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, + V->getType(), DL)); migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI, Store, Store->getPointerOperand(), @@ -3097,8 +3102,10 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> { } NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); - if (AATags) - NewSI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + if (AATags) { + NewSI->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, + V->getType(), DL)); + } if (SI.isVolatile()) NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID()); if (NewSI->isAtomic()) @@ -3280,8 +3287,10 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> { IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile()); New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); - if (AATags) - New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + if (AATags) { + New->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, + V->getType(), DL)); + } migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II, New, New->getPointerOperand(), V, DL); @@ -3486,7 +3495,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> { Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); if (AATags) - Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + Load->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, + Load->getType(), DL)); Src = Load; } @@ -3507,8 +3517,10 @@ class AllocaSliceRewriter : 
public InstVisitor<AllocaSliceRewriter, bool> { IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile())); Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); - if (AATags) - Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + if (AATags) { + Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, + Src->getType(), DL)); + } APInt Offset(DL.getIndexTypeSizeInBits(DstPtr->getType()), 0); if (IsDest) { @@ -3836,7 +3848,8 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> { DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0); if (AATags && GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset)) - Load->setAAMetadata(AATags.shift(Offset.getZExtValue())); + Load->setAAMetadata( + AATags.adjustForAccess(Offset.getZExtValue(), Load->getType(), DL)); Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert"); LLVM_DEBUG(dbgs() << " to: " << *Load << "\n"); @@ -3887,8 +3900,10 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> { APInt Offset( DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0); GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset); - if (AATags) - Store->setAAMetadata(AATags.shift(Offset.getZExtValue())); + if (AATags) { + Store->setAAMetadata(AATags.adjustForAccess( + Offset.getZExtValue(), ExtractValue->getType(), DL)); + } // migrateDebugInfo requires the base Alloca. Walk to it from this gep. 
// If we cannot (because there's an intervening non-const or unbounded @@ -4542,6 +4557,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { Value *StoreBasePtr = SI->getPointerOperand(); IRB.SetInsertPoint(SI); + AAMDNodes AATags = SI->getAAMetadata(); LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n"); @@ -4561,6 +4577,10 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group, LLVMContext::MD_DIAssignID}); + + if (AATags) + PStore->setAAMetadata( + AATags.adjustForAccess(PartOffset, PLoad->getType(), DL)); LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n"); } diff --git a/llvm/test/Transforms/SROA/tbaa-struct2.ll b/llvm/test/Transforms/SROA/tbaa-struct2.ll index 1fd37e82d67775..02c99a2b329457 100644 --- a/llvm/test/Transforms/SROA/tbaa-struct2.ll +++ b/llvm/test/Transforms/SROA/tbaa-struct2.ll @@ -13,9 +13,9 @@ define double @bar(ptr %wishart) { ; CHECK-NEXT: [[TMP_SROA_3:%.*]] = alloca [4 x i8], align 4 ; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load double, ptr [[WISHART:%.*]], align 8, !tbaa.struct [[TBAA_STRUCT0:![0-9]+]] ; CHECK-NEXT: [[TMP_SROA_2_0_WISHART_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[WISHART]], i64 8 -; CHECK-NEXT: [[TMP_SROA_2_0_COPYLOAD:%.*]] = load i32, ptr [[TMP_SROA_2_0_WISHART_SROA_IDX]], align 8, !tbaa.struct [[TBAA_STRUCT7:![0-9]+]] +; CHECK-NEXT: [[TMP_SROA_2_0_COPYLOAD:%.*]] = load i32, ptr [[TMP_SROA_2_0_WISHART_SROA_IDX]], align 8, !tbaa [[TBAA5:![0-9]+]] ; CHECK-NEXT: [[TMP_SROA_3_0_WISHART_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[WISHART]], i64 12 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_SROA_3]], ptr align 4 [[TMP_SROA_3_0_WISHART_SROA_IDX]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT8:![0-9]+]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_SROA_3]], ptr align 4 [[TMP_SROA_3_0_WISHART_SROA_IDX]], 
i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT7:![0-9]+]] ; CHECK-NEXT: [[CALL:%.*]] = call double @subcall(double [[TMP_SROA_0_0_COPYLOAD]], i32 [[TMP_SROA_2_0_COPYLOAD]]) ; CHECK-NEXT: ret double [[CALL]] ; @@ -38,15 +38,14 @@ define double @bar(ptr %wishart) { ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ;. -; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 8, !1, i64 8, i64 4, !5} -; CHECK: [[META1:![0-9]+]] = !{!2, !2, i64 0} -; CHECK: [[META2:![0-9]+]] = !{!"double", !3, i64 0} -; CHECK: [[META3:![0-9]+]] = !{!"omnipotent char", !4, i64 0} -; CHECK: [[META4:![0-9]+]] = !{!"Simple C++ TBAA"} -; CHECK: [[META5:![0-9]+]] = !{!6, !6, i64 0} -; CHECK: [[META6:![0-9]+]] = !{!"int", !3, i64 0} -; CHECK: [[TBAA_STRUCT7]] = !{i64 0, i64 4, !5} -; CHECK: [[TBAA_STRUCT8]] = !{} +; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 8, [[META1:![0-9]+]], i64 8, i64 4, [[TBAA5]]} +; CHECK: [[META1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} +; CHECK: [[META2]] = !{!"double", [[META3:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +; CHECK: [[META4]] = !{!"Simple C++ TBAA"} +; CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} +; CHECK: [[META6]] = !{!"int", [[META3]], i64 0} +; CHECK: [[TBAA_STRUCT7]] = !{} ;. ;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: ; CHECK-MODIFY-CFG: {{.*}} diff --git a/llvm/test/Transforms/SROA/tbaa-struct3.ll b/llvm/test/Transforms/SROA/tbaa-struct3.ll index 4910e0e07ae380..603e7d708647fc 100644 --- a/llvm/test/Transforms/SROA/tbaa-struct3.ll +++ b/llvm/test/Transforms/SROA/tbaa-struct3.ll @@ -7,9 +7,9 @@ define void @load_store_transfer_split_struct_tbaa_2_float(ptr dereferenceable(2 ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[B]] to i32 -; CHECK-NEXT: store i32 [[TMP0]], ptr [[RES]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[RES]], align 4, !tbaa.struct [[TBAA_STRUCT0:![0-9]+]] ; CHECK-NEXT: [[RES_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[RES]], i64 4 -; CHECK-NEXT: store i32 [[TMP1]], ptr [[RES_SROA_IDX]], align 4 +; CHECK-NEXT: store i32 [[TMP1]], ptr [[RES_SROA_IDX]], align 4, !tbaa [[TBAA1:![0-9]+]] ; CHECK-NEXT: [[P:%.*]] = load ptr, ptr [[RES]], align 8 ; CHECK-NEXT: ret void ; @@ -29,9 +29,9 @@ define void @memcpy_transfer(ptr dereferenceable(24) %res, float %a, float %b) { ; CHECK-SAME: ptr dereferenceable(24) [[RES:%.*]], float [[A:%.*]], float [[B:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[L_PTR:%.*]] = load ptr, ptr [[RES]], align 8 -; CHECK-NEXT: store float [[A]], ptr [[L_PTR]], align 1, !tbaa.struct [[TBAA_STRUCT0:![0-9]+]] +; CHECK-NEXT: store float [[A]], ptr [[L_PTR]], align 1, !tbaa.struct [[TBAA_STRUCT0]] ; CHECK-NEXT: [[TMP_SROA_2_0_L_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[L_PTR]], i64 4 -; CHECK-NEXT: store float [[B]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa.struct [[TBAA_STRUCT5:![0-9]+]] +; CHECK-NEXT: store float [[B]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa [[TBAA1]] ; CHECK-NEXT: ret void ; entry: @@ -53,7 +53,7 @@ define void @memcpy_transfer_tbaa_field_and_size_do_not_align(ptr dereferenceabl ; CHECK-NEXT: [[TMP_SROA_2_0_L_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[L_PTR]], i64 4 ; 
CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B]] to i32 ; CHECK-NEXT: [[TMP_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -; CHECK-NEXT: store i16 [[TMP_SROA_2_0_EXTRACT_TRUNC]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa.struct [[TBAA_STRUCT5]] +; CHECK-NEXT: store i16 [[TMP_SROA_2_0_EXTRACT_TRUNC]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa.struct [[TBAA_STRUCT5:![0-9]+]] ; CHECK-NEXT: ret void ; entry: @@ -98,10 +98,10 @@ declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias !3 = !{!"omnipotent char", !4, i64 0} !4 = !{!"Simple C++ TBAA"} ;. -; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 4, [[META1:![0-9]+]], i64 4, i64 4, [[META1]]} -; CHECK: [[META1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} +; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 4, [[TBAA1]], i64 4, i64 4, [[TBAA1]]} +; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} ; CHECK: [[META2]] = !{!"float", [[META3:![0-9]+]], i64 0} ; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} ; CHECK: [[META4]] = !{!"Simple C++ TBAA"} -; CHECK: [[TBAA_STRUCT5]] = !{i64 0, i64 4, [[META1]]} +; CHECK: [[TBAA_STRUCT5]] = !{i64 0, i64 4, [[TBAA1]]} ;. `````````` </details> https://github.com/llvm/llvm-project/pull/81289 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits