[llvm-branch-commits] [llvm] [CodeGen][NPM] Register Function Passes (PR #138828)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Akshat Oke (optimisan) Changes This allows -passes option to target function passes in the codegen pipeline by their CL name. --- Full diff: https://github.com/llvm/llvm-project/pull/138828.diff 3 Files Affected: - (modified) llvm/include/llvm/Passes/MachinePassRegistry.def (+1-1) - (modified) llvm/lib/Passes/PassBuilder.cpp (+4) - (modified) llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll (+3-3) ``diff diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index c6c00e8f25882..8717b79b26968 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -70,7 +70,6 @@ FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass()) FUNCTION_PASS("select-optimize", SelectOptimizePass(TM)) FUNCTION_PASS("sjlj-eh-prepare", SjLjEHPreparePass(TM)) FUNCTION_PASS("stack-protector", StackProtectorPass(TM)) -FUNCTION_PASS("tlshoist", TLSVariableHoistPass()) FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass()) FUNCTION_PASS("verify", VerifierPass()) FUNCTION_PASS("wasm-eh-prepare", WasmEHPreparePass()) @@ -279,6 +278,7 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS( #ifndef DUMMY_FUNCTION_PASS #define DUMMY_FUNCTION_PASS(NAME, PASS_NAME) #endif +DUMMY_FUNCTION_PASS("tlshoist", TLSVariableHoistPass) #undef DUMMY_FUNCTION_PASS #ifndef DUMMY_MACHINE_MODULE_PASS diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 5f7ce13ad8a3e..a9d192a7fad55 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -95,6 +95,7 @@ #include "llvm/CodeGen/ExpandLargeDivRem.h" #include "llvm/CodeGen/ExpandMemCmp.h" #include "llvm/CodeGen/ExpandPostRAPseudos.h" +#include "llvm/CodeGen/ExpandReductions.h" #include "llvm/CodeGen/FEntryInserter.h" #include "llvm/CodeGen/FinalizeISel.h" #include "llvm/CodeGen/FixupStatepointCallerSaved.h" @@ -155,6 +156,7 @@ 
#include "llvm/CodeGen/RemoveLoadsIntoFakeUses.h" #include "llvm/CodeGen/RemoveRedundantDebugValues.h" #include "llvm/CodeGen/RenameIndependentSubregs.h" +#include "llvm/CodeGen/ReplaceWithVeclib.h" #include "llvm/CodeGen/SafeStack.h" #include "llvm/CodeGen/SanitizerBinaryMetadata.h" #include "llvm/CodeGen/SelectOptimize.h" @@ -522,6 +524,8 @@ PassBuilder::PassBuilder(TargetMachine *TM, PipelineTuningOptions PTO, PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #include "PassRegistry.def" +#define FUNCTION_PASS(NAME, CREATE_PASS) \ + PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define MACHINE_FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS) \ diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll index 0d26b12a4a5e7..634a7fb6eb8e9 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -7,11 +7,11 @@ ; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < %s 2>&1 \ ; RUN: | FileCheck -check-prefix=GCN-O3 %s -; GCN-O0: 
require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) +; GCN-O0: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-bind
[llvm-branch-commits] [flang] [flang][fir] Add `fir.local` op for locality specifiers (PR #138505)
https://github.com/tblah approved this pull request. Thanks for the updates https://github.com/llvm/llvm-project/pull/138505 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120716 >From 72fd8a6b0d8d304992a804e7e05367389feedc2c Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 20 Dec 2024 06:14:28 -0500 Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector AtomicExpand fails for aligned `load atomic ` because it does not find a compatible library call. This change adds appropriate bitcasts so that the call can be lowered. commit-id:f430c1af --- llvm/lib/CodeGen/AtomicExpandPass.cpp | 15 - llvm/test/CodeGen/ARM/atomic-load-store.ll| 51 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 30 + .../X86/expand-atomic-non-integer.ll | 65 +++ 4 files changed, 158 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index c376de877ac7d..70f59eafc6ecb 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -2066,9 +2066,18 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( I->replaceAllUsesWith(V); } else if (HasResult) { Value *V; -if (UseSizedLibcall) - V = Builder.CreateBitOrPointerCast(Result, I->getType()); -else { +if (UseSizedLibcall) { + // Add bitcasts from Result's scalar type to I's vector type + auto *PtrTy = dyn_cast(I->getType()->getScalarType()); + auto *VTy = dyn_cast(I->getType()); + if (VTy && PtrTy && !Result->getType()->isVectorTy()) { +unsigned AS = PtrTy->getAddressSpace(); +Value *BC = Builder.CreateBitCast( +Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS))); +V = Builder.CreateIntToPtr(BC, I->getType()); + } else +V = Builder.CreateBitOrPointerCast(Result, I->getType()); +} else { V = Builder.CreateAlignedLoad(I->getType(), AllocaResult, AllocaAlignment); Builder.CreateLifetimeEnd(AllocaResult, SizeVal64); diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll index 560dfde356c29..36c1305a7c5df 100644 --- a/llvm/test/CodeGen/ARM/atomic-load-store.ll +++ 
b/llvm/test/CodeGen/ARM/atomic-load-store.ll @@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) { store atomic double %val1, ptr %ptr seq_cst, align 8 ret void } + +define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 { +; ARM-LABEL: atomic_vec1_ptr: +; ARM: @ %bb.0: +; ARM-NEXT:ldr r0, [r0] +; ARM-NEXT:dmb ish +; ARM-NEXT:bx lr +; +; ARMOPTNONE-LABEL: atomic_vec1_ptr: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT:ldr r0, [r0] +; ARMOPTNONE-NEXT:dmb ish +; ARMOPTNONE-NEXT:bx lr +; +; THUMBTWO-LABEL: atomic_vec1_ptr: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT:ldr r0, [r0] +; THUMBTWO-NEXT:dmb ish +; THUMBTWO-NEXT:bx lr +; +; THUMBONE-LABEL: atomic_vec1_ptr: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT:push {r7, lr} +; THUMBONE-NEXT:movs r1, #0 +; THUMBONE-NEXT:mov r2, r1 +; THUMBONE-NEXT:bl __sync_val_compare_and_swap_4 +; THUMBONE-NEXT:pop {r7, pc} +; +; ARMV4-LABEL: atomic_vec1_ptr: +; ARMV4: @ %bb.0: +; ARMV4-NEXT:push {r11, lr} +; ARMV4-NEXT:mov r1, #2 +; ARMV4-NEXT:bl __atomic_load_4 +; ARMV4-NEXT:pop {r11, lr} +; ARMV4-NEXT:mov pc, lr +; +; ARMV6-LABEL: atomic_vec1_ptr: +; ARMV6: @ %bb.0: +; ARMV6-NEXT:mov r1, #0 +; ARMV6-NEXT:mcr p15, #0, r1, c7, c10, #5 +; ARMV6-NEXT:ldr r0, [r0] +; ARMV6-NEXT:bx lr +; +; THUMBM-LABEL: atomic_vec1_ptr: +; THUMBM: @ %bb.0: +; THUMBM-NEXT:ldr r0, [r0] +; THUMBM-NEXT:dmb sy +; THUMBM-NEXT:bx lr + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 08d0405345f57..4293df8c13571 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -371,6 +371,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { ret <2 x i32> %ret } +define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec2_ptr_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16 +; 
CHECK-NEXT:movq %rdx, %xmm1 +; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT:popq %rax +; CHECK-NEXT:retq + %ret = load atomic <2 x ptr>, ptr %x acquire, align 16 + ret <2 x ptr> %ret +} + define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { ; CHECK3-LABEL: atomic_vec4_i8: ; CHECK3: ## %bb.0: @@ -394,6 +409,21 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind { ret <4 x i16> %ret } +define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_ptr270: +; CHECK: ## %b
[llvm-branch-commits] [flang] [flang][fir] Basic lowering `fir.do_concurrent` locality specs to `fir.do_loop ... unordered` (PR #138512)
@@ -162,7 +173,52 @@ class DoConcurrentConversion assert(loop.getRegion().hasOneBlock()); mlir::Block &loopBlock = loop.getRegion().getBlocks().front(); -// Collect iteration variable(s) allocations do that we can move them +// Handle localization +if (!loop.getLocalVars().empty()) { + mlir::OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPointToStart(&loop.getRegion().front()); + + std::optional localSyms = loop.getLocalSyms(); + + for (auto [localVar, localArg, localizerSym] : llvm::zip_equal( + loop.getLocalVars(), loop.getRegionLocalArgs(), *localSyms)) { +mlir::SymbolRefAttr localizerName = +llvm::cast(localizerSym); +fir::LocalitySpecifierOp localizer = findLocalizer(loop, localizerName); + +mlir::Value localAlloc = +rewriter.create(loop.getLoc(), localizer.getType()); tblah wrote: Please could you use TODO(loc, "message") so that the compiler crashes (in a controlled way) instead of producing incorrect code if any of these do sneak through. https://github.com/llvm/llvm-project/pull/138512 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [BasicAA] Gracefully handle large LocationSize (#138528) (PR #138681)
https://github.com/fhahn approved this pull request. LGTM, thanks! https://github.com/llvm/llvm-project/pull/138681 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] clang: Fix broken implicit cast to generic address space (PR #138863)
https://github.com/svenvh approved this pull request. https://github.com/llvm/llvm-project/pull/138863 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP] cancel(lation point) taskgroup LLVMIR (PR #137841)
https://github.com/skatrak approved this pull request. Thank you, this LGTM. I just have a non-blocking suggestion. https://github.com/llvm/llvm-project/pull/137841 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang-tools-extra] [clang-doc] Update serializer for improved template handling (PR #138065)
https://github.com/ilovepi updated https://github.com/llvm/llvm-project/pull/138065 Rate limit · GitHub body { background-color: #f6f8fa; color: #24292e; font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol; font-size: 14px; line-height: 1.5; margin: 0; } .container { margin: 50px auto; max-width: 600px; text-align: center; padding: 0 24px; } a { color: #0366d6; text-decoration: none; } a:hover { text-decoration: underline; } h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; text-shadow: 0 1px 0 #fff; } p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; } ul { list-style: none; margin: 25px 0; padding: 0; } li { display: table-cell; font-weight: bold; width: 1%; } .logo { display: inline-block; margin-top: 35px; } .logo-img-2x { display: none; } @media only screen and (-webkit-min-device-pixel-ratio: 2), only screen and ( min--moz-device-pixel-ratio: 2), only screen and ( -o-min-device-pixel-ratio: 2/1), only screen and (min-device-pixel-ratio: 2), only screen and (min-resolution: 192dpi), only screen and (min-resolution: 2dppx) { .logo-img-1x { display: none; } .logo-img-2x { display: inline-block; } } #suggestions { margin-top: 35px; color: #ccc; } #suggestions a { color: #66; font-weight: 200; font-size: 14px; margin: 0 10px; } Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour. https://support.github.com/contact";>Contact Support — https://githubstatus.com";>GitHub Status — https://twitter.com/githubstatus";>@githubstatus ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang-tools-extra] [clang-doc] Implement setupTemplateValue for HTMLMustacheGenerator (PR #138064)
https://github.com/ilovepi updated https://github.com/llvm/llvm-project/pull/138064 Rate limit · GitHub body { background-color: #f6f8fa; color: #24292e; font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol; font-size: 14px; line-height: 1.5; margin: 0; } .container { margin: 50px auto; max-width: 600px; text-align: center; padding: 0 24px; } a { color: #0366d6; text-decoration: none; } a:hover { text-decoration: underline; } h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; text-shadow: 0 1px 0 #fff; } p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; } ul { list-style: none; margin: 25px 0; padding: 0; } li { display: table-cell; font-weight: bold; width: 1%; } .logo { display: inline-block; margin-top: 35px; } .logo-img-2x { display: none; } @media only screen and (-webkit-min-device-pixel-ratio: 2), only screen and ( min--moz-device-pixel-ratio: 2), only screen and ( -o-min-device-pixel-ratio: 2/1), only screen and (min-device-pixel-ratio: 2), only screen and (min-resolution: 192dpi), only screen and (min-resolution: 2dppx) { .logo-img-1x { display: none; } .logo-img-2x { display: inline-block; } } #suggestions { margin-top: 35px; color: #ccc; } #suggestions a { color: #66; font-weight: 200; font-size: 14px; margin: 0 10px; } Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour. https://support.github.com/contact";>Contact Support — https://githubstatus.com";>GitHub Status — https://twitter.com/githubstatus";>@githubstatus ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Remove unused elements from atomic vector. (PR #125432)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/125432 >From c5cfc13dc2b4c70860fddf9060805c57a932ba22 Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 31 Jan 2025 13:12:56 -0500 Subject: [PATCH] [SelectionDAG][X86] Remove unused elements from atomic vector. After splitting, all elements are created. The two components must be found by looking at the upper and lower half of EXTRACT_ELEMENT. This change extends EltsFromConsecutiveLoads to understand AtomicSDNode so that unused elements can be removed. commit-id:b83937a8 --- llvm/include/llvm/CodeGen/SelectionDAG.h | 4 +- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 20 ++- .../SelectionDAGAddressAnalysis.cpp | 30 ++-- llvm/lib/Target/X86/X86ISelLowering.cpp | 73 +++-- llvm/test/CodeGen/X86/atomic-load-store.ll| 149 ++ 5 files changed, 104 insertions(+), 172 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index ba11ddbb5b731..d3cd81c146280 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1843,7 +1843,7 @@ class SelectionDAG { /// chain to the token factor. This ensures that the new memory node will have /// the same relative memory dependency position as the old load. Returns the /// new merged load chain. - SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp); + SDValue makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp); /// Topological-sort the AllNodes list and a /// assign a unique node id for each node in the DAG based on their @@ -2281,7 +2281,7 @@ class SelectionDAG { /// merged. Check that both are nonvolatile and if LD is loading /// 'Bytes' bytes from a location that is 'Dist' units away from the /// location that the 'Base' load is loading from. 
- bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, + bool areNonVolatileConsecutiveLoads(MemSDNode *LD, MemSDNode *Base, unsigned Bytes, int Dist) const; /// Infer alignment of a load / store address. Return std::nullopt if it diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 2a68903c34cef..8e77a542ab029 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -12218,7 +12218,7 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain, return TokenFactor; } -SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, +SDValue SelectionDAG::makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp) { assert(isa(NewMemOp.getNode()) && "Expected a memop node"); SDValue OldChain = SDValue(OldLoad, 1); @@ -12911,17 +12911,21 @@ std::pair SelectionDAG::UnrollVectorOverflowOp( getBuildVector(NewOvVT, dl, OvScalars)); } -bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, - LoadSDNode *Base, +bool SelectionDAG::areNonVolatileConsecutiveLoads(MemSDNode *LD, + MemSDNode *Base, unsigned Bytes, int Dist) const { if (LD->isVolatile() || Base->isVolatile()) return false; - // TODO: probably too restrictive for atomics, revisit - if (!LD->isSimple()) -return false; - if (LD->isIndexed() || Base->isIndexed()) -return false; + if (auto Ld = dyn_cast(LD)) { +if (!Ld->isSimple()) + return false; +if (Ld->isIndexed()) + return false; + } + if (auto Ld = dyn_cast(Base)) +if (Ld->isIndexed()) + return false; if (LD->getChain() != Base->getChain()) return false; EVT VT = LD->getMemoryVT(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index f2ab88851b780..c29cb424c7a4c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -195,8 +195,8 @@ bool BaseIndexOffset::contains(const SelectionDAG &DAG, int64_t BitSize, } /// Parses tree in Ptr for base, index, offset addresses. -static BaseIndexOffset matchLSNode(const LSBaseSDNode *N, - const SelectionDAG &DAG) { +template +static BaseIndexOffset matchSDNode(const T *N, const SelectionDAG &DAG) { SDValue Ptr = N->getBasePtr(); // (((B + I*M) + c)) + c ... @@ -206,16 +206,18 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N, bool IsIndexSignExt = false; // pre-inc/pre-dec ops are components of EA. - if (N->getAddressingMode() == ISD::PRE_INC) { -if (auto *C = dyn_cast(N->getOffset(
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Remove unused elements from atomic vector. (PR #125432)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/125432 >From c5cfc13dc2b4c70860fddf9060805c57a932ba22 Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 31 Jan 2025 13:12:56 -0500 Subject: [PATCH] [SelectionDAG][X86] Remove unused elements from atomic vector. After splitting, all elements are created. The two components must be found by looking at the upper and lower half of EXTRACT_ELEMENT. This change extends EltsFromConsecutiveLoads to understand AtomicSDNode so that unused elements can be removed. commit-id:b83937a8 --- llvm/include/llvm/CodeGen/SelectionDAG.h | 4 +- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 20 ++- .../SelectionDAGAddressAnalysis.cpp | 30 ++-- llvm/lib/Target/X86/X86ISelLowering.cpp | 73 +++-- llvm/test/CodeGen/X86/atomic-load-store.ll| 149 ++ 5 files changed, 104 insertions(+), 172 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index ba11ddbb5b731..d3cd81c146280 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1843,7 +1843,7 @@ class SelectionDAG { /// chain to the token factor. This ensures that the new memory node will have /// the same relative memory dependency position as the old load. Returns the /// new merged load chain. - SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp); + SDValue makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp); /// Topological-sort the AllNodes list and a /// assign a unique node id for each node in the DAG based on their @@ -2281,7 +2281,7 @@ class SelectionDAG { /// merged. Check that both are nonvolatile and if LD is loading /// 'Bytes' bytes from a location that is 'Dist' units away from the /// location that the 'Base' load is loading from. 
- bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, + bool areNonVolatileConsecutiveLoads(MemSDNode *LD, MemSDNode *Base, unsigned Bytes, int Dist) const; /// Infer alignment of a load / store address. Return std::nullopt if it diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 2a68903c34cef..8e77a542ab029 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -12218,7 +12218,7 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain, return TokenFactor; } -SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, +SDValue SelectionDAG::makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp) { assert(isa(NewMemOp.getNode()) && "Expected a memop node"); SDValue OldChain = SDValue(OldLoad, 1); @@ -12911,17 +12911,21 @@ std::pair SelectionDAG::UnrollVectorOverflowOp( getBuildVector(NewOvVT, dl, OvScalars)); } -bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, - LoadSDNode *Base, +bool SelectionDAG::areNonVolatileConsecutiveLoads(MemSDNode *LD, + MemSDNode *Base, unsigned Bytes, int Dist) const { if (LD->isVolatile() || Base->isVolatile()) return false; - // TODO: probably too restrictive for atomics, revisit - if (!LD->isSimple()) -return false; - if (LD->isIndexed() || Base->isIndexed()) -return false; + if (auto Ld = dyn_cast(LD)) { +if (!Ld->isSimple()) + return false; +if (Ld->isIndexed()) + return false; + } + if (auto Ld = dyn_cast(Base)) +if (Ld->isIndexed()) + return false; if (LD->getChain() != Base->getChain()) return false; EVT VT = LD->getMemoryVT(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index f2ab88851b780..c29cb424c7a4c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -195,8 +195,8 @@ bool BaseIndexOffset::contains(const SelectionDAG &DAG, int64_t BitSize, } /// Parses tree in Ptr for base, index, offset addresses. -static BaseIndexOffset matchLSNode(const LSBaseSDNode *N, - const SelectionDAG &DAG) { +template +static BaseIndexOffset matchSDNode(const T *N, const SelectionDAG &DAG) { SDValue Ptr = N->getBasePtr(); // (((B + I*M) + c)) + c ... @@ -206,16 +206,18 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N, bool IsIndexSignExt = false; // pre-inc/pre-dec ops are components of EA. - if (N->getAddressingMode() == ISD::PRE_INC) { -if (auto *C = dyn_cast(N->getOffset(
[llvm-branch-commits] [llvm] [SelectionDAG] Split vector types for atomic load (PR #120640)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120640 >From 4783f04e8ce1786d3cb40e8573cd602b9f0979b5 Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 16:25:55 -0500 Subject: [PATCH] [SelectionDAG] Split vector types for atomic load Vector types that aren't widened are split so that a single ATOMIC_LOAD is issued for the entire vector at once. This change utilizes the load vectorization infrastructure in SelectionDAG in order to group the vectors. This enables SelectionDAG to translate vectors with type bfloat,half. commit-id:3a045357 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 35 llvm/test/CodeGen/X86/atomic-load-store.ll| 171 ++ 3 files changed, 207 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index bdfa5f7741ad3..d8f402f529632 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -960,6 +960,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 6b3467573a0a2..d0ea0a607aed7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1172,6 +1172,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_STEP_VECTOR(N, Lo, Hi); 
break; case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; + case ISD::ATOMIC_LOAD: +SplitVecRes_ATOMIC_LOAD(cast(N), Lo, Hi); +break; case ISD::LOAD: SplitVecRes_LOAD(cast(N), Lo, Hi); break; @@ -1421,6 +1424,38 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SetSplitVector(SDValue(N, ResNo), Lo, Hi); } +void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, + SDValue &Hi) { + EVT LoVT, HiVT; + SDLoc dl(LD); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); + + ISD::LoadExtType ExtType = LD->getExtensionType(); + SDValue Ch = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + + EVT IntVT = + EVT::getIntegerVT(*DAG.getContext(), LD->getValueType(0).getSizeInBits()); + EVT MemIntVT = + EVT::getIntegerVT(*DAG.getContext(), LD->getMemoryVT().getSizeInBits()); + SDValue ALD = DAG.getAtomicLoad(ExtType, dl, MemIntVT, IntVT, Ch, Ptr, + LD->getMemOperand()); + + EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits()); + EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits()); + SDValue ExtractLo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, LoIntVT, ALD, + DAG.getIntPtrConstant(0, dl)); + SDValue ExtractHi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiIntVT, ALD, + DAG.getIntPtrConstant(1, dl)); + + Lo = DAG.getBitcast(LoVT, ExtractLo); + Hi = DAG.getBitcast(HiVT, ExtractHi); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(LD, 1), ALD.getValue(1)); +} + void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT, MachinePointerInfo &MPI, SDValue &Ptr, uint64_t *ScaledOffset) { diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 935d058a52f8f..42b0955824293 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -204,6 +204,68 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) { ret <2 x float> %ret } +define <2 x half> @atomic_vec2_half(ptr %x) { +; CHECK3-LABEL: atomic_vec2_half: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movl (%rdi), %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK3-NEXT:shrl $16, %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm1 +; CHECK3-NEXT:punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_half: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movl (%rdi), %eax +; CHECK0-NEXT:movl %eax, %ecx +; CHECK0-NEXT:shrl $16, %ecx +; CHECK0-NEXT:movw %cx, %dx +; CHE
[llvm-branch-commits] [llvm] [SelectionDAG] Legalize <1 x T> vector types for atomic load (PR #120385)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120385 >From c476a2452e64f7f6127ad0265c4c6aba7a08531a Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:37:17 -0500 Subject: [PATCH] [SelectionDAG] Legalize <1 x T> vector types for atomic load `load atomic <1 x T>` is not valid. This change legalizes vector types of atomic load via scalarization in SelectionDAG so that it can, for example, translate from `v1i32` to `i32`. commit-id:5c36cc8c --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 15 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 121 +- 3 files changed, 135 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 720393158aa5e..89ea7ef4dbe89 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -874,6 +874,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index d0b69b88748a9..8eee7a4c61fe6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -64,6 +64,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { R = ScalarizeVecRes_UnaryOpWithExtraInput(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +R = ScalarizeVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: R = 
ScalarizeVecRes_LOAD(cast(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; @@ -458,6 +461,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { return Op; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) { + SDValue Result = DAG.getAtomicLoad( + ISD::NON_EXTLOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(), + N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(), + N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { assert(N->isUnindexed() && "Indexed vector load?"); diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 5bce4401f7bdb..d23cfb89f9fc8 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK3 +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s --check-prefixes=CHECK,CHECK0 define void @test1(ptr %ptr, i32 %val1) { ; CHECK-LABEL: test1: @@ -28,3 +28,120 @@ define i32 @test3(ptr %ptr) { %val = load atomic i32, ptr %ptr seq_cst, align 4 ret i32 %val } + +define <1 x i32> @atomic_vec1_i32(ptr %x) { +; CHECK-LABEL: atomic_vec1_i32: +; CHECK: ## %bb.0: +; CHECK-NEXT:movl (%rdi), %eax +; CHECK-NEXT:retq + %ret = load atomic <1 x i32>, ptr %x 
acquire, align 4 + ret <1 x i32> %ret +} + +define <1 x i8> @atomic_vec1_i8(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i8: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzbl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i8: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movb (%rdi), %al +; CHECK0-NEXT:retq + %ret = load atomic <1 x i8>, ptr %x acquire, align 1 + ret <1 x i8> %ret +} + +define <1 x i16> @atomic_vec1_i16(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i16: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i16: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw (%rdi), %ax +; CHECK0-NEXT:retq + %ret = load atomic <1 x i16>, ptr %x acquire, align 2 + ret <1 x i16> %ret +} + +define <1 x i32> @atomic_vec1_i8_zext(ptr %x) { +; CHECK3-LABEL: atomic_ve
[llvm-branch-commits] [llvm] [X86] Remove extra MOV after widening atomic load (PR #138635)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/138635 >From 939a68f81bd2add47607063c83123c252cc156c8 Mon Sep 17 00:00:00 2001 From: jofernau_amdeng Date: Tue, 6 May 2025 01:48:11 -0400 Subject: [PATCH] [X86] Remove extra MOV after widening atomic load This change adds patterns to optimize out an extra MOV present after widening the atomic load. commit-id:45989503 --- llvm/lib/Target/X86/X86InstrCompiler.td| 7 llvm/test/CodeGen/X86/atomic-load-store.ll | 43 -- llvm/test/CodeGen/X86/atomic-unordered.ll | 3 +- 3 files changed, 31 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 167e27eddd71e..8ad8a0a6194d6 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1200,6 +1200,13 @@ def : Pat<(i16 (atomic_load_nonext_16 addr:$src)), (MOV16rm addr:$src)>; def : Pat<(i32 (atomic_load_nonext_32 addr:$src)), (MOV32rm addr:$src)>; def : Pat<(i64 (atomic_load_nonext_64 addr:$src)), (MOV64rm addr:$src)>; +def : Pat<(v4i32 (scalar_to_vector (i32 (anyext (i16 (atomic_load_16 addr:$src)), + (MOVDI2PDIrm addr:$src)>; // load atomic <2 x i8> +def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src, + (MOVDI2PDIrm addr:$src)>; // load atomic <2 x i16> +def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src, + (MOV64toPQIrm addr:$src)>; // load atomic <2 x i32,float> + // Floating point loads/stores. 
def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst), (MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>; diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 9ee8b4fc5ac7f..935d058a52f8f 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -149,8 +149,7 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { define <2 x i8> @atomic_vec2_i8(ptr %x) { ; CHECK3-LABEL: atomic_vec2_i8: ; CHECK3: ## %bb.0: -; CHECK3-NEXT:movzwl (%rdi), %eax -; CHECK3-NEXT:movd %eax, %xmm0 +; CHECK3-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK3-NEXT:retq ; ; CHECK0-LABEL: atomic_vec2_i8: @@ -165,11 +164,15 @@ define <2 x i8> @atomic_vec2_i8(ptr %x) { } define <2 x i16> @atomic_vec2_i16(ptr %x) { -; CHECK-LABEL: atomic_vec2_i16: -; CHECK: ## %bb.0: -; CHECK-NEXT:movl (%rdi), %eax -; CHECK-NEXT:movd %eax, %xmm0 -; CHECK-NEXT:retq +; CHECK3-LABEL: atomic_vec2_i16: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_i16: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK0-NEXT:retq %ret = load atomic <2 x i16>, ptr %x acquire, align 4 ret <2 x i16> %ret } @@ -177,8 +180,7 @@ define <2 x i16> @atomic_vec2_i16(ptr %x) { define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) { ; CHECK-LABEL: atomic_vec2_ptr270: ; CHECK: ## %bb.0: -; CHECK-NEXT:movq (%rdi), %rax -; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:movq (%rdi), %xmm0 ; CHECK-NEXT:retq %ret = load atomic <2 x ptr addrspace(270)>, ptr %x acquire, align 8 ret <2 x ptr addrspace(270)> %ret @@ -187,8 +189,7 @@ define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) { define <2 x i32> @atomic_vec2_i32_align(ptr %x) { ; CHECK-LABEL: atomic_vec2_i32_align: ; CHECK: ## %bb.0: -; CHECK-NEXT:movq (%rdi), %rax -; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:movq (%rdi), %xmm0 ; 
CHECK-NEXT:retq %ret = load atomic <2 x i32>, ptr %x acquire, align 8 ret <2 x i32> %ret @@ -197,8 +198,7 @@ define <2 x i32> @atomic_vec2_i32_align(ptr %x) { define <2 x float> @atomic_vec2_float_align(ptr %x) { ; CHECK-LABEL: atomic_vec2_float_align: ; CHECK: ## %bb.0: -; CHECK-NEXT:movq (%rdi), %rax -; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:movq (%rdi), %xmm0 ; CHECK-NEXT:retq %ret = load atomic <2 x float>, ptr %x acquire, align 8 ret <2 x float> %ret @@ -354,11 +354,15 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { } define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { -; CHECK-LABEL: atomic_vec4_i8: -; CHECK: ## %bb.0: -; CHECK-NEXT:movl (%rdi), %eax -; CHECK-NEXT:movd %eax, %xmm0 -; CHECK-NEXT:retq +; CHECK3-LABEL: atomic_vec4_i8: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec4_i8: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK0-NEXT:retq %ret = load atomic <4 x i8>, ptr %x acquire, align 4 ret <4 x i8> %ret } @@ -366,8 +370,7 @@ define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { define <4 x i16> @atomic_vec4_i16(
[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120716 >From 8e1beaef4c295d500183bd82e1ddb0010128d4b2 Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 20 Dec 2024 06:14:28 -0500 Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector AtomicExpand fails for aligned `load atomic ` because it does not find a compatible library call. This change adds appropriate bitcasts so that the call can be lowered. commit-id:f430c1af --- llvm/lib/CodeGen/AtomicExpandPass.cpp | 15 - llvm/test/CodeGen/ARM/atomic-load-store.ll| 51 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 30 + .../X86/expand-atomic-non-integer.ll | 65 +++ 4 files changed, 158 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index c376de877ac7d..70f59eafc6ecb 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -2066,9 +2066,18 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( I->replaceAllUsesWith(V); } else if (HasResult) { Value *V; -if (UseSizedLibcall) - V = Builder.CreateBitOrPointerCast(Result, I->getType()); -else { +if (UseSizedLibcall) { + // Add bitcasts from Result's scalar type to I's vector type + auto *PtrTy = dyn_cast(I->getType()->getScalarType()); + auto *VTy = dyn_cast(I->getType()); + if (VTy && PtrTy && !Result->getType()->isVectorTy()) { +unsigned AS = PtrTy->getAddressSpace(); +Value *BC = Builder.CreateBitCast( +Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS))); +V = Builder.CreateIntToPtr(BC, I->getType()); + } else +V = Builder.CreateBitOrPointerCast(Result, I->getType()); +} else { V = Builder.CreateAlignedLoad(I->getType(), AllocaResult, AllocaAlignment); Builder.CreateLifetimeEnd(AllocaResult, SizeVal64); diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll index 560dfde356c29..eaa2ffd9b2731 100644 --- a/llvm/test/CodeGen/ARM/atomic-load-store.ll +++ 
b/llvm/test/CodeGen/ARM/atomic-load-store.ll @@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) { store atomic double %val1, ptr %ptr seq_cst, align 8 ret void } + +define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 { +; ARM-LABEL: atomic_vec1_ptr: +; ARM: @ %bb.0: +; ARM-NEXT:ldr r0, [r0] +; ARM-NEXT:dmb ish +; ARM-NEXT:bx lr +; +; ARMOPTNONE-LABEL: atomic_vec1_ptr: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT:ldr r0, [r0] +; ARMOPTNONE-NEXT:dmb ish +; ARMOPTNONE-NEXT:bx lr +; +; THUMBTWO-LABEL: atomic_vec1_ptr: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT:ldr r0, [r0] +; THUMBTWO-NEXT:dmb ish +; THUMBTWO-NEXT:bx lr +; +; THUMBONE-LABEL: atomic_vec1_ptr: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT:push {r7, lr} +; THUMBONE-NEXT:movs r1, #0 +; THUMBONE-NEXT:mov r2, r1 +; THUMBONE-NEXT:bl __sync_val_compare_and_swap_4 +; THUMBONE-NEXT:pop {r7, pc} +; +; ARMV4-LABEL: atomic_vec1_ptr: +; ARMV4: @ %bb.0: +; ARMV4-NEXT:push {r11, lr} +; ARMV4-NEXT:mov r1, #2 +; ARMV4-NEXT:bl __atomic_load_4 +; ARMV4-NEXT:pop {r11, lr} +; ARMV4-NEXT:mov pc, lr +; +; ARMV6-LABEL: atomic_vec1_ptr: +; ARMV6: @ %bb.0: +; ARMV6-NEXT:ldr r0, [r0] +; ARMV6-NEXT:mov r1, #0 +; ARMV6-NEXT:mcr p15, #0, r1, c7, c10, #5 +; ARMV6-NEXT:bx lr +; +; THUMBM-LABEL: atomic_vec1_ptr: +; THUMBM: @ %bb.0: +; THUMBM-NEXT:ldr r0, [r0] +; THUMBM-NEXT:dmb sy +; THUMBM-NEXT:bx lr + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 5473ae91ddbf7..053222660e5cd 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -381,6 +381,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { ret <2 x i32> %ret } +define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec2_ptr_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16 +; 
CHECK-NEXT:movq %rdx, %xmm1 +; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT:popq %rax +; CHECK-NEXT:retq + %ret = load atomic <2 x ptr>, ptr %x acquire, align 16 + ret <2 x ptr> %ret +} + define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { ; CHECK3-LABEL: atomic_vec4_i8: ; CHECK3: ## %bb.0: @@ -404,6 +419,21 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind { ret <4 x i16> %ret } +define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_ptr270: +; CHECK: ## %b
[llvm-branch-commits] [llvm] [SelectionDAG] Split vector types for atomic load (PR #120640)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120640 >From 4783f04e8ce1786d3cb40e8573cd602b9f0979b5 Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 16:25:55 -0500 Subject: [PATCH] [SelectionDAG] Split vector types for atomic load Vector types that aren't widened are split so that a single ATOMIC_LOAD is issued for the entire vector at once. This change utilizes the load vectorization infrastructure in SelectionDAG in order to group the vectors. This enables SelectionDAG to translate vectors with type bfloat,half. commit-id:3a045357 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 35 llvm/test/CodeGen/X86/atomic-load-store.ll| 171 ++ 3 files changed, 207 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index bdfa5f7741ad3..d8f402f529632 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -960,6 +960,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 6b3467573a0a2..d0ea0a607aed7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1172,6 +1172,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_STEP_VECTOR(N, Lo, Hi); 
break; case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; + case ISD::ATOMIC_LOAD: +SplitVecRes_ATOMIC_LOAD(cast(N), Lo, Hi); +break; case ISD::LOAD: SplitVecRes_LOAD(cast(N), Lo, Hi); break; @@ -1421,6 +1424,38 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SetSplitVector(SDValue(N, ResNo), Lo, Hi); } +void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, + SDValue &Hi) { + EVT LoVT, HiVT; + SDLoc dl(LD); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); + + ISD::LoadExtType ExtType = LD->getExtensionType(); + SDValue Ch = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + + EVT IntVT = + EVT::getIntegerVT(*DAG.getContext(), LD->getValueType(0).getSizeInBits()); + EVT MemIntVT = + EVT::getIntegerVT(*DAG.getContext(), LD->getMemoryVT().getSizeInBits()); + SDValue ALD = DAG.getAtomicLoad(ExtType, dl, MemIntVT, IntVT, Ch, Ptr, + LD->getMemOperand()); + + EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits()); + EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits()); + SDValue ExtractLo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, LoIntVT, ALD, + DAG.getIntPtrConstant(0, dl)); + SDValue ExtractHi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiIntVT, ALD, + DAG.getIntPtrConstant(1, dl)); + + Lo = DAG.getBitcast(LoVT, ExtractLo); + Hi = DAG.getBitcast(HiVT, ExtractHi); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(LD, 1), ALD.getValue(1)); +} + void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT, MachinePointerInfo &MPI, SDValue &Ptr, uint64_t *ScaledOffset) { diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 935d058a52f8f..42b0955824293 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -204,6 +204,68 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) { ret <2 x float> %ret } +define <2 x half> @atomic_vec2_half(ptr %x) { +; CHECK3-LABEL: atomic_vec2_half: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movl (%rdi), %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK3-NEXT:shrl $16, %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm1 +; CHECK3-NEXT:punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_half: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movl (%rdi), %eax +; CHECK0-NEXT:movl %eax, %ecx +; CHECK0-NEXT:shrl $16, %ecx +; CHECK0-NEXT:movw %cx, %dx +; CHE
[llvm-branch-commits] [llvm] [SelectionDAG] Split vector types for atomic load (PR #120640)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120640 >From 4783f04e8ce1786d3cb40e8573cd602b9f0979b5 Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 16:25:55 -0500 Subject: [PATCH] [SelectionDAG] Split vector types for atomic load Vector types that aren't widened are split so that a single ATOMIC_LOAD is issued for the entire vector at once. This change utilizes the load vectorization infrastructure in SelectionDAG in order to group the vectors. This enables SelectionDAG to translate vectors with type bfloat,half. commit-id:3a045357 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 35 llvm/test/CodeGen/X86/atomic-load-store.ll| 171 ++ 3 files changed, 207 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index bdfa5f7741ad3..d8f402f529632 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -960,6 +960,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 6b3467573a0a2..d0ea0a607aed7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1172,6 +1172,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_STEP_VECTOR(N, Lo, Hi); 
break; case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; + case ISD::ATOMIC_LOAD: +SplitVecRes_ATOMIC_LOAD(cast(N), Lo, Hi); +break; case ISD::LOAD: SplitVecRes_LOAD(cast(N), Lo, Hi); break; @@ -1421,6 +1424,38 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SetSplitVector(SDValue(N, ResNo), Lo, Hi); } +void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, + SDValue &Hi) { + EVT LoVT, HiVT; + SDLoc dl(LD); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); + + ISD::LoadExtType ExtType = LD->getExtensionType(); + SDValue Ch = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + + EVT IntVT = + EVT::getIntegerVT(*DAG.getContext(), LD->getValueType(0).getSizeInBits()); + EVT MemIntVT = + EVT::getIntegerVT(*DAG.getContext(), LD->getMemoryVT().getSizeInBits()); + SDValue ALD = DAG.getAtomicLoad(ExtType, dl, MemIntVT, IntVT, Ch, Ptr, + LD->getMemOperand()); + + EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits()); + EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits()); + SDValue ExtractLo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, LoIntVT, ALD, + DAG.getIntPtrConstant(0, dl)); + SDValue ExtractHi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiIntVT, ALD, + DAG.getIntPtrConstant(1, dl)); + + Lo = DAG.getBitcast(LoVT, ExtractLo); + Hi = DAG.getBitcast(HiVT, ExtractHi); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(LD, 1), ALD.getValue(1)); +} + void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT, MachinePointerInfo &MPI, SDValue &Ptr, uint64_t *ScaledOffset) { diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 935d058a52f8f..42b0955824293 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -204,6 +204,68 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) { ret <2 x float> %ret } +define <2 x half> @atomic_vec2_half(ptr %x) { +; CHECK3-LABEL: atomic_vec2_half: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movl (%rdi), %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK3-NEXT:shrl $16, %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm1 +; CHECK3-NEXT:punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_half: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movl (%rdi), %eax +; CHECK0-NEXT:movl %eax, %ecx +; CHECK0-NEXT:shrl $16, %ecx +; CHECK0-NEXT:movw %cx, %dx +; CHE
[llvm-branch-commits] [llvm] [X86] Add atomic vector tests for unaligned >1 sizes. (PR #120387)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120387 >From 01f388d501562796484ac3d1be8e29c27a33b702 Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:40:32 -0500 Subject: [PATCH] [X86] Add atomic vector tests for unaligned >1 sizes. Unaligned atomic vectors with size >1 are lowered to calls. Adding their tests separately here. commit-id:a06a5cc6 --- llvm/test/CodeGen/X86/atomic-load-store.ll | 253 + 1 file changed, 253 insertions(+) diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 6efcbb80c0ce6..39e9fdfa5e62b 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -146,6 +146,34 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { ret <1 x i64> %ret } +define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_ptr: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movq (%rsp), %rax +; CHECK3-NEXT:popq %rcx +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_ptr: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq (%rsp), %rax +; CHECK0-NEXT:popq %rcx +; CHECK0-NEXT:retq + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} + define <1 x half> @atomic_vec1_half(ptr %x) { ; CHECK3-LABEL: atomic_vec1_half: ; CHECK3: ## %bb.0: @@ -182,3 +210,228 @@ define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { %ret = load atomic <1 x double>, ptr %x acquire, align 8 ret <1 x double> %ret } + +define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_i64: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; 
CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movq (%rsp), %rax +; CHECK3-NEXT:popq %rcx +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i64: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq (%rsp), %rax +; CHECK0-NEXT:popq %rcx +; CHECK0-NEXT:retq + %ret = load atomic <1 x i64>, ptr %x acquire, align 4 + ret <1 x i64> %ret +} + +define <1 x double> @atomic_vec1_double(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_double: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK3-NEXT:popq %rax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_double: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK0-NEXT:popq %rax +; CHECK0-NEXT:retq + %ret = load atomic <1 x double>, ptr %x acquire, align 4 + ret <1 x double> %ret +} + +define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec2_i32: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK3-NEXT:popq %rax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_i32: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq 
%rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq {{.*#+}} xmm0 = mem[0],zero +; CHECK0-NEXT:popq %rax +; CHECK0-NEXT:retq + %ret = load atomic <2 x i32>, ptr %x acquire, align 4 + ret <2 x i32> %ret +} + +define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_float_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16 +; CHECK-NEXT:movq %rdx, %xmm1 +; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[
[llvm-branch-commits] [llvm] [X86] Remove extra MOV after widening atomic load (PR #138635)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/138635 >From 939a68f81bd2add47607063c83123c252cc156c8 Mon Sep 17 00:00:00 2001 From: jofernau_amdeng Date: Tue, 6 May 2025 01:48:11 -0400 Subject: [PATCH] [X86] Remove extra MOV after widening atomic load This change adds patterns to optimize out an extra MOV present after widening the atomic load. commit-id:45989503 --- llvm/lib/Target/X86/X86InstrCompiler.td| 7 llvm/test/CodeGen/X86/atomic-load-store.ll | 43 -- llvm/test/CodeGen/X86/atomic-unordered.ll | 3 +- 3 files changed, 31 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 167e27eddd71e..8ad8a0a6194d6 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1200,6 +1200,13 @@ def : Pat<(i16 (atomic_load_nonext_16 addr:$src)), (MOV16rm addr:$src)>; def : Pat<(i32 (atomic_load_nonext_32 addr:$src)), (MOV32rm addr:$src)>; def : Pat<(i64 (atomic_load_nonext_64 addr:$src)), (MOV64rm addr:$src)>; +def : Pat<(v4i32 (scalar_to_vector (i32 (anyext (i16 (atomic_load_16 addr:$src)), + (MOVDI2PDIrm addr:$src)>; // load atomic <2 x i8> +def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src, + (MOVDI2PDIrm addr:$src)>; // load atomic <2 x i16> +def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src, + (MOV64toPQIrm addr:$src)>; // load atomic <2 x i32,float> + // Floating point loads/stores. 
def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst), (MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>; diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 9ee8b4fc5ac7f..935d058a52f8f 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -149,8 +149,7 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { define <2 x i8> @atomic_vec2_i8(ptr %x) { ; CHECK3-LABEL: atomic_vec2_i8: ; CHECK3: ## %bb.0: -; CHECK3-NEXT:movzwl (%rdi), %eax -; CHECK3-NEXT:movd %eax, %xmm0 +; CHECK3-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK3-NEXT:retq ; ; CHECK0-LABEL: atomic_vec2_i8: @@ -165,11 +164,15 @@ define <2 x i8> @atomic_vec2_i8(ptr %x) { } define <2 x i16> @atomic_vec2_i16(ptr %x) { -; CHECK-LABEL: atomic_vec2_i16: -; CHECK: ## %bb.0: -; CHECK-NEXT:movl (%rdi), %eax -; CHECK-NEXT:movd %eax, %xmm0 -; CHECK-NEXT:retq +; CHECK3-LABEL: atomic_vec2_i16: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_i16: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK0-NEXT:retq %ret = load atomic <2 x i16>, ptr %x acquire, align 4 ret <2 x i16> %ret } @@ -177,8 +180,7 @@ define <2 x i16> @atomic_vec2_i16(ptr %x) { define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) { ; CHECK-LABEL: atomic_vec2_ptr270: ; CHECK: ## %bb.0: -; CHECK-NEXT:movq (%rdi), %rax -; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:movq (%rdi), %xmm0 ; CHECK-NEXT:retq %ret = load atomic <2 x ptr addrspace(270)>, ptr %x acquire, align 8 ret <2 x ptr addrspace(270)> %ret @@ -187,8 +189,7 @@ define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) { define <2 x i32> @atomic_vec2_i32_align(ptr %x) { ; CHECK-LABEL: atomic_vec2_i32_align: ; CHECK: ## %bb.0: -; CHECK-NEXT:movq (%rdi), %rax -; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:movq (%rdi), %xmm0 ; 
CHECK-NEXT:retq %ret = load atomic <2 x i32>, ptr %x acquire, align 8 ret <2 x i32> %ret @@ -197,8 +198,7 @@ define <2 x i32> @atomic_vec2_i32_align(ptr %x) { define <2 x float> @atomic_vec2_float_align(ptr %x) { ; CHECK-LABEL: atomic_vec2_float_align: ; CHECK: ## %bb.0: -; CHECK-NEXT:movq (%rdi), %rax -; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:movq (%rdi), %xmm0 ; CHECK-NEXT:retq %ret = load atomic <2 x float>, ptr %x acquire, align 8 ret <2 x float> %ret @@ -354,11 +354,15 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { } define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { -; CHECK-LABEL: atomic_vec4_i8: -; CHECK: ## %bb.0: -; CHECK-NEXT:movl (%rdi), %eax -; CHECK-NEXT:movd %eax, %xmm0 -; CHECK-NEXT:retq +; CHECK3-LABEL: atomic_vec4_i8: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec4_i8: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK0-NEXT:retq %ret = load atomic <4 x i8>, ptr %x acquire, align 4 ret <4 x i8> %ret } @@ -366,8 +370,7 @@ define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { define <4 x i16> @atomic_vec4_i16(
[llvm-branch-commits] [llvm] [SelectionDAG] Legalize <1 x T> vector types for atomic load (PR #120385)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120385 >From c476a2452e64f7f6127ad0265c4c6aba7a08531a Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:37:17 -0500 Subject: [PATCH] [SelectionDAG] Legalize <1 x T> vector types for atomic load `load atomic <1 x T>` is not valid. This change legalizes vector types of atomic load via scalarization in SelectionDAG so that it can, for example, translate from `v1i32` to `i32`. commit-id:5c36cc8c --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 15 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 121 +- 3 files changed, 135 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 720393158aa5e..89ea7ef4dbe89 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -874,6 +874,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index d0b69b88748a9..8eee7a4c61fe6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -64,6 +64,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { R = ScalarizeVecRes_UnaryOpWithExtraInput(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +R = ScalarizeVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: R = 
ScalarizeVecRes_LOAD(cast(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; @@ -458,6 +461,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { return Op; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) { + SDValue Result = DAG.getAtomicLoad( + ISD::NON_EXTLOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(), + N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(), + N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { assert(N->isUnindexed() && "Indexed vector load?"); diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 5bce4401f7bdb..d23cfb89f9fc8 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK3 +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s --check-prefixes=CHECK,CHECK0 define void @test1(ptr %ptr, i32 %val1) { ; CHECK-LABEL: test1: @@ -28,3 +28,120 @@ define i32 @test3(ptr %ptr) { %val = load atomic i32, ptr %ptr seq_cst, align 4 ret i32 %val } + +define <1 x i32> @atomic_vec1_i32(ptr %x) { +; CHECK-LABEL: atomic_vec1_i32: +; CHECK: ## %bb.0: +; CHECK-NEXT:movl (%rdi), %eax +; CHECK-NEXT:retq + %ret = load atomic <1 x i32>, ptr %x 
acquire, align 4 + ret <1 x i32> %ret +} + +define <1 x i8> @atomic_vec1_i8(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i8: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzbl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i8: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movb (%rdi), %al +; CHECK0-NEXT:retq + %ret = load atomic <1 x i8>, ptr %x acquire, align 1 + ret <1 x i8> %ret +} + +define <1 x i16> @atomic_vec1_i16(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i16: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i16: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw (%rdi), %ax +; CHECK0-NEXT:retq + %ret = load atomic <1 x i16>, ptr %x acquire, align 2 + ret <1 x i16> %ret +} + +define <1 x i32> @atomic_vec1_i8_zext(ptr %x) { +; CHECK3-LABEL: atomic_ve
[llvm-branch-commits] [llvm] [X86] Remove extra MOV after widening atomic load (PR #138635)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/138635 >From 939a68f81bd2add47607063c83123c252cc156c8 Mon Sep 17 00:00:00 2001 From: jofernau_amdeng Date: Tue, 6 May 2025 01:48:11 -0400 Subject: [PATCH] [X86] Remove extra MOV after widening atomic load This change adds patterns to optimize out an extra MOV present after widening the atomic load. commit-id:45989503 --- llvm/lib/Target/X86/X86InstrCompiler.td| 7 llvm/test/CodeGen/X86/atomic-load-store.ll | 43 -- llvm/test/CodeGen/X86/atomic-unordered.ll | 3 +- 3 files changed, 31 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 167e27eddd71e..8ad8a0a6194d6 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1200,6 +1200,13 @@ def : Pat<(i16 (atomic_load_nonext_16 addr:$src)), (MOV16rm addr:$src)>; def : Pat<(i32 (atomic_load_nonext_32 addr:$src)), (MOV32rm addr:$src)>; def : Pat<(i64 (atomic_load_nonext_64 addr:$src)), (MOV64rm addr:$src)>; +def : Pat<(v4i32 (scalar_to_vector (i32 (anyext (i16 (atomic_load_16 addr:$src)), + (MOVDI2PDIrm addr:$src)>; // load atomic <2 x i8> +def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src, + (MOVDI2PDIrm addr:$src)>; // load atomic <2 x i16> +def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src, + (MOV64toPQIrm addr:$src)>; // load atomic <2 x i32,float> + // Floating point loads/stores. 
def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst), (MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>; diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 9ee8b4fc5ac7f..935d058a52f8f 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -149,8 +149,7 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { define <2 x i8> @atomic_vec2_i8(ptr %x) { ; CHECK3-LABEL: atomic_vec2_i8: ; CHECK3: ## %bb.0: -; CHECK3-NEXT:movzwl (%rdi), %eax -; CHECK3-NEXT:movd %eax, %xmm0 +; CHECK3-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK3-NEXT:retq ; ; CHECK0-LABEL: atomic_vec2_i8: @@ -165,11 +164,15 @@ define <2 x i8> @atomic_vec2_i8(ptr %x) { } define <2 x i16> @atomic_vec2_i16(ptr %x) { -; CHECK-LABEL: atomic_vec2_i16: -; CHECK: ## %bb.0: -; CHECK-NEXT:movl (%rdi), %eax -; CHECK-NEXT:movd %eax, %xmm0 -; CHECK-NEXT:retq +; CHECK3-LABEL: atomic_vec2_i16: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_i16: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK0-NEXT:retq %ret = load atomic <2 x i16>, ptr %x acquire, align 4 ret <2 x i16> %ret } @@ -177,8 +180,7 @@ define <2 x i16> @atomic_vec2_i16(ptr %x) { define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) { ; CHECK-LABEL: atomic_vec2_ptr270: ; CHECK: ## %bb.0: -; CHECK-NEXT:movq (%rdi), %rax -; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:movq (%rdi), %xmm0 ; CHECK-NEXT:retq %ret = load atomic <2 x ptr addrspace(270)>, ptr %x acquire, align 8 ret <2 x ptr addrspace(270)> %ret @@ -187,8 +189,7 @@ define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) { define <2 x i32> @atomic_vec2_i32_align(ptr %x) { ; CHECK-LABEL: atomic_vec2_i32_align: ; CHECK: ## %bb.0: -; CHECK-NEXT:movq (%rdi), %rax -; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:movq (%rdi), %xmm0 ; 
CHECK-NEXT:retq %ret = load atomic <2 x i32>, ptr %x acquire, align 8 ret <2 x i32> %ret @@ -197,8 +198,7 @@ define <2 x i32> @atomic_vec2_i32_align(ptr %x) { define <2 x float> @atomic_vec2_float_align(ptr %x) { ; CHECK-LABEL: atomic_vec2_float_align: ; CHECK: ## %bb.0: -; CHECK-NEXT:movq (%rdi), %rax -; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:movq (%rdi), %xmm0 ; CHECK-NEXT:retq %ret = load atomic <2 x float>, ptr %x acquire, align 8 ret <2 x float> %ret @@ -354,11 +354,15 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { } define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { -; CHECK-LABEL: atomic_vec4_i8: -; CHECK: ## %bb.0: -; CHECK-NEXT:movl (%rdi), %eax -; CHECK-NEXT:movd %eax, %xmm0 -; CHECK-NEXT:retq +; CHECK3-LABEL: atomic_vec4_i8: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec4_i8: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK0-NEXT:retq %ret = load atomic <4 x i8>, ptr %x acquire, align 4 ret <4 x i8> %ret } @@ -366,8 +370,7 @@ define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { define <4 x i16> @atomic_vec4_i16(
[llvm-branch-commits] [llvm] [SelectionDAG] Legalize <1 x T> vector types for atomic load (PR #120385)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120385 >From c476a2452e64f7f6127ad0265c4c6aba7a08531a Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:37:17 -0500 Subject: [PATCH] [SelectionDAG] Legalize <1 x T> vector types for atomic load `load atomic <1 x T>` is not valid. This change legalizes vector types of atomic load via scalarization in SelectionDAG so that it can, for example, translate from `v1i32` to `i32`. commit-id:5c36cc8c --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 15 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 121 +- 3 files changed, 135 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 720393158aa5e..89ea7ef4dbe89 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -874,6 +874,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index d0b69b88748a9..8eee7a4c61fe6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -64,6 +64,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { R = ScalarizeVecRes_UnaryOpWithExtraInput(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +R = ScalarizeVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: R = 
ScalarizeVecRes_LOAD(cast(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; @@ -458,6 +461,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { return Op; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) { + SDValue Result = DAG.getAtomicLoad( + ISD::NON_EXTLOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(), + N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(), + N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { assert(N->isUnindexed() && "Indexed vector load?"); diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 5bce4401f7bdb..d23cfb89f9fc8 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK3 +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s --check-prefixes=CHECK,CHECK0 define void @test1(ptr %ptr, i32 %val1) { ; CHECK-LABEL: test1: @@ -28,3 +28,120 @@ define i32 @test3(ptr %ptr) { %val = load atomic i32, ptr %ptr seq_cst, align 4 ret i32 %val } + +define <1 x i32> @atomic_vec1_i32(ptr %x) { +; CHECK-LABEL: atomic_vec1_i32: +; CHECK: ## %bb.0: +; CHECK-NEXT:movl (%rdi), %eax +; CHECK-NEXT:retq + %ret = load atomic <1 x i32>, ptr %x 
acquire, align 4 + ret <1 x i32> %ret +} + +define <1 x i8> @atomic_vec1_i8(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i8: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzbl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i8: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movb (%rdi), %al +; CHECK0-NEXT:retq + %ret = load atomic <1 x i8>, ptr %x acquire, align 1 + ret <1 x i8> %ret +} + +define <1 x i16> @atomic_vec1_i16(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i16: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i16: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw (%rdi), %ax +; CHECK0-NEXT:retq + %ret = load atomic <1 x i16>, ptr %x acquire, align 2 + ret <1 x i16> %ret +} + +define <1 x i32> @atomic_vec1_i8_zext(ptr %x) { +; CHECK3-LABEL: atomic_ve
[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120716 >From 8e1beaef4c295d500183bd82e1ddb0010128d4b2 Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 20 Dec 2024 06:14:28 -0500 Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector AtomicExpand fails for aligned `load atomic ` because it does not find a compatible library call. This change adds appropriate bitcasts so that the call can be lowered. commit-id:f430c1af --- llvm/lib/CodeGen/AtomicExpandPass.cpp | 15 - llvm/test/CodeGen/ARM/atomic-load-store.ll| 51 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 30 + .../X86/expand-atomic-non-integer.ll | 65 +++ 4 files changed, 158 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index c376de877ac7d..70f59eafc6ecb 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -2066,9 +2066,18 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( I->replaceAllUsesWith(V); } else if (HasResult) { Value *V; -if (UseSizedLibcall) - V = Builder.CreateBitOrPointerCast(Result, I->getType()); -else { +if (UseSizedLibcall) { + // Add bitcasts from Result's scalar type to I's vector type + auto *PtrTy = dyn_cast(I->getType()->getScalarType()); + auto *VTy = dyn_cast(I->getType()); + if (VTy && PtrTy && !Result->getType()->isVectorTy()) { +unsigned AS = PtrTy->getAddressSpace(); +Value *BC = Builder.CreateBitCast( +Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS))); +V = Builder.CreateIntToPtr(BC, I->getType()); + } else +V = Builder.CreateBitOrPointerCast(Result, I->getType()); +} else { V = Builder.CreateAlignedLoad(I->getType(), AllocaResult, AllocaAlignment); Builder.CreateLifetimeEnd(AllocaResult, SizeVal64); diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll index 560dfde356c29..eaa2ffd9b2731 100644 --- a/llvm/test/CodeGen/ARM/atomic-load-store.ll +++ 
b/llvm/test/CodeGen/ARM/atomic-load-store.ll @@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) { store atomic double %val1, ptr %ptr seq_cst, align 8 ret void } + +define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 { +; ARM-LABEL: atomic_vec1_ptr: +; ARM: @ %bb.0: +; ARM-NEXT:ldr r0, [r0] +; ARM-NEXT:dmb ish +; ARM-NEXT:bx lr +; +; ARMOPTNONE-LABEL: atomic_vec1_ptr: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT:ldr r0, [r0] +; ARMOPTNONE-NEXT:dmb ish +; ARMOPTNONE-NEXT:bx lr +; +; THUMBTWO-LABEL: atomic_vec1_ptr: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT:ldr r0, [r0] +; THUMBTWO-NEXT:dmb ish +; THUMBTWO-NEXT:bx lr +; +; THUMBONE-LABEL: atomic_vec1_ptr: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT:push {r7, lr} +; THUMBONE-NEXT:movs r1, #0 +; THUMBONE-NEXT:mov r2, r1 +; THUMBONE-NEXT:bl __sync_val_compare_and_swap_4 +; THUMBONE-NEXT:pop {r7, pc} +; +; ARMV4-LABEL: atomic_vec1_ptr: +; ARMV4: @ %bb.0: +; ARMV4-NEXT:push {r11, lr} +; ARMV4-NEXT:mov r1, #2 +; ARMV4-NEXT:bl __atomic_load_4 +; ARMV4-NEXT:pop {r11, lr} +; ARMV4-NEXT:mov pc, lr +; +; ARMV6-LABEL: atomic_vec1_ptr: +; ARMV6: @ %bb.0: +; ARMV6-NEXT:ldr r0, [r0] +; ARMV6-NEXT:mov r1, #0 +; ARMV6-NEXT:mcr p15, #0, r1, c7, c10, #5 +; ARMV6-NEXT:bx lr +; +; THUMBM-LABEL: atomic_vec1_ptr: +; THUMBM: @ %bb.0: +; THUMBM-NEXT:ldr r0, [r0] +; THUMBM-NEXT:dmb sy +; THUMBM-NEXT:bx lr + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 5473ae91ddbf7..053222660e5cd 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -381,6 +381,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { ret <2 x i32> %ret } +define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec2_ptr_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16 +; 
CHECK-NEXT:movq %rdx, %xmm1 +; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT:popq %rax +; CHECK-NEXT:retq + %ret = load atomic <2 x ptr>, ptr %x acquire, align 16 + ret <2 x ptr> %ret +} + define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { ; CHECK3-LABEL: atomic_vec4_i8: ; CHECK3: ## %bb.0: @@ -404,6 +419,21 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind { ret <4 x i16> %ret } +define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_ptr270: +; CHECK: ## %b
[llvm-branch-commits] [llvm] [SelectionDAG] Widen <2 x T> vector types for atomic load (PR #120598)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120598 >From 45d0296e3a33d4e483dc6869a058a02f3074c77a Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 11:19:39 -0500 Subject: [PATCH] [SelectionDAG] Widen <2 x T> vector types for atomic load Vector types of 2 elements must be widened. This change does this for vector types of atomic load in SelectionDAG so that it can translate aligned vectors of >1 size. commit-id:2894ccd1 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 96 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 78 +++ 3 files changed, 154 insertions(+), 21 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 89ea7ef4dbe89..bdfa5f7741ad3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1062,6 +1062,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); + SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N); SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 8eee7a4c61fe6..6b3467573a0a2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4625,6 +4625,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +Res = WidenVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); 
break; case ISD::STEP_VECTOR: case ISD::SPLAT_VECTOR: @@ -6014,6 +6017,77 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { N->getOperand(1), N->getOperand(2)); } +/// Either return the same load or provide appropriate casts +/// from the load and return that. +static SDValue loadElement(SDValue LdOp, EVT FirstVT, EVT WidenVT, + TypeSize LdWidth, TypeSize FirstVTWidth, SDLoc dl, + SelectionDAG &DAG) { + assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth)); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + if (!FirstVT.isVector()) { +unsigned NumElts = +WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); +EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts); +SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); +return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); + } else if (FirstVT == WidenVT) +return LdOp; + else { +// TODO: We don't currently have any tests that exercise this code path. +assert(!"Unimplemented"); + } +} + +static std::optional findMemType(SelectionDAG &DAG, + const TargetLowering &TLI, unsigned Width, + EVT WidenVT, unsigned Align, + unsigned WidenEx); + +SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) { + EVT WidenVT = + TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + EVT LdVT = LD->getMemoryVT(); + SDLoc dl(LD); + assert(LdVT.isVector() && WidenVT.isVector() && "Expected vectors"); + assert(LdVT.isScalableVector() == WidenVT.isScalableVector() && + "Must be scalable"); + assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType() && + "Expected equivalent element types"); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); + AAMDNodes AAInfo = LD->getAAInfo(); + + TypeSize LdWidth = LdVT.getSizeInBits(); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + TypeSize WidthDiff = WidenWidth - LdWidth; + + // Find the vector 
type that can load from. + std::optional FirstVT = + findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, /*LdAlign=*/0, + WidthDiff.getKnownMinValue()); + + if (!FirstVT) +return SDValue(); + + SmallVector MemVTs; + TypeSize FirstVTWidth = FirstVT->getSizeInBits(); + + SDValue LdOp = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, *FirstVT, *FirstVT, + Chain, BasePtr, LD->getMemOperand()); + + // Load the element with one instruction. + SDValue Result = + loadElement(LdOp, *FirstVT, WidenVT, LdWidth, FirstVTWidth, dl, DAG); + + // Modified the
[llvm-branch-commits] [llvm] [X86] Manage atomic load of fp -> int promotion in DAG (PR #120386)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120386 >From 70a0cadf649521d83669dbb08100c81873c7231b Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:38:23 -0500 Subject: [PATCH] [X86] Manage atomic load of fp -> int promotion in DAG When lowering atomic <1 x T> vector types with floats, selection can fail since this pattern is unsupported. To support this, floats can be casted to an integer type of the same size. commit-id:f9d761c5 --- llvm/lib/Target/X86/X86ISelLowering.cpp| 4 +++ llvm/test/CodeGen/X86/atomic-load-store.ll | 37 ++ 2 files changed, 41 insertions(+) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f04603867a587..749fa34e791af 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2653,6 +2653,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(Op, MVT::f32, Promote); } + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64); + // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine({ISD::VECTOR_SHUFFLE, ISD::SCALAR_TO_VECTOR, diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index d23cfb89f9fc8..6efcbb80c0ce6 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -145,3 +145,40 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { %ret = load atomic <1 x i64>, ptr %x acquire, align 8 ret <1 x i64> %ret } + +define <1 x half> @atomic_vec1_half(ptr %x) { +; CHECK3-LABEL: atomic_vec1_half: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_half: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw 
(%rdi), %cx +; CHECK0-NEXT:## implicit-def: $eax +; CHECK0-NEXT:movw %cx, %ax +; CHECK0-NEXT:## implicit-def: $xmm0 +; CHECK0-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK0-NEXT:retq + %ret = load atomic <1 x half>, ptr %x acquire, align 2 + ret <1 x half> %ret +} + +define <1 x float> @atomic_vec1_float(ptr %x) { +; CHECK-LABEL: atomic_vec1_float: +; CHECK: ## %bb.0: +; CHECK-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT:retq + %ret = load atomic <1 x float>, ptr %x acquire, align 4 + ret <1 x float> %ret +} + +define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec1_double_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT:retq + %ret = load atomic <1 x double>, ptr %x acquire, align 8 + ret <1 x double> %ret +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120716 >From 8e1beaef4c295d500183bd82e1ddb0010128d4b2 Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 20 Dec 2024 06:14:28 -0500 Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector AtomicExpand fails for aligned `load atomic ` because it does not find a compatible library call. This change adds appropriate bitcasts so that the call can be lowered. commit-id:f430c1af --- llvm/lib/CodeGen/AtomicExpandPass.cpp | 15 - llvm/test/CodeGen/ARM/atomic-load-store.ll| 51 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 30 + .../X86/expand-atomic-non-integer.ll | 65 +++ 4 files changed, 158 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index c376de877ac7d..70f59eafc6ecb 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -2066,9 +2066,18 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( I->replaceAllUsesWith(V); } else if (HasResult) { Value *V; -if (UseSizedLibcall) - V = Builder.CreateBitOrPointerCast(Result, I->getType()); -else { +if (UseSizedLibcall) { + // Add bitcasts from Result's scalar type to I's vector type + auto *PtrTy = dyn_cast(I->getType()->getScalarType()); + auto *VTy = dyn_cast(I->getType()); + if (VTy && PtrTy && !Result->getType()->isVectorTy()) { +unsigned AS = PtrTy->getAddressSpace(); +Value *BC = Builder.CreateBitCast( +Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS))); +V = Builder.CreateIntToPtr(BC, I->getType()); + } else +V = Builder.CreateBitOrPointerCast(Result, I->getType()); +} else { V = Builder.CreateAlignedLoad(I->getType(), AllocaResult, AllocaAlignment); Builder.CreateLifetimeEnd(AllocaResult, SizeVal64); diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll index 560dfde356c29..eaa2ffd9b2731 100644 --- a/llvm/test/CodeGen/ARM/atomic-load-store.ll +++ 
b/llvm/test/CodeGen/ARM/atomic-load-store.ll @@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) { store atomic double %val1, ptr %ptr seq_cst, align 8 ret void } + +define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 { +; ARM-LABEL: atomic_vec1_ptr: +; ARM: @ %bb.0: +; ARM-NEXT:ldr r0, [r0] +; ARM-NEXT:dmb ish +; ARM-NEXT:bx lr +; +; ARMOPTNONE-LABEL: atomic_vec1_ptr: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT:ldr r0, [r0] +; ARMOPTNONE-NEXT:dmb ish +; ARMOPTNONE-NEXT:bx lr +; +; THUMBTWO-LABEL: atomic_vec1_ptr: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT:ldr r0, [r0] +; THUMBTWO-NEXT:dmb ish +; THUMBTWO-NEXT:bx lr +; +; THUMBONE-LABEL: atomic_vec1_ptr: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT:push {r7, lr} +; THUMBONE-NEXT:movs r1, #0 +; THUMBONE-NEXT:mov r2, r1 +; THUMBONE-NEXT:bl __sync_val_compare_and_swap_4 +; THUMBONE-NEXT:pop {r7, pc} +; +; ARMV4-LABEL: atomic_vec1_ptr: +; ARMV4: @ %bb.0: +; ARMV4-NEXT:push {r11, lr} +; ARMV4-NEXT:mov r1, #2 +; ARMV4-NEXT:bl __atomic_load_4 +; ARMV4-NEXT:pop {r11, lr} +; ARMV4-NEXT:mov pc, lr +; +; ARMV6-LABEL: atomic_vec1_ptr: +; ARMV6: @ %bb.0: +; ARMV6-NEXT:ldr r0, [r0] +; ARMV6-NEXT:mov r1, #0 +; ARMV6-NEXT:mcr p15, #0, r1, c7, c10, #5 +; ARMV6-NEXT:bx lr +; +; THUMBM-LABEL: atomic_vec1_ptr: +; THUMBM: @ %bb.0: +; THUMBM-NEXT:ldr r0, [r0] +; THUMBM-NEXT:dmb sy +; THUMBM-NEXT:bx lr + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 5473ae91ddbf7..053222660e5cd 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -381,6 +381,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { ret <2 x i32> %ret } +define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec2_ptr_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16 +; 
CHECK-NEXT:movq %rdx, %xmm1 +; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT:popq %rax +; CHECK-NEXT:retq + %ret = load atomic <2 x ptr>, ptr %x acquire, align 16 + ret <2 x ptr> %ret +} + define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { ; CHECK3-LABEL: atomic_vec4_i8: ; CHECK3: ## %bb.0: @@ -404,6 +419,21 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind { ret <4 x i16> %ret } +define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_ptr270: +; CHECK: ## %b
[llvm-branch-commits] [llvm] Reapply "IR: Remove reference counts from ConstantData (#137314)" (PR #138962)
arsenm wrote: ### Merge activity * **May 8, 1:53 AM EDT**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/138962). https://github.com/llvm/llvm-project/pull/138962 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] clang: Read the address space from the ABIArgInfo (PR #138865)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/138865 >From 4a1cda956b007b112584f60390162a1cf89f3587 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 7 May 2025 14:39:38 +0200 Subject: [PATCH] clang: Read the address space from the ABIArgInfo Do not assume it's the alloca address space, we have an explicit address space to use for the argument already. Also use the original value's type instead of assuming DefaultAS. --- clang/lib/CodeGen/CGCall.cpp | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 76f3dafc8b2bd..1e25de06c89b9 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5387,16 +5387,16 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (!NeedCopy) { // Skip the extra memcpy call. llvm::Value *V = getAsNaturalPointerTo(Addr, I->Ty); - auto *T = llvm::PointerType::get( - CGM.getLLVMContext(), CGM.getDataLayout().getAllocaAddrSpace()); + auto *T = llvm::PointerType::get(CGM.getLLVMContext(), + ArgInfo.getIndirectAddrSpace()); // FIXME: This should not depend on the language address spaces, and // only the contextual values. If the address space mismatches, see if // we can look through a cast to a compatible address space value, // otherwise emit a copy. llvm::Value *Val = getTargetHooks().performAddrSpaceCast( - *this, V, LangAS::Default, CGM.getASTAllocaAddressSpace(), T, - true); + *this, V, I->Ty.getAddressSpace(), CGM.getASTAllocaAddressSpace(), + T, true); if (ArgHasMaybeUndefAttr) Val = Builder.CreateFreeze(Val); IRCallArgs[FirstIRArg] = Val; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [polly] Reapply "IR: Remove uselist for constantdata (#137313)" (PR #138961)
arsenm wrote: ### Merge activity * **May 8, 1:53 AM EDT**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/138961). https://github.com/llvm/llvm-project/pull/138961 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] clang: Remove dest LangAS argument from performAddrSpaceCast (PR #138866)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/138866 Rate limit · GitHub body { background-color: #f6f8fa; color: #24292e; font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol; font-size: 14px; line-height: 1.5; margin: 0; } .container { margin: 50px auto; max-width: 600px; text-align: center; padding: 0 24px; } a { color: #0366d6; text-decoration: none; } a:hover { text-decoration: underline; } h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; text-shadow: 0 1px 0 #fff; } p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; } ul { list-style: none; margin: 25px 0; padding: 0; } li { display: table-cell; font-weight: bold; width: 1%; } .logo { display: inline-block; margin-top: 35px; } .logo-img-2x { display: none; } @media only screen and (-webkit-min-device-pixel-ratio: 2), only screen and ( min--moz-device-pixel-ratio: 2), only screen and ( -o-min-device-pixel-ratio: 2/1), only screen and (min-device-pixel-ratio: 2), only screen and (min-resolution: 192dpi), only screen and (min-resolution: 2dppx) { .logo-img-1x { display: none; } .logo-img-2x { display: inline-block; } } #suggestions { margin-top: 35px; color: #ccc; } #suggestions a { color: #66; font-weight: 200; font-size: 14px; margin: 0 10px; } Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour. https://support.github.com/contact";>Contact Support — https://githubstatus.com";>GitHub Status — https://twitter.com/githubstatus";>@githubstatus ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -427,6 +428,29 @@ Value *VPInstruction::generate(VPTransformState &State) { {PredTy, ScalarTC->getType()}, {VIVElem0, ScalarTC}, nullptr, Name); } + // Count the number of bits set in each lane and reduce the result to a scalar + case VPInstruction::PopCount: { +Value *Op = State.get(getOperand(0)); +auto *VT = Op->getType(); SamTebbs33 wrote: Done. https://github.com/llvm/llvm-project/pull/100579 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Register Function Passes (PR #138828)
@@ -279,6 +278,7 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS( #ifndef DUMMY_FUNCTION_PASS #define DUMMY_FUNCTION_PASS(NAME, PASS_NAME) #endif +DUMMY_FUNCTION_PASS("tlshoist", TLSVariableHoistPass) paperchalice wrote: IR passes are handled in `PassRegistry.def`, just add missing passes to it. https://github.com/llvm/llvm-project/pull/138828 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Account inserted passes for -start/stop options (PR #138830)
https://github.com/optimisan edited https://github.com/llvm/llvm-project/pull/138830 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Register Function Passes (PR #138828)
@@ -279,6 +278,7 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS( #ifndef DUMMY_FUNCTION_PASS #define DUMMY_FUNCTION_PASS(NAME, PASS_NAME) #endif +DUMMY_FUNCTION_PASS("tlshoist", TLSVariableHoistPass) paperchalice wrote: Also tls hoist pass was removed in pull request 114740. https://github.com/llvm/llvm-project/pull/138828 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Register Function Passes (PR #138828)
@@ -70,7 +70,6 @@ FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass()) FUNCTION_PASS("select-optimize", SelectOptimizePass(TM)) FUNCTION_PASS("sjlj-eh-prepare", SjLjEHPreparePass(TM)) FUNCTION_PASS("stack-protector", StackProtectorPass(TM)) -FUNCTION_PASS("tlshoist", TLSVariableHoistPass()) cdevadas wrote: Just remove it for all. https://github.com/llvm/llvm-project/pull/138828 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -418,7 +418,13 @@ class LoopVectorizationPlanner { /// Build VPlans for the specified \p UserVF and \p UserIC if they are /// non-zero or all applicable candidate VFs otherwise. If vectorization and /// interleaving should be avoided up-front, no plans are generated. - void plan(ElementCount UserVF, unsigned UserIC); + /// RTChecks is a list of pointer pairs that should be checked for aliasing, + /// setting HasAliasMask to true in the case that an alias mask is generated SamTebbs33 wrote: Done, thanks. https://github.com/llvm/llvm-project/pull/100579 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [X86] Remove extra MOV after widening atomic load (PR #138635)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/138635 >From 5b5d94887ed368de16e1395727295527ff67b471 Mon Sep 17 00:00:00 2001 From: jofernau_amdeng Date: Tue, 6 May 2025 01:48:11 -0400 Subject: [PATCH] [X86] Remove extra MOV after widening atomic load This change adds patterns to optimize out an extra MOV present after widening the atomic load. commit-id:45989503 --- llvm/lib/Target/X86/X86InstrCompiler.td| 7 llvm/test/CodeGen/X86/atomic-load-store.ll | 43 -- llvm/test/CodeGen/X86/atomic-unordered.ll | 3 +- 3 files changed, 31 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 167e27eddd71e..8ad8a0a6194d6 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1200,6 +1200,13 @@ def : Pat<(i16 (atomic_load_nonext_16 addr:$src)), (MOV16rm addr:$src)>; def : Pat<(i32 (atomic_load_nonext_32 addr:$src)), (MOV32rm addr:$src)>; def : Pat<(i64 (atomic_load_nonext_64 addr:$src)), (MOV64rm addr:$src)>; +def : Pat<(v4i32 (scalar_to_vector (i32 (anyext (i16 (atomic_load_16 addr:$src)), + (MOVDI2PDIrm addr:$src)>; // load atomic <2 x i8> +def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src, + (MOVDI2PDIrm addr:$src)>; // load atomic <2 x i16> +def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src, + (MOV64toPQIrm addr:$src)>; // load atomic <2 x i32,float> + // Floating point loads/stores. 
def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst), (MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>; diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 9ee8b4fc5ac7f..935d058a52f8f 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -149,8 +149,7 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { define <2 x i8> @atomic_vec2_i8(ptr %x) { ; CHECK3-LABEL: atomic_vec2_i8: ; CHECK3: ## %bb.0: -; CHECK3-NEXT:movzwl (%rdi), %eax -; CHECK3-NEXT:movd %eax, %xmm0 +; CHECK3-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK3-NEXT:retq ; ; CHECK0-LABEL: atomic_vec2_i8: @@ -165,11 +164,15 @@ define <2 x i8> @atomic_vec2_i8(ptr %x) { } define <2 x i16> @atomic_vec2_i16(ptr %x) { -; CHECK-LABEL: atomic_vec2_i16: -; CHECK: ## %bb.0: -; CHECK-NEXT:movl (%rdi), %eax -; CHECK-NEXT:movd %eax, %xmm0 -; CHECK-NEXT:retq +; CHECK3-LABEL: atomic_vec2_i16: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_i16: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK0-NEXT:retq %ret = load atomic <2 x i16>, ptr %x acquire, align 4 ret <2 x i16> %ret } @@ -177,8 +180,7 @@ define <2 x i16> @atomic_vec2_i16(ptr %x) { define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) { ; CHECK-LABEL: atomic_vec2_ptr270: ; CHECK: ## %bb.0: -; CHECK-NEXT:movq (%rdi), %rax -; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:movq (%rdi), %xmm0 ; CHECK-NEXT:retq %ret = load atomic <2 x ptr addrspace(270)>, ptr %x acquire, align 8 ret <2 x ptr addrspace(270)> %ret @@ -187,8 +189,7 @@ define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) { define <2 x i32> @atomic_vec2_i32_align(ptr %x) { ; CHECK-LABEL: atomic_vec2_i32_align: ; CHECK: ## %bb.0: -; CHECK-NEXT:movq (%rdi), %rax -; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:movq (%rdi), %xmm0 ; 
CHECK-NEXT:retq %ret = load atomic <2 x i32>, ptr %x acquire, align 8 ret <2 x i32> %ret @@ -197,8 +198,7 @@ define <2 x i32> @atomic_vec2_i32_align(ptr %x) { define <2 x float> @atomic_vec2_float_align(ptr %x) { ; CHECK-LABEL: atomic_vec2_float_align: ; CHECK: ## %bb.0: -; CHECK-NEXT:movq (%rdi), %rax -; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:movq (%rdi), %xmm0 ; CHECK-NEXT:retq %ret = load atomic <2 x float>, ptr %x acquire, align 8 ret <2 x float> %ret @@ -354,11 +354,15 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { } define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { -; CHECK-LABEL: atomic_vec4_i8: -; CHECK: ## %bb.0: -; CHECK-NEXT:movl (%rdi), %eax -; CHECK-NEXT:movd %eax, %xmm0 -; CHECK-NEXT:retq +; CHECK3-LABEL: atomic_vec4_i8: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec4_i8: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK0-NEXT:retq %ret = load atomic <4 x i8>, ptr %x acquire, align 4 ret <4 x i8> %ret } @@ -366,8 +370,7 @@ define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { define <4 x i16> @atomic_vec4_i16(
[llvm-branch-commits] [llvm] [SelectionDAG] Split vector types for atomic load (PR #120640)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120640 >From 0e4399dc220b175e78ad110118635bf5e843d768 Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 16:25:55 -0500 Subject: [PATCH] [SelectionDAG] Split vector types for atomic load Vector types that aren't widened are split so that a single ATOMIC_LOAD is issued for the entire vector at once. This change utilizes the load vectorization infrastructure in SelectionDAG in order to group the vectors. This enables SelectionDAG to translate vectors with type bfloat,half. commit-id:3a045357 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 35 llvm/test/CodeGen/X86/atomic-load-store.ll| 171 ++ 3 files changed, 207 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index bdfa5f7741ad3..d8f402f529632 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -960,6 +960,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 6b3467573a0a2..d0ea0a607aed7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1172,6 +1172,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_STEP_VECTOR(N, Lo, Hi); 
break; case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; + case ISD::ATOMIC_LOAD: +SplitVecRes_ATOMIC_LOAD(cast(N), Lo, Hi); +break; case ISD::LOAD: SplitVecRes_LOAD(cast(N), Lo, Hi); break; @@ -1421,6 +1424,38 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SetSplitVector(SDValue(N, ResNo), Lo, Hi); } +void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, + SDValue &Hi) { + EVT LoVT, HiVT; + SDLoc dl(LD); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); + + ISD::LoadExtType ExtType = LD->getExtensionType(); + SDValue Ch = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + + EVT IntVT = + EVT::getIntegerVT(*DAG.getContext(), LD->getValueType(0).getSizeInBits()); + EVT MemIntVT = + EVT::getIntegerVT(*DAG.getContext(), LD->getMemoryVT().getSizeInBits()); + SDValue ALD = DAG.getAtomicLoad(ExtType, dl, MemIntVT, IntVT, Ch, Ptr, + LD->getMemOperand()); + + EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits()); + EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits()); + SDValue ExtractLo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, LoIntVT, ALD, + DAG.getIntPtrConstant(0, dl)); + SDValue ExtractHi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiIntVT, ALD, + DAG.getIntPtrConstant(1, dl)); + + Lo = DAG.getBitcast(LoVT, ExtractLo); + Hi = DAG.getBitcast(HiVT, ExtractHi); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(LD, 1), ALD.getValue(1)); +} + void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT, MachinePointerInfo &MPI, SDValue &Ptr, uint64_t *ScaledOffset) { diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 935d058a52f8f..42b0955824293 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -204,6 +204,68 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) { ret <2 x float> %ret } +define <2 x half> @atomic_vec2_half(ptr %x) { +; CHECK3-LABEL: atomic_vec2_half: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movl (%rdi), %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK3-NEXT:shrl $16, %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm1 +; CHECK3-NEXT:punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_half: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movl (%rdi), %eax +; CHECK0-NEXT:movl %eax, %ecx +; CHECK0-NEXT:shrl $16, %ecx +; CHECK0-NEXT:movw %cx, %dx +; CHE
[llvm-branch-commits] [clang] clang: Fix broken implicit cast to generic address space (PR #138863)
llvmbot wrote: @llvm/pr-subscribers-clang-codegen Author: Matt Arsenault (arsenm) Changes This fixes emitting undefined behavior where a 64-bit generic pointer is written to a 32-bit slot allocated for a private pointer. This can be seen in test/CodeGenOpenCL/amdgcn-automatic-variable.cl's wrong_pointer_alloca. --- Patch is 60.15 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/138863.diff 13 Files Affected: - (modified) clang/lib/CodeGen/CGDecl.cpp (+2-1) - (modified) clang/lib/CodeGen/CGExpr.cpp (+8-9) - (modified) clang/lib/CodeGen/CodeGenFunction.h (+19-1) - (modified) clang/test/CodeGenOpenCL/addr-space-struct-arg.cl (+1-4) - (modified) clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl (+19-25) - (modified) clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl (+1-4) - (modified) clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (+38-52) - (modified) clang/test/CodeGenOpenCL/amdgpu-nullptr.cl (+14-14) - (modified) clang/test/CodeGenOpenCL/blocks.cl (+3-3) - (modified) clang/test/CodeGenOpenCL/builtins-alloca.cl (+16-32) - (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl (+4-6) - (modified) clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl (+3-4) - (modified) clang/test/Index/pipe-size.cl (+1-1) ``diff diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index db34e2738b4cf..1e54e55c5abbb 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -1588,7 +1588,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // Create the alloca. Note that we set the name separately from // building the instruction so that it's there even in no-asserts // builds. - address = CreateTempAlloca(allocaTy, allocaAlignment, D.getName(), + address = CreateTempAlloca(allocaTy, Ty.getAddressSpace(), + allocaAlignment, D.getName(), /*ArraySize=*/nullptr, &AllocaAddr); // Don't emit lifetime markers for MSVC catch parameters. 
The lifetime of diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 2e01adc51fdf0..7f1308719a71e 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -100,13 +100,11 @@ CodeGenFunction::CreateTempAllocaWithoutCast(llvm::Type *Ty, CharUnits Align, return RawAddress(Alloca, Ty, Align, KnownNonNull); } -/// CreateTempAlloca - This creates a alloca and inserts it into the entry -/// block. The alloca is casted to default address space if necessary. -RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, - const Twine &Name, +RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, LangAS DestLangAS, + CharUnits Align, const Twine &Name, llvm::Value *ArraySize, RawAddress *AllocaAddr) { - auto Alloca = CreateTempAllocaWithoutCast(Ty, Align, Name, ArraySize); + RawAddress Alloca = CreateTempAllocaWithoutCast(Ty, Align, Name, ArraySize); if (AllocaAddr) *AllocaAddr = Alloca; llvm::Value *V = Alloca.getPointer(); @@ -114,8 +112,9 @@ RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, // be different from the type defined by the language. For example, // in C++ the auto variables are in the default address space. Therefore // cast alloca to the default address space when necessary. 
- if (getASTAllocaAddressSpace() != LangAS::Default) { -auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default); + + unsigned DestAddrSpace = getContext().getTargetAddressSpace(DestLangAS); + if (DestAddrSpace != Alloca.getAddressSpace()) { llvm::IRBuilderBase::InsertPointGuard IPG(Builder); // When ArraySize is nullptr, alloca is inserted at AllocaInsertPt, // otherwise alloca is inserted at the current insertion point of the @@ -123,8 +122,8 @@ RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, if (!ArraySize) Builder.SetInsertPoint(getPostAllocaInsertPoint()); V = getTargetHooks().performAddrSpaceCast( -*this, V, getASTAllocaAddressSpace(), LangAS::Default, -Builder.getPtrTy(DestAddrSpace), /*non-null*/ true); +*this, V, getASTAllocaAddressSpace(), DestLangAS, +Builder.getPtrTy(DestAddrSpace), /*IsNonNull=*/true); } return RawAddress(V, Ty, Align, KnownNonNull); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 561f8f6a2a2fb..c0bc3825f0188 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -2861,10 +2861,28 @@ class CodeGenFunction : public CodeGenTypeCache { /// more efficient if the caller knows that the address will not be exposed. llvm
[llvm-branch-commits] [llvm] [CodeGen][NPM] Account inserted passes for -start/stop options (PR #138830)
https://github.com/optimisan ready_for_review https://github.com/llvm/llvm-project/pull/138830 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SelectionDAG] Legalize <1 x T> vector types for atomic load (PR #120385)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120385 >From 08e39f28a1f041a72bbbf3ebe520f9136e5aebbc Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:37:17 -0500 Subject: [PATCH] [SelectionDAG] Legalize <1 x T> vector types for atomic load `load atomic <1 x T>` is not valid. This change legalizes vector types of atomic load via scalarization in SelectionDAG so that it can, for example, translate from `v1i32` to `i32`. commit-id:5c36cc8c --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 15 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 121 +- 3 files changed, 135 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 720393158aa5e..89ea7ef4dbe89 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -874,6 +874,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index d0b69b88748a9..8eee7a4c61fe6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -64,6 +64,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { R = ScalarizeVecRes_UnaryOpWithExtraInput(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +R = ScalarizeVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: R = 
ScalarizeVecRes_LOAD(cast(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; @@ -458,6 +461,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { return Op; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) { + SDValue Result = DAG.getAtomicLoad( + ISD::NON_EXTLOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(), + N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(), + N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { assert(N->isUnindexed() && "Indexed vector load?"); diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 5bce4401f7bdb..d23cfb89f9fc8 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK3 +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s --check-prefixes=CHECK,CHECK0 define void @test1(ptr %ptr, i32 %val1) { ; CHECK-LABEL: test1: @@ -28,3 +28,120 @@ define i32 @test3(ptr %ptr) { %val = load atomic i32, ptr %ptr seq_cst, align 4 ret i32 %val } + +define <1 x i32> @atomic_vec1_i32(ptr %x) { +; CHECK-LABEL: atomic_vec1_i32: +; CHECK: ## %bb.0: +; CHECK-NEXT:movl (%rdi), %eax +; CHECK-NEXT:retq + %ret = load atomic <1 x i32>, ptr %x 
acquire, align 4 + ret <1 x i32> %ret +} + +define <1 x i8> @atomic_vec1_i8(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i8: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzbl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i8: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movb (%rdi), %al +; CHECK0-NEXT:retq + %ret = load atomic <1 x i8>, ptr %x acquire, align 1 + ret <1 x i8> %ret +} + +define <1 x i16> @atomic_vec1_i16(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i16: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i16: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw (%rdi), %ax +; CHECK0-NEXT:retq + %ret = load atomic <1 x i16>, ptr %x acquire, align 2 + ret <1 x i16> %ret +} + +define <1 x i32> @atomic_vec1_i8_zext(ptr %x) { +; CHECK3-LABEL: atomic_ve
[llvm-branch-commits] [llvm] [AMDGPU] Improve StructurizeCFG pass performance by using SSAUpdaterBulk. (PR #135181)
https://github.com/vpykhtin updated https://github.com/llvm/llvm-project/pull/135181 Rate limit · GitHub body { background-color: #f6f8fa; color: #24292e; font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol; font-size: 14px; line-height: 1.5; margin: 0; } .container { margin: 50px auto; max-width: 600px; text-align: center; padding: 0 24px; } a { color: #0366d6; text-decoration: none; } a:hover { text-decoration: underline; } h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; text-shadow: 0 1px 0 #fff; } p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; } ul { list-style: none; margin: 25px 0; padding: 0; } li { display: table-cell; font-weight: bold; width: 1%; } .logo { display: inline-block; margin-top: 35px; } .logo-img-2x { display: none; } @media only screen and (-webkit-min-device-pixel-ratio: 2), only screen and ( min--moz-device-pixel-ratio: 2), only screen and ( -o-min-device-pixel-ratio: 2/1), only screen and (min-device-pixel-ratio: 2), only screen and (min-resolution: 192dpi), only screen and (min-resolution: 2dppx) { .logo-img-1x { display: none; } .logo-img-2x { display: inline-block; } } #suggestions { margin-top: 35px; color: #ccc; } #suggestions a { color: #66; font-weight: 200; font-size: 14px; margin: 0 10px; } Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour. https://support.github.com/contact";>Contact Support — https://githubstatus.com";>GitHub Status — https://twitter.com/githubstatus";>@githubstatus ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SSAUpdaterBulk] Add PHI simplification pass. (PR #135180)
https://github.com/vpykhtin updated https://github.com/llvm/llvm-project/pull/135180 Rate limit · GitHub body { background-color: #f6f8fa; color: #24292e; font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol; font-size: 14px; line-height: 1.5; margin: 0; } .container { margin: 50px auto; max-width: 600px; text-align: center; padding: 0 24px; } a { color: #0366d6; text-decoration: none; } a:hover { text-decoration: underline; } h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; text-shadow: 0 1px 0 #fff; } p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; } ul { list-style: none; margin: 25px 0; padding: 0; } li { display: table-cell; font-weight: bold; width: 1%; } .logo { display: inline-block; margin-top: 35px; } .logo-img-2x { display: none; } @media only screen and (-webkit-min-device-pixel-ratio: 2), only screen and ( min--moz-device-pixel-ratio: 2), only screen and ( -o-min-device-pixel-ratio: 2/1), only screen and (min-device-pixel-ratio: 2), only screen and (min-resolution: 192dpi), only screen and (min-resolution: 2dppx) { .logo-img-1x { display: none; } .logo-img-2x { display: inline-block; } } #suggestions { margin-top: 35px; color: #ccc; } #suggestions a { color: #66; font-weight: 200; font-size: 14px; margin: 0 10px; } Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour. https://support.github.com/contact";>Contact Support — https://githubstatus.com";>GitHub Status — https://twitter.com/githubstatus";>@githubstatus ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Account inserted passes for -start/stop options (PR #138830)
https://github.com/optimisan edited https://github.com/llvm/llvm-project/pull/138830 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] clang: Fix broken implicit cast to generic address space (PR #138863)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/138863?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#138864** https://app.graphite.dev/github/pr/llvm/llvm-project/138864?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#138863** https://app.graphite.dev/github/pr/llvm/llvm-project/138863?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/138863?utm_source=stack-comment-view-in-graphite"; target="_blank">(View in Graphite) * **#138862** https://app.graphite.dev/github/pr/llvm/llvm-project/138862?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn more about https://stacking.dev/?utm_source=stack-comment";>stacking. https://github.com/llvm/llvm-project/pull/138863 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] clang: Remove dest LangAS argument from performAddrSpaceCast (PR #138866)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/138866 It isn't used and is redundant with the result pointer type argument. A more reasonable API would only have LangAS parameters, or IR parameters, not both. Not all values have a meaningful value for this. I'm also not sure why we have this at all, it's not overridden by any targets and further simplification is possible. >From 69ac2644f7f6d233e2e74eadb94f6f8283805288 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 7 May 2025 15:04:07 +0200 Subject: [PATCH] clang: Remove dest LangAS argument from performAddrSpaceCast It isn't used and is redundant with the result pointer type argument. A more reasonable API would only have LangAS parameters, or IR parameters, not both. Not all values have a meaningful value for this. I'm also not sure why we have this at all, it's not overridden by any targets and further simplification is possible. --- clang/lib/CodeGen/CGAtomic.cpp | 4 ++-- clang/lib/CodeGen/CGBuiltin.cpp | 8 clang/lib/CodeGen/CGCall.cpp | 11 +++ clang/lib/CodeGen/CGClass.cpp| 4 ++-- clang/lib/CodeGen/CGDecl.cpp | 9 - clang/lib/CodeGen/CGException.cpp| 5 ++--- clang/lib/CodeGen/CGExpr.cpp | 13 ++--- clang/lib/CodeGen/CGExprCXX.cpp | 6 ++ clang/lib/CodeGen/CGExprConstant.cpp | 6 +++--- clang/lib/CodeGen/CGExprScalar.cpp | 5 ++--- clang/lib/CodeGen/CodeGenModule.cpp | 6 +++--- clang/lib/CodeGen/TargetInfo.cpp | 6 +++--- clang/lib/CodeGen/TargetInfo.h | 7 +++ 13 files changed, 39 insertions(+), 51 deletions(-) diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp index 0af3cd07b13a0..51f0799a792fd 100644 --- a/clang/lib/CodeGen/CGAtomic.cpp +++ b/clang/lib/CodeGen/CGAtomic.cpp @@ -1084,8 +1084,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { auto DestAS = getContext().getTargetAddressSpace(LangAS::opencl_generic); auto *DestType = llvm::PointerType::get(getLLVMContext(), DestAS); - return getTargetHooks().performAddrSpaceCast( - *this, V, AS, 
LangAS::opencl_generic, DestType, false); + return getTargetHooks().performAddrSpaceCast(*this, V, AS, DestType, + false); }; Args.add(RValue::get(CastToGenericAddrSpace(Ptr.emitRawPointer(*this), diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e6816736412b8..45e0f69c46902 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4081,8 +4081,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, LangAS EAS = E->getType()->getPointeeType().getAddressSpace(); if (AAS != EAS) { llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType()); - return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS, - EAS, Ty)); + return RValue::get( + getTargetHooks().performAddrSpaceCast(*this, AI, AAS, Ty)); } return RValue::get(AI); } @@ -4103,8 +4103,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, LangAS EAS = E->getType()->getPointeeType().getAddressSpace(); if (AAS != EAS) { llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType()); - return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS, - EAS, Ty)); + return RValue::get( + getTargetHooks().performAddrSpaceCast(*this, AI, AAS, Ty)); } return RValue::get(AI); } diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index db8820a8c517e..dd892bada0433 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5242,12 +5242,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (SRetPtr.getAddressSpace() != RetAI.getIndirectAddrSpace()) { llvm::Value *V = SRetPtr.getBasePointer(); LangAS SAS = getLangASFromTargetAS(SRetPtr.getAddressSpace()); -LangAS DAS = getLangASFromTargetAS(RetAI.getIndirectAddrSpace()); llvm::Type *Ty = llvm::PointerType::get(getLLVMContext(), RetAI.getIndirectAddrSpace()); SRetPtr = SRetPtr.withPointer( -getTargetHooks().performAddrSpaceCast(*this, V, SAS, DAS, Ty, true), 
+getTargetHooks().performAddrSpaceCast(*this, V, SAS, Ty, true), SRetPtr.isKnownNonNull()); } IRCallArgs[IRFunctionArgs.getSRetArgNo()] = @@ -5392,8 +5391,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // we can look through a cast to a compatible address space value, // otherwise emit a copy. llvm::Value *Val = getTargetHooks().performAddrSpace
[llvm-branch-commits] [clang] clang: Fix broken implicit cast to generic address space (PR #138863)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/138863 This fixes emitting undefined behavior where a 64-bit generic pointer is written to a 32-bit slot allocated for a private pointer. This can be seen in test/CodeGenOpenCL/amdgcn-automatic-variable.cl's wrong_pointer_alloca. Rate limit · GitHub body { background-color: #f6f8fa; color: #24292e; font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol; font-size: 14px; line-height: 1.5; margin: 0; } .container { margin: 50px auto; max-width: 600px; text-align: center; padding: 0 24px; } a { color: #0366d6; text-decoration: none; } a:hover { text-decoration: underline; } h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; text-shadow: 0 1px 0 #fff; } p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; } ul { list-style: none; margin: 25px 0; padding: 0; } li { display: table-cell; font-weight: bold; width: 1%; } .logo { display: inline-block; margin-top: 35px; } .logo-img-2x { display: none; } @media only screen and (-webkit-min-device-pixel-ratio: 2), only screen and ( min--moz-device-pixel-ratio: 2), only screen and ( -o-min-device-pixel-ratio: 2/1), only screen and (min-device-pixel-ratio: 2), only screen and (min-resolution: 192dpi), only screen and (min-resolution: 2dppx) { .logo-img-1x { display: none; } .logo-img-2x { display: inline-block; } } #suggestions { margin-top: 35px; color: #ccc; } #suggestions a { color: #66; font-weight: 200; font-size: 14px; margin: 0 10px; } Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour. 
Contact Support (https://support.github.com/contact) — GitHub Status (https://githubstatus.com) — @githubstatus (https://twitter.com/githubstatus) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] clang/OpenCL: Fix special casing OpenCL in call emission (PR #138864)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/138864 This essentially reverts 1bf1a156d673. OpenCL's handling of address spaces has always been a mess, but it's better than it used to be so this hack appears to be unnecessary now. None of the code here should really depend on the language or language address space. The ABI address space to use is already explicit in the ABIArgInfo, so use that instead of guessing it has anything to do with LangAS::Default or getASTAllocaAddressSpace. The below usage of LangAS::Default and getASTAllocaAddressSpace are also suspect, but appears to be a more involved and separate fix. Rate limit · GitHub body { background-color: #f6f8fa; color: #24292e; font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol; font-size: 14px; line-height: 1.5; margin: 0; } .container { margin: 50px auto; max-width: 600px; text-align: center; padding: 0 24px; } a { color: #0366d6; text-decoration: none; } a:hover { text-decoration: underline; } h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; text-shadow: 0 1px 0 #fff; } p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; } ul { list-style: none; margin: 25px 0; padding: 0; } li { display: table-cell; font-weight: bold; width: 1%; } .logo { display: inline-block; margin-top: 35px; } .logo-img-2x { display: none; } @media only screen and (-webkit-min-device-pixel-ratio: 2), only screen and ( min--moz-device-pixel-ratio: 2), only screen and ( -o-min-device-pixel-ratio: 2/1), only screen and (min-device-pixel-ratio: 2), only screen and (min-resolution: 192dpi), only screen and (min-resolution: 2dppx) { .logo-img-1x { display: none; } .logo-img-2x { display: inline-block; } } #suggestions { margin-top: 35px; color: #ccc; } #suggestions a { color: #66; font-weight: 200; font-size: 14px; margin: 0 10px; } Whoa there! You have exceeded a secondary rate limit. 
Please wait a few minutes before you try again; in some cases this may take up to an hour. Contact Support (https://support.github.com/contact) — GitHub Status (https://githubstatus.com) — @githubstatus (https://twitter.com/githubstatus) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] clang: Read the address space from the ABIArgInfo (PR #138865)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/138865 Do not assume it's the alloca address space, we have an explicit address space to use for the argument already. Also use the original value's type instead of assuming DefaultAS. >From 2e920e995f2db29b40a5b18496dcd266733b8b6f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 7 May 2025 14:39:38 +0200 Subject: [PATCH] clang: Read the address space from the ABIArgInfo Do not assume it's the alloca address space, we have an explicit address space to use for the argument already. Also use the original value's type instead of assuming DefaultAS. --- clang/lib/CodeGen/CGCall.cpp | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 1404bdfd69647..db8820a8c517e 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5384,16 +5384,16 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (!NeedCopy) { // Skip the extra memcpy call. llvm::Value *V = getAsNaturalPointerTo(Addr, I->Ty); - auto *T = llvm::PointerType::get( - CGM.getLLVMContext(), CGM.getDataLayout().getAllocaAddrSpace()); + auto *T = llvm::PointerType::get(CGM.getLLVMContext(), + ArgInfo.getIndirectAddrSpace()); // FIXME: This should not depend on the language address spaces, and // only the contextual values. If the address space mismatches, see if // we can look through a cast to a compatible address space value, // otherwise emit a copy. llvm::Value *Val = getTargetHooks().performAddrSpaceCast( - *this, V, LangAS::Default, CGM.getASTAllocaAddressSpace(), T, - true); + *this, V, I->Ty.getAddressSpace(), CGM.getASTAllocaAddressSpace(), + T, true); if (ArgHasMaybeUndefAttr) Val = Builder.CreateFreeze(Val); IRCallArgs[FirstIRArg] = Val; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] clang: Read the address space from the ABIArgInfo (PR #138865)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/138865?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#138865** https://app.graphite.dev/github/pr/llvm/llvm-project/138865?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/138865?utm_source=stack-comment-view-in-graphite"; target="_blank">(View in Graphite) * **#138864** https://app.graphite.dev/github/pr/llvm/llvm-project/138864?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#138863** https://app.graphite.dev/github/pr/llvm/llvm-project/138863?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#138862** https://app.graphite.dev/github/pr/llvm/llvm-project/138862?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn more about https://stacking.dev/?utm_source=stack-comment";>stacking. https://github.com/llvm/llvm-project/pull/138865 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] clang: Fix broken implicit cast to generic address space (PR #138863)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes This fixes emitting undefined behavior where a 64-bit generic pointer is written to a 32-bit slot allocated for a private pointer. This can be seen in test/CodeGenOpenCL/amdgcn-automatic-variable.cl's wrong_pointer_alloca. --- Patch is 60.15 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/138863.diff 13 Files Affected: - (modified) clang/lib/CodeGen/CGDecl.cpp (+2-1) - (modified) clang/lib/CodeGen/CGExpr.cpp (+8-9) - (modified) clang/lib/CodeGen/CodeGenFunction.h (+19-1) - (modified) clang/test/CodeGenOpenCL/addr-space-struct-arg.cl (+1-4) - (modified) clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl (+19-25) - (modified) clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl (+1-4) - (modified) clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (+38-52) - (modified) clang/test/CodeGenOpenCL/amdgpu-nullptr.cl (+14-14) - (modified) clang/test/CodeGenOpenCL/blocks.cl (+3-3) - (modified) clang/test/CodeGenOpenCL/builtins-alloca.cl (+16-32) - (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl (+4-6) - (modified) clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl (+3-4) - (modified) clang/test/Index/pipe-size.cl (+1-1) ``diff diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index db34e2738b4cf..1e54e55c5abbb 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -1588,7 +1588,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // Create the alloca. Note that we set the name separately from // building the instruction so that it's there even in no-asserts // builds. - address = CreateTempAlloca(allocaTy, allocaAlignment, D.getName(), + address = CreateTempAlloca(allocaTy, Ty.getAddressSpace(), + allocaAlignment, D.getName(), /*ArraySize=*/nullptr, &AllocaAddr); // Don't emit lifetime markers for MSVC catch parameters. 
The lifetime of diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 2e01adc51fdf0..7f1308719a71e 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -100,13 +100,11 @@ CodeGenFunction::CreateTempAllocaWithoutCast(llvm::Type *Ty, CharUnits Align, return RawAddress(Alloca, Ty, Align, KnownNonNull); } -/// CreateTempAlloca - This creates a alloca and inserts it into the entry -/// block. The alloca is casted to default address space if necessary. -RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, - const Twine &Name, +RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, LangAS DestLangAS, + CharUnits Align, const Twine &Name, llvm::Value *ArraySize, RawAddress *AllocaAddr) { - auto Alloca = CreateTempAllocaWithoutCast(Ty, Align, Name, ArraySize); + RawAddress Alloca = CreateTempAllocaWithoutCast(Ty, Align, Name, ArraySize); if (AllocaAddr) *AllocaAddr = Alloca; llvm::Value *V = Alloca.getPointer(); @@ -114,8 +112,9 @@ RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, // be different from the type defined by the language. For example, // in C++ the auto variables are in the default address space. Therefore // cast alloca to the default address space when necessary. 
- if (getASTAllocaAddressSpace() != LangAS::Default) { -auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default); + + unsigned DestAddrSpace = getContext().getTargetAddressSpace(DestLangAS); + if (DestAddrSpace != Alloca.getAddressSpace()) { llvm::IRBuilderBase::InsertPointGuard IPG(Builder); // When ArraySize is nullptr, alloca is inserted at AllocaInsertPt, // otherwise alloca is inserted at the current insertion point of the @@ -123,8 +122,8 @@ RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, if (!ArraySize) Builder.SetInsertPoint(getPostAllocaInsertPoint()); V = getTargetHooks().performAddrSpaceCast( -*this, V, getASTAllocaAddressSpace(), LangAS::Default, -Builder.getPtrTy(DestAddrSpace), /*non-null*/ true); +*this, V, getASTAllocaAddressSpace(), DestLangAS, +Builder.getPtrTy(DestAddrSpace), /*IsNonNull=*/true); } return RawAddress(V, Ty, Align, KnownNonNull); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 561f8f6a2a2fb..c0bc3825f0188 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -2861,10 +2861,28 @@ class CodeGenFunction : public CodeGenTypeCache { /// more efficient if the caller knows that the address will not be exposed. llv
[llvm-branch-commits] [clang] clang/OpenCL: Fix special casing OpenCL in call emission (PR #138864)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes This essentially reverts 1bf1a156d673. OpenCL's handling of address spaces has always been a mess, but it's better than it used to be so this hack appears to be unnecessary now. None of the code here should really depend on the language or language address space. The ABI address space to use is already explicit in the ABIArgInfo, so use that instead of guessing it has anything to do with LangAS::Default or getASTAllocaAddressSpace. The below usage of LangAS::Default and getASTAllocaAddressSpace are also suspect, but appears to be a more involved and separate fix. --- Full diff: https://github.com/llvm/llvm-project/pull/138864.diff 1 Files Affected: - (modified) clang/lib/CodeGen/CGCall.cpp (+7-12) ``diff diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 82a24f7c295a2..1404bdfd69647 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5366,7 +5366,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, NeedCopy = true; } else if (I->hasLValue()) { auto LV = I->getKnownLValue(); - auto AS = LV.getAddressSpace(); bool isByValOrRef = ArgInfo.isIndirectAliased() || ArgInfo.getIndirectByVal(); @@ -5375,17 +5374,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, (LV.getAlignment() < getContext().getTypeAlignInChars(I->Ty))) { NeedCopy = true; } - if (!getLangOpts().OpenCL) { -if ((isByValOrRef && (AS != LangAS::Default && - AS != CGM.getASTAllocaAddressSpace( { - NeedCopy = true; -} - } - // For OpenCL even if RV is located in default or alloca address space - // we don't want to perform address space cast for it. 
- else if ((isByValOrRef && Addr.getType()->getAddressSpace() != -IRFuncTy->getParamType(FirstIRArg) -->getPointerAddressSpace())) { + + if (isByValOrRef && Addr.getType()->getAddressSpace() != + ArgInfo.getIndirectAddrSpace()) { NeedCopy = true; } } @@ -5396,6 +5387,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, auto *T = llvm::PointerType::get( CGM.getLLVMContext(), CGM.getDataLayout().getAllocaAddrSpace()); + // FIXME: This should not depend on the language address spaces, and + // only the contextual values. If the address space mismatches, see if + // we can look through a cast to a compatible address space value, + // otherwise emit a copy. llvm::Value *Val = getTargetHooks().performAddrSpaceCast( *this, V, LangAS::Default, CGM.getASTAllocaAddressSpace(), T, true); `` https://github.com/llvm/llvm-project/pull/138864 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] clang: Remove dest LangAS argument from performAddrSpaceCast (PR #138866)
llvmbot wrote: @llvm/pr-subscribers-clang-codegen Author: Matt Arsenault (arsenm) Changes It isn't used and is redundant with the result pointer type argument. A more reasonable API would only have LangAS parameters, or IR parameters, not both. Not all values have a meaningful value for this. I'm also not sure why we have this at all, it's not overridden by any targets and further simplification is possible. --- Full diff: https://github.com/llvm/llvm-project/pull/138866.diff 13 Files Affected: - (modified) clang/lib/CodeGen/CGAtomic.cpp (+2-2) - (modified) clang/lib/CodeGen/CGBuiltin.cpp (+4-4) - (modified) clang/lib/CodeGen/CGCall.cpp (+3-8) - (modified) clang/lib/CodeGen/CGClass.cpp (+2-2) - (modified) clang/lib/CodeGen/CGDecl.cpp (+4-5) - (modified) clang/lib/CodeGen/CGException.cpp (+2-3) - (modified) clang/lib/CodeGen/CGExpr.cpp (+6-7) - (modified) clang/lib/CodeGen/CGExprCXX.cpp (+2-4) - (modified) clang/lib/CodeGen/CGExprConstant.cpp (+3-3) - (modified) clang/lib/CodeGen/CGExprScalar.cpp (+2-3) - (modified) clang/lib/CodeGen/CodeGenModule.cpp (+3-3) - (modified) clang/lib/CodeGen/TargetInfo.cpp (+3-3) - (modified) clang/lib/CodeGen/TargetInfo.h (+3-4) ``diff diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp index 0af3cd07b13a0..51f0799a792fd 100644 --- a/clang/lib/CodeGen/CGAtomic.cpp +++ b/clang/lib/CodeGen/CGAtomic.cpp @@ -1084,8 +1084,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { auto DestAS = getContext().getTargetAddressSpace(LangAS::opencl_generic); auto *DestType = llvm::PointerType::get(getLLVMContext(), DestAS); - return getTargetHooks().performAddrSpaceCast( - *this, V, AS, LangAS::opencl_generic, DestType, false); + return getTargetHooks().performAddrSpaceCast(*this, V, AS, DestType, + false); }; Args.add(RValue::get(CastToGenericAddrSpace(Ptr.emitRawPointer(*this), diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e6816736412b8..45e0f69c46902 100644 --- 
a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4081,8 +4081,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, LangAS EAS = E->getType()->getPointeeType().getAddressSpace(); if (AAS != EAS) { llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType()); - return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS, - EAS, Ty)); + return RValue::get( + getTargetHooks().performAddrSpaceCast(*this, AI, AAS, Ty)); } return RValue::get(AI); } @@ -4103,8 +4103,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, LangAS EAS = E->getType()->getPointeeType().getAddressSpace(); if (AAS != EAS) { llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType()); - return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS, - EAS, Ty)); + return RValue::get( + getTargetHooks().performAddrSpaceCast(*this, AI, AAS, Ty)); } return RValue::get(AI); } diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index db8820a8c517e..dd892bada0433 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5242,12 +5242,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (SRetPtr.getAddressSpace() != RetAI.getIndirectAddrSpace()) { llvm::Value *V = SRetPtr.getBasePointer(); LangAS SAS = getLangASFromTargetAS(SRetPtr.getAddressSpace()); -LangAS DAS = getLangASFromTargetAS(RetAI.getIndirectAddrSpace()); llvm::Type *Ty = llvm::PointerType::get(getLLVMContext(), RetAI.getIndirectAddrSpace()); SRetPtr = SRetPtr.withPointer( -getTargetHooks().performAddrSpaceCast(*this, V, SAS, DAS, Ty, true), +getTargetHooks().performAddrSpaceCast(*this, V, SAS, Ty, true), SRetPtr.isKnownNonNull()); } IRCallArgs[IRFunctionArgs.getSRetArgNo()] = @@ -5392,8 +5391,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // we can look through a cast to a compatible address space value, // otherwise emit a copy. 
llvm::Value *Val = getTargetHooks().performAddrSpaceCast( - *this, V, I->Ty.getAddressSpace(), CGM.getASTAllocaAddressSpace(), - T, true); + *this, V, I->Ty.getAddressSpace(), T, true); if (ArgHasMaybeUndefAttr) Val = Builder.CreateFreeze(Val); IRCallArgs[FirstIRArg] = Val; @@ -5482,12 +5480,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (FirstIRArg < IRFuncTy->getNumParams() &&
[llvm-branch-commits] [llvm] [KeyInstr][LoopRotate] Remap atoms of duplicated instructions (PR #133490)
OCHyams wrote: > LGTM (although the hand-written (check-lines that is) tests are more fun and > easier to read) Agreed. I think I must've lost steam around this part of the stack. Fixed. https://github.com/llvm/llvm-project/pull/133490 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SelectionDAG] Widen <2 x T> vector types for atomic load (PR #120598)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120598 >From 99a560ff2b72ea0dd90c16fedeaac27820398079 Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 11:19:39 -0500 Subject: [PATCH] [SelectionDAG] Widen <2 x T> vector types for atomic load Vector types of 2 elements must be widened. This change does this for vector types of atomic load in SelectionDAG so that it can translate aligned vectors of >1 size. commit-id:2894ccd1 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 96 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 78 +++ 3 files changed, 154 insertions(+), 21 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 89ea7ef4dbe89..bdfa5f7741ad3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1062,6 +1062,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); + SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N); SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 8eee7a4c61fe6..6b3467573a0a2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4625,6 +4625,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +Res = WidenVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); 
break; case ISD::STEP_VECTOR: case ISD::SPLAT_VECTOR: @@ -6014,6 +6017,77 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { N->getOperand(1), N->getOperand(2)); } +/// Either return the same load or provide appropriate casts +/// from the load and return that. +static SDValue loadElement(SDValue LdOp, EVT FirstVT, EVT WidenVT, + TypeSize LdWidth, TypeSize FirstVTWidth, SDLoc dl, + SelectionDAG &DAG) { + assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth)); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + if (!FirstVT.isVector()) { +unsigned NumElts = +WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); +EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts); +SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); +return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); + } else if (FirstVT == WidenVT) +return LdOp; + else { +// TODO: We don't currently have any tests that exercise this code path. +assert(!"Unimplemented"); + } +} + +static std::optional findMemType(SelectionDAG &DAG, + const TargetLowering &TLI, unsigned Width, + EVT WidenVT, unsigned Align, + unsigned WidenEx); + +SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) { + EVT WidenVT = + TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + EVT LdVT = LD->getMemoryVT(); + SDLoc dl(LD); + assert(LdVT.isVector() && WidenVT.isVector() && "Expected vectors"); + assert(LdVT.isScalableVector() == WidenVT.isScalableVector() && + "Must be scalable"); + assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType() && + "Expected equivalent element types"); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); + AAMDNodes AAInfo = LD->getAAInfo(); + + TypeSize LdWidth = LdVT.getSizeInBits(); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + TypeSize WidthDiff = WidenWidth - LdWidth; + + // Find the vector 
type that can load from. + std::optional FirstVT = + findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, /*LdAlign=*/0, + WidthDiff.getKnownMinValue()); + + if (!FirstVT) +return SDValue(); + + SmallVector MemVTs; + TypeSize FirstVTWidth = FirstVT->getSizeInBits(); + + SDValue LdOp = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, *FirstVT, *FirstVT, + Chain, BasePtr, LD->getMemOperand()); + + // Load the element with one instruction. + SDValue Result = + loadElement(LdOp, *FirstVT, WidenVT, LdWidth, FirstVTWidth, dl, DAG); + + // Modified the
[llvm-branch-commits] [llvm] [SelectionDAG] Split vector types for atomic load (PR #120640)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120640 >From 0e4399dc220b175e78ad110118635bf5e843d768 Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 16:25:55 -0500 Subject: [PATCH] [SelectionDAG] Split vector types for atomic load Vector types that aren't widened are split so that a single ATOMIC_LOAD is issued for the entire vector at once. This change utilizes the load vectorization infrastructure in SelectionDAG in order to group the vectors. This enables SelectionDAG to translate vectors with type bfloat,half. commit-id:3a045357 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 35 llvm/test/CodeGen/X86/atomic-load-store.ll| 171 ++ 3 files changed, 207 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index bdfa5f7741ad3..d8f402f529632 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -960,6 +960,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 6b3467573a0a2..d0ea0a607aed7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1172,6 +1172,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_STEP_VECTOR(N, Lo, Hi); 
break; case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; + case ISD::ATOMIC_LOAD: +SplitVecRes_ATOMIC_LOAD(cast(N), Lo, Hi); +break; case ISD::LOAD: SplitVecRes_LOAD(cast(N), Lo, Hi); break; @@ -1421,6 +1424,38 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SetSplitVector(SDValue(N, ResNo), Lo, Hi); } +void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, + SDValue &Hi) { + EVT LoVT, HiVT; + SDLoc dl(LD); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); + + ISD::LoadExtType ExtType = LD->getExtensionType(); + SDValue Ch = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + + EVT IntVT = + EVT::getIntegerVT(*DAG.getContext(), LD->getValueType(0).getSizeInBits()); + EVT MemIntVT = + EVT::getIntegerVT(*DAG.getContext(), LD->getMemoryVT().getSizeInBits()); + SDValue ALD = DAG.getAtomicLoad(ExtType, dl, MemIntVT, IntVT, Ch, Ptr, + LD->getMemOperand()); + + EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits()); + EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits()); + SDValue ExtractLo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, LoIntVT, ALD, + DAG.getIntPtrConstant(0, dl)); + SDValue ExtractHi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiIntVT, ALD, + DAG.getIntPtrConstant(1, dl)); + + Lo = DAG.getBitcast(LoVT, ExtractLo); + Hi = DAG.getBitcast(HiVT, ExtractHi); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(LD, 1), ALD.getValue(1)); +} + void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT, MachinePointerInfo &MPI, SDValue &Ptr, uint64_t *ScaledOffset) { diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 935d058a52f8f..42b0955824293 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -204,6 +204,68 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) { ret <2 x float> %ret } +define <2 x half> @atomic_vec2_half(ptr %x) { +; CHECK3-LABEL: atomic_vec2_half: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movl (%rdi), %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK3-NEXT:shrl $16, %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm1 +; CHECK3-NEXT:punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_half: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movl (%rdi), %eax +; CHECK0-NEXT:movl %eax, %ecx +; CHECK0-NEXT:shrl $16, %ecx +; CHECK0-NEXT:movw %cx, %dx +; CHE
[llvm-branch-commits] [llvm] [X86] Add atomic vector tests for unaligned >1 sizes. (PR #120387)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120387 >From 5ce8ea6a12090d66e6bacceea9837c54ee83 Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:40:32 -0500 Subject: [PATCH] [X86] Add atomic vector tests for unaligned >1 sizes. Unaligned atomic vectors with size >1 are lowered to calls. Adding their tests separately here. commit-id:a06a5cc6 --- llvm/test/CodeGen/X86/atomic-load-store.ll | 253 + 1 file changed, 253 insertions(+) diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 6efcbb80c0ce6..39e9fdfa5e62b 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -146,6 +146,34 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { ret <1 x i64> %ret } +define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_ptr: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movq (%rsp), %rax +; CHECK3-NEXT:popq %rcx +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_ptr: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq (%rsp), %rax +; CHECK0-NEXT:popq %rcx +; CHECK0-NEXT:retq + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} + define <1 x half> @atomic_vec1_half(ptr %x) { ; CHECK3-LABEL: atomic_vec1_half: ; CHECK3: ## %bb.0: @@ -182,3 +210,228 @@ define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { %ret = load atomic <1 x double>, ptr %x acquire, align 8 ret <1 x double> %ret } + +define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_i64: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; 
CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movq (%rsp), %rax +; CHECK3-NEXT:popq %rcx +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i64: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq (%rsp), %rax +; CHECK0-NEXT:popq %rcx +; CHECK0-NEXT:retq + %ret = load atomic <1 x i64>, ptr %x acquire, align 4 + ret <1 x i64> %ret +} + +define <1 x double> @atomic_vec1_double(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_double: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK3-NEXT:popq %rax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_double: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK0-NEXT:popq %rax +; CHECK0-NEXT:retq + %ret = load atomic <1 x double>, ptr %x acquire, align 4 + ret <1 x double> %ret +} + +define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec2_i32: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK3-NEXT:popq %rax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_i32: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq 
%rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq {{.*#+}} xmm0 = mem[0],zero +; CHECK0-NEXT:popq %rax +; CHECK0-NEXT:retq + %ret = load atomic <2 x i32>, ptr %x acquire, align 4 + ret <2 x i32> %ret +} + +define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_float_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16 +; CHECK-NEXT:movq %rdx, %xmm1 +; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[
[llvm-branch-commits] [llvm] [X86] Manage atomic load of fp -> int promotion in DAG (PR #120386)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120386 >From 531bc05506d196392d1aac609049bda9cad7da01 Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:38:23 -0500 Subject: [PATCH] [X86] Manage atomic load of fp -> int promotion in DAG When lowering atomic <1 x T> vector types with floats, selection can fail since this pattern is unsupported. To support this, floats can be casted to an integer type of the same size. commit-id:f9d761c5 --- llvm/lib/Target/X86/X86ISelLowering.cpp| 4 +++ llvm/test/CodeGen/X86/atomic-load-store.ll | 37 ++ 2 files changed, 41 insertions(+) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f04603867a587..749fa34e791af 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2653,6 +2653,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(Op, MVT::f32, Promote); } + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64); + // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine({ISD::VECTOR_SHUFFLE, ISD::SCALAR_TO_VECTOR, diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index d23cfb89f9fc8..6efcbb80c0ce6 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -145,3 +145,40 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { %ret = load atomic <1 x i64>, ptr %x acquire, align 8 ret <1 x i64> %ret } + +define <1 x half> @atomic_vec1_half(ptr %x) { +; CHECK3-LABEL: atomic_vec1_half: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_half: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw 
(%rdi), %cx +; CHECK0-NEXT:## implicit-def: $eax +; CHECK0-NEXT:movw %cx, %ax +; CHECK0-NEXT:## implicit-def: $xmm0 +; CHECK0-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK0-NEXT:retq + %ret = load atomic <1 x half>, ptr %x acquire, align 2 + ret <1 x half> %ret +} + +define <1 x float> @atomic_vec1_float(ptr %x) { +; CHECK-LABEL: atomic_vec1_float: +; CHECK: ## %bb.0: +; CHECK-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT:retq + %ret = load atomic <1 x float>, ptr %x acquire, align 4 + ret <1 x float> %ret +} + +define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec1_double_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT:retq + %ret = load atomic <1 x double>, ptr %x acquire, align 8 + ret <1 x double> %ret +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SelectionDAG] Legalize <1 x T> vector types for atomic load (PR #120385)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120385 >From 08e39f28a1f041a72bbbf3ebe520f9136e5aebbc Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:37:17 -0500 Subject: [PATCH] [SelectionDAG] Legalize <1 x T> vector types for atomic load `load atomic <1 x T>` is not valid. This change legalizes vector types of atomic load via scalarization in SelectionDAG so that it can, for example, translate from `v1i32` to `i32`. commit-id:5c36cc8c --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 15 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 121 +- 3 files changed, 135 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 720393158aa5e..89ea7ef4dbe89 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -874,6 +874,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index d0b69b88748a9..8eee7a4c61fe6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -64,6 +64,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { R = ScalarizeVecRes_UnaryOpWithExtraInput(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +R = ScalarizeVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: R = 
ScalarizeVecRes_LOAD(cast(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; @@ -458,6 +461,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { return Op; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) { + SDValue Result = DAG.getAtomicLoad( + ISD::NON_EXTLOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(), + N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(), + N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { assert(N->isUnindexed() && "Indexed vector load?"); diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 5bce4401f7bdb..d23cfb89f9fc8 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK3 +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s --check-prefixes=CHECK,CHECK0 define void @test1(ptr %ptr, i32 %val1) { ; CHECK-LABEL: test1: @@ -28,3 +28,120 @@ define i32 @test3(ptr %ptr) { %val = load atomic i32, ptr %ptr seq_cst, align 4 ret i32 %val } + +define <1 x i32> @atomic_vec1_i32(ptr %x) { +; CHECK-LABEL: atomic_vec1_i32: +; CHECK: ## %bb.0: +; CHECK-NEXT:movl (%rdi), %eax +; CHECK-NEXT:retq + %ret = load atomic <1 x i32>, ptr %x 
acquire, align 4 + ret <1 x i32> %ret +} + +define <1 x i8> @atomic_vec1_i8(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i8: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzbl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i8: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movb (%rdi), %al +; CHECK0-NEXT:retq + %ret = load atomic <1 x i8>, ptr %x acquire, align 1 + ret <1 x i8> %ret +} + +define <1 x i16> @atomic_vec1_i16(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i16: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i16: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw (%rdi), %ax +; CHECK0-NEXT:retq + %ret = load atomic <1 x i16>, ptr %x acquire, align 2 + ret <1 x i16> %ret +} + +define <1 x i32> @atomic_vec1_i8_zext(ptr %x) { +; CHECK3-LABEL: atomic_ve
[llvm-branch-commits] [llvm] [X86] Add atomic vector tests for unaligned >1 sizes. (PR #120387)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120387 >From 5ce8ea6a12090d66e6bacceea9837c54ee83 Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:40:32 -0500 Subject: [PATCH] [X86] Add atomic vector tests for unaligned >1 sizes. Unaligned atomic vectors with size >1 are lowered to calls. Adding their tests separately here. commit-id:a06a5cc6 --- llvm/test/CodeGen/X86/atomic-load-store.ll | 253 + 1 file changed, 253 insertions(+) diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 6efcbb80c0ce6..39e9fdfa5e62b 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -146,6 +146,34 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { ret <1 x i64> %ret } +define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_ptr: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movq (%rsp), %rax +; CHECK3-NEXT:popq %rcx +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_ptr: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq (%rsp), %rax +; CHECK0-NEXT:popq %rcx +; CHECK0-NEXT:retq + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} + define <1 x half> @atomic_vec1_half(ptr %x) { ; CHECK3-LABEL: atomic_vec1_half: ; CHECK3: ## %bb.0: @@ -182,3 +210,228 @@ define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { %ret = load atomic <1 x double>, ptr %x acquire, align 8 ret <1 x double> %ret } + +define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_i64: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; 
CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movq (%rsp), %rax +; CHECK3-NEXT:popq %rcx +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i64: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq (%rsp), %rax +; CHECK0-NEXT:popq %rcx +; CHECK0-NEXT:retq + %ret = load atomic <1 x i64>, ptr %x acquire, align 4 + ret <1 x i64> %ret +} + +define <1 x double> @atomic_vec1_double(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_double: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK3-NEXT:popq %rax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_double: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK0-NEXT:popq %rax +; CHECK0-NEXT:retq + %ret = load atomic <1 x double>, ptr %x acquire, align 4 + ret <1 x double> %ret +} + +define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec2_i32: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK3-NEXT:popq %rax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_i32: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq 
%rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq {{.*#+}} xmm0 = mem[0],zero +; CHECK0-NEXT:popq %rax +; CHECK0-NEXT:retq + %ret = load atomic <2 x i32>, ptr %x acquire, align 4 + ret <2 x i32> %ret +} + +define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_float_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16 +; CHECK-NEXT:movq %rdx, %xmm1 +; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[
[llvm-branch-commits] [llvm] [SelectionDAG] Widen <2 x T> vector types for atomic load (PR #120598)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120598 >From 99a560ff2b72ea0dd90c16fedeaac27820398079 Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 11:19:39 -0500 Subject: [PATCH] [SelectionDAG] Widen <2 x T> vector types for atomic load Vector types of 2 elements must be widened. This change does this for vector types of atomic load in SelectionDAG so that it can translate aligned vectors of >1 size. commit-id:2894ccd1 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 96 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 78 +++ 3 files changed, 154 insertions(+), 21 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 89ea7ef4dbe89..bdfa5f7741ad3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1062,6 +1062,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); + SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N); SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 8eee7a4c61fe6..6b3467573a0a2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4625,6 +4625,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +Res = WidenVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); 
break; case ISD::STEP_VECTOR: case ISD::SPLAT_VECTOR: @@ -6014,6 +6017,77 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { N->getOperand(1), N->getOperand(2)); } +/// Either return the same load or provide appropriate casts +/// from the load and return that. +static SDValue loadElement(SDValue LdOp, EVT FirstVT, EVT WidenVT, + TypeSize LdWidth, TypeSize FirstVTWidth, SDLoc dl, + SelectionDAG &DAG) { + assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth)); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + if (!FirstVT.isVector()) { +unsigned NumElts = +WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); +EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts); +SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); +return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); + } else if (FirstVT == WidenVT) +return LdOp; + else { +// TODO: We don't currently have any tests that exercise this code path. +assert(!"Unimplemented"); + } +} + +static std::optional findMemType(SelectionDAG &DAG, + const TargetLowering &TLI, unsigned Width, + EVT WidenVT, unsigned Align, + unsigned WidenEx); + +SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) { + EVT WidenVT = + TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + EVT LdVT = LD->getMemoryVT(); + SDLoc dl(LD); + assert(LdVT.isVector() && WidenVT.isVector() && "Expected vectors"); + assert(LdVT.isScalableVector() == WidenVT.isScalableVector() && + "Must be scalable"); + assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType() && + "Expected equivalent element types"); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); + AAMDNodes AAInfo = LD->getAAInfo(); + + TypeSize LdWidth = LdVT.getSizeInBits(); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + TypeSize WidthDiff = WidenWidth - LdWidth; + + // Find the vector 
type that can load from. + std::optional FirstVT = + findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, /*LdAlign=*/0, + WidthDiff.getKnownMinValue()); + + if (!FirstVT) +return SDValue(); + + SmallVector MemVTs; + TypeSize FirstVTWidth = FirstVT->getSizeInBits(); + + SDValue LdOp = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, *FirstVT, *FirstVT, + Chain, BasePtr, LD->getMemOperand()); + + // Load the element with one instruction. + SDValue Result = + loadElement(LdOp, *FirstVT, WidenVT, LdWidth, FirstVTWidth, dl, DAG); + + // Modified the
[llvm-branch-commits] [llvm] [X86] Manage atomic load of fp -> int promotion in DAG (PR #120386)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120386 >From 531bc05506d196392d1aac609049bda9cad7da01 Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:38:23 -0500 Subject: [PATCH] [X86] Manage atomic load of fp -> int promotion in DAG When lowering atomic <1 x T> vector types with floats, selection can fail since this pattern is unsupported. To support this, floats can be casted to an integer type of the same size. commit-id:f9d761c5 --- llvm/lib/Target/X86/X86ISelLowering.cpp| 4 +++ llvm/test/CodeGen/X86/atomic-load-store.ll | 37 ++ 2 files changed, 41 insertions(+) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f04603867a587..749fa34e791af 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2653,6 +2653,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(Op, MVT::f32, Promote); } + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64); + // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine({ISD::VECTOR_SHUFFLE, ISD::SCALAR_TO_VECTOR, diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index d23cfb89f9fc8..6efcbb80c0ce6 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -145,3 +145,40 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { %ret = load atomic <1 x i64>, ptr %x acquire, align 8 ret <1 x i64> %ret } + +define <1 x half> @atomic_vec1_half(ptr %x) { +; CHECK3-LABEL: atomic_vec1_half: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_half: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw 
(%rdi), %cx +; CHECK0-NEXT:## implicit-def: $eax +; CHECK0-NEXT:movw %cx, %ax +; CHECK0-NEXT:## implicit-def: $xmm0 +; CHECK0-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK0-NEXT:retq + %ret = load atomic <1 x half>, ptr %x acquire, align 2 + ret <1 x half> %ret +} + +define <1 x float> @atomic_vec1_float(ptr %x) { +; CHECK-LABEL: atomic_vec1_float: +; CHECK: ## %bb.0: +; CHECK-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT:retq + %ret = load atomic <1 x float>, ptr %x acquire, align 4 + ret <1 x float> %ret +} + +define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec1_double_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT:retq + %ret = load atomic <1 x double>, ptr %x acquire, align 8 + ret <1 x double> %ret +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [X86] Remove extra MOV after widening atomic load (PR #138635)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/138635 >From 5b5d94887ed368de16e1395727295527ff67b471 Mon Sep 17 00:00:00 2001 From: jofernau_amdeng Date: Tue, 6 May 2025 01:48:11 -0400 Subject: [PATCH] [X86] Remove extra MOV after widening atomic load This change adds patterns to optimize out an extra MOV present after widening the atomic load. commit-id:45989503 --- llvm/lib/Target/X86/X86InstrCompiler.td| 7 llvm/test/CodeGen/X86/atomic-load-store.ll | 43 -- llvm/test/CodeGen/X86/atomic-unordered.ll | 3 +- 3 files changed, 31 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 167e27eddd71e..8ad8a0a6194d6 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1200,6 +1200,13 @@ def : Pat<(i16 (atomic_load_nonext_16 addr:$src)), (MOV16rm addr:$src)>; def : Pat<(i32 (atomic_load_nonext_32 addr:$src)), (MOV32rm addr:$src)>; def : Pat<(i64 (atomic_load_nonext_64 addr:$src)), (MOV64rm addr:$src)>; +def : Pat<(v4i32 (scalar_to_vector (i32 (anyext (i16 (atomic_load_16 addr:$src)), + (MOVDI2PDIrm addr:$src)>; // load atomic <2 x i8> +def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src, + (MOVDI2PDIrm addr:$src)>; // load atomic <2 x i16> +def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src, + (MOV64toPQIrm addr:$src)>; // load atomic <2 x i32,float> + // Floating point loads/stores. 
def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst), (MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>; diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 9ee8b4fc5ac7f..935d058a52f8f 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -149,8 +149,7 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { define <2 x i8> @atomic_vec2_i8(ptr %x) { ; CHECK3-LABEL: atomic_vec2_i8: ; CHECK3: ## %bb.0: -; CHECK3-NEXT:movzwl (%rdi), %eax -; CHECK3-NEXT:movd %eax, %xmm0 +; CHECK3-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK3-NEXT:retq ; ; CHECK0-LABEL: atomic_vec2_i8: @@ -165,11 +164,15 @@ define <2 x i8> @atomic_vec2_i8(ptr %x) { } define <2 x i16> @atomic_vec2_i16(ptr %x) { -; CHECK-LABEL: atomic_vec2_i16: -; CHECK: ## %bb.0: -; CHECK-NEXT:movl (%rdi), %eax -; CHECK-NEXT:movd %eax, %xmm0 -; CHECK-NEXT:retq +; CHECK3-LABEL: atomic_vec2_i16: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_i16: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK0-NEXT:retq %ret = load atomic <2 x i16>, ptr %x acquire, align 4 ret <2 x i16> %ret } @@ -177,8 +180,7 @@ define <2 x i16> @atomic_vec2_i16(ptr %x) { define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) { ; CHECK-LABEL: atomic_vec2_ptr270: ; CHECK: ## %bb.0: -; CHECK-NEXT:movq (%rdi), %rax -; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:movq (%rdi), %xmm0 ; CHECK-NEXT:retq %ret = load atomic <2 x ptr addrspace(270)>, ptr %x acquire, align 8 ret <2 x ptr addrspace(270)> %ret @@ -187,8 +189,7 @@ define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) { define <2 x i32> @atomic_vec2_i32_align(ptr %x) { ; CHECK-LABEL: atomic_vec2_i32_align: ; CHECK: ## %bb.0: -; CHECK-NEXT:movq (%rdi), %rax -; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:movq (%rdi), %xmm0 ; 
CHECK-NEXT:retq %ret = load atomic <2 x i32>, ptr %x acquire, align 8 ret <2 x i32> %ret @@ -197,8 +198,7 @@ define <2 x i32> @atomic_vec2_i32_align(ptr %x) { define <2 x float> @atomic_vec2_float_align(ptr %x) { ; CHECK-LABEL: atomic_vec2_float_align: ; CHECK: ## %bb.0: -; CHECK-NEXT:movq (%rdi), %rax -; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:movq (%rdi), %xmm0 ; CHECK-NEXT:retq %ret = load atomic <2 x float>, ptr %x acquire, align 8 ret <2 x float> %ret @@ -354,11 +354,15 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { } define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { -; CHECK-LABEL: atomic_vec4_i8: -; CHECK: ## %bb.0: -; CHECK-NEXT:movl (%rdi), %eax -; CHECK-NEXT:movd %eax, %xmm0 -; CHECK-NEXT:retq +; CHECK3-LABEL: atomic_vec4_i8: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec4_i8: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK0-NEXT:retq %ret = load atomic <4 x i8>, ptr %x acquire, align 4 ret <4 x i8> %ret } @@ -366,8 +370,7 @@ define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { define <4 x i16> @atomic_vec4_i16(
[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120716 >From 72fd8a6b0d8d304992a804e7e05367389feedc2c Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 20 Dec 2024 06:14:28 -0500 Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector AtomicExpand fails for aligned `load atomic ` because it does not find a compatible library call. This change adds appropriate bitcasts so that the call can be lowered. commit-id:f430c1af --- llvm/lib/CodeGen/AtomicExpandPass.cpp | 15 - llvm/test/CodeGen/ARM/atomic-load-store.ll| 51 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 30 + .../X86/expand-atomic-non-integer.ll | 65 +++ 4 files changed, 158 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index c376de877ac7d..70f59eafc6ecb 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -2066,9 +2066,18 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( I->replaceAllUsesWith(V); } else if (HasResult) { Value *V; -if (UseSizedLibcall) - V = Builder.CreateBitOrPointerCast(Result, I->getType()); -else { +if (UseSizedLibcall) { + // Add bitcasts from Result's scalar type to I's vector type + auto *PtrTy = dyn_cast(I->getType()->getScalarType()); + auto *VTy = dyn_cast(I->getType()); + if (VTy && PtrTy && !Result->getType()->isVectorTy()) { +unsigned AS = PtrTy->getAddressSpace(); +Value *BC = Builder.CreateBitCast( +Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS))); +V = Builder.CreateIntToPtr(BC, I->getType()); + } else +V = Builder.CreateBitOrPointerCast(Result, I->getType()); +} else { V = Builder.CreateAlignedLoad(I->getType(), AllocaResult, AllocaAlignment); Builder.CreateLifetimeEnd(AllocaResult, SizeVal64); diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll index 560dfde356c29..36c1305a7c5df 100644 --- a/llvm/test/CodeGen/ARM/atomic-load-store.ll +++ 
b/llvm/test/CodeGen/ARM/atomic-load-store.ll @@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) { store atomic double %val1, ptr %ptr seq_cst, align 8 ret void } + +define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 { +; ARM-LABEL: atomic_vec1_ptr: +; ARM: @ %bb.0: +; ARM-NEXT:ldr r0, [r0] +; ARM-NEXT:dmb ish +; ARM-NEXT:bx lr +; +; ARMOPTNONE-LABEL: atomic_vec1_ptr: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT:ldr r0, [r0] +; ARMOPTNONE-NEXT:dmb ish +; ARMOPTNONE-NEXT:bx lr +; +; THUMBTWO-LABEL: atomic_vec1_ptr: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT:ldr r0, [r0] +; THUMBTWO-NEXT:dmb ish +; THUMBTWO-NEXT:bx lr +; +; THUMBONE-LABEL: atomic_vec1_ptr: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT:push {r7, lr} +; THUMBONE-NEXT:movs r1, #0 +; THUMBONE-NEXT:mov r2, r1 +; THUMBONE-NEXT:bl __sync_val_compare_and_swap_4 +; THUMBONE-NEXT:pop {r7, pc} +; +; ARMV4-LABEL: atomic_vec1_ptr: +; ARMV4: @ %bb.0: +; ARMV4-NEXT:push {r11, lr} +; ARMV4-NEXT:mov r1, #2 +; ARMV4-NEXT:bl __atomic_load_4 +; ARMV4-NEXT:pop {r11, lr} +; ARMV4-NEXT:mov pc, lr +; +; ARMV6-LABEL: atomic_vec1_ptr: +; ARMV6: @ %bb.0: +; ARMV6-NEXT:mov r1, #0 +; ARMV6-NEXT:mcr p15, #0, r1, c7, c10, #5 +; ARMV6-NEXT:ldr r0, [r0] +; ARMV6-NEXT:bx lr +; +; THUMBM-LABEL: atomic_vec1_ptr: +; THUMBM: @ %bb.0: +; THUMBM-NEXT:ldr r0, [r0] +; THUMBM-NEXT:dmb sy +; THUMBM-NEXT:bx lr + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 08d0405345f57..4293df8c13571 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -371,6 +371,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { ret <2 x i32> %ret } +define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec2_ptr_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16 +; 
CHECK-NEXT:movq %rdx, %xmm1 +; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT:popq %rax +; CHECK-NEXT:retq + %ret = load atomic <2 x ptr>, ptr %x acquire, align 16 + ret <2 x ptr> %ret +} + define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { ; CHECK3-LABEL: atomic_vec4_i8: ; CHECK3: ## %bb.0: @@ -394,6 +409,21 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind { ret <4 x i16> %ret } +define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_ptr270: +; CHECK: ## %b
[llvm-branch-commits] [llvm] [SelectionDAG] Split vector types for atomic load (PR #120640)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120640 >From 0e4399dc220b175e78ad110118635bf5e843d768 Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 16:25:55 -0500 Subject: [PATCH] [SelectionDAG] Split vector types for atomic load Vector types that aren't widened are split so that a single ATOMIC_LOAD is issued for the entire vector at once. This change utilizes the load vectorization infrastructure in SelectionDAG in order to group the vectors. This enables SelectionDAG to translate vectors with type bfloat,half. commit-id:3a045357 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 35 llvm/test/CodeGen/X86/atomic-load-store.ll| 171 ++ 3 files changed, 207 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index bdfa5f7741ad3..d8f402f529632 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -960,6 +960,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 6b3467573a0a2..d0ea0a607aed7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1172,6 +1172,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_STEP_VECTOR(N, Lo, Hi); 
break; case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; + case ISD::ATOMIC_LOAD: +SplitVecRes_ATOMIC_LOAD(cast(N), Lo, Hi); +break; case ISD::LOAD: SplitVecRes_LOAD(cast(N), Lo, Hi); break; @@ -1421,6 +1424,38 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SetSplitVector(SDValue(N, ResNo), Lo, Hi); } +void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, + SDValue &Hi) { + EVT LoVT, HiVT; + SDLoc dl(LD); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); + + ISD::LoadExtType ExtType = LD->getExtensionType(); + SDValue Ch = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + + EVT IntVT = + EVT::getIntegerVT(*DAG.getContext(), LD->getValueType(0).getSizeInBits()); + EVT MemIntVT = + EVT::getIntegerVT(*DAG.getContext(), LD->getMemoryVT().getSizeInBits()); + SDValue ALD = DAG.getAtomicLoad(ExtType, dl, MemIntVT, IntVT, Ch, Ptr, + LD->getMemOperand()); + + EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits()); + EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits()); + SDValue ExtractLo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, LoIntVT, ALD, + DAG.getIntPtrConstant(0, dl)); + SDValue ExtractHi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiIntVT, ALD, + DAG.getIntPtrConstant(1, dl)); + + Lo = DAG.getBitcast(LoVT, ExtractLo); + Hi = DAG.getBitcast(HiVT, ExtractHi); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(LD, 1), ALD.getValue(1)); +} + void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT, MachinePointerInfo &MPI, SDValue &Ptr, uint64_t *ScaledOffset) { diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 935d058a52f8f..42b0955824293 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -204,6 +204,68 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) { ret <2 x float> %ret } +define <2 x half> @atomic_vec2_half(ptr %x) { +; CHECK3-LABEL: atomic_vec2_half: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movl (%rdi), %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK3-NEXT:shrl $16, %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm1 +; CHECK3-NEXT:punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_half: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movl (%rdi), %eax +; CHECK0-NEXT:movl %eax, %ecx +; CHECK0-NEXT:shrl $16, %ecx +; CHECK0-NEXT:movw %cx, %dx +; CHE
[llvm-branch-commits] [llvm] [X86] Manage atomic load of fp -> int promotion in DAG (PR #120386)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120386 >From 531bc05506d196392d1aac609049bda9cad7da01 Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:38:23 -0500 Subject: [PATCH] [X86] Manage atomic load of fp -> int promotion in DAG When lowering atomic <1 x T> vector types with floats, selection can fail since this pattern is unsupported. To support this, floats can be casted to an integer type of the same size. commit-id:f9d761c5 --- llvm/lib/Target/X86/X86ISelLowering.cpp| 4 +++ llvm/test/CodeGen/X86/atomic-load-store.ll | 37 ++ 2 files changed, 41 insertions(+) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f04603867a587..749fa34e791af 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2653,6 +2653,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(Op, MVT::f32, Promote); } + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64); + // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine({ISD::VECTOR_SHUFFLE, ISD::SCALAR_TO_VECTOR, diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index d23cfb89f9fc8..6efcbb80c0ce6 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -145,3 +145,40 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { %ret = load atomic <1 x i64>, ptr %x acquire, align 8 ret <1 x i64> %ret } + +define <1 x half> @atomic_vec1_half(ptr %x) { +; CHECK3-LABEL: atomic_vec1_half: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_half: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw 
(%rdi), %cx +; CHECK0-NEXT:## implicit-def: $eax +; CHECK0-NEXT:movw %cx, %ax +; CHECK0-NEXT:## implicit-def: $xmm0 +; CHECK0-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK0-NEXT:retq + %ret = load atomic <1 x half>, ptr %x acquire, align 2 + ret <1 x half> %ret +} + +define <1 x float> @atomic_vec1_float(ptr %x) { +; CHECK-LABEL: atomic_vec1_float: +; CHECK: ## %bb.0: +; CHECK-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT:retq + %ret = load atomic <1 x float>, ptr %x acquire, align 4 + ret <1 x float> %ret +} + +define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec1_double_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT:retq + %ret = load atomic <1 x double>, ptr %x acquire, align 8 + ret <1 x double> %ret +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SelectionDAG] Legalize <1 x T> vector types for atomic load (PR #120385)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120385 >From 08e39f28a1f041a72bbbf3ebe520f9136e5aebbc Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:37:17 -0500 Subject: [PATCH] [SelectionDAG] Legalize <1 x T> vector types for atomic load `load atomic <1 x T>` is not valid. This change legalizes vector types of atomic load via scalarization in SelectionDAG so that it can, for example, translate from `v1i32` to `i32`. commit-id:5c36cc8c --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 15 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 121 +- 3 files changed, 135 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 720393158aa5e..89ea7ef4dbe89 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -874,6 +874,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index d0b69b88748a9..8eee7a4c61fe6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -64,6 +64,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { R = ScalarizeVecRes_UnaryOpWithExtraInput(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +R = ScalarizeVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: R = 
ScalarizeVecRes_LOAD(cast(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; @@ -458,6 +461,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { return Op; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) { + SDValue Result = DAG.getAtomicLoad( + ISD::NON_EXTLOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(), + N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(), + N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { assert(N->isUnindexed() && "Indexed vector load?"); diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 5bce4401f7bdb..d23cfb89f9fc8 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK3 +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s --check-prefixes=CHECK,CHECK0 define void @test1(ptr %ptr, i32 %val1) { ; CHECK-LABEL: test1: @@ -28,3 +28,120 @@ define i32 @test3(ptr %ptr) { %val = load atomic i32, ptr %ptr seq_cst, align 4 ret i32 %val } + +define <1 x i32> @atomic_vec1_i32(ptr %x) { +; CHECK-LABEL: atomic_vec1_i32: +; CHECK: ## %bb.0: +; CHECK-NEXT:movl (%rdi), %eax +; CHECK-NEXT:retq + %ret = load atomic <1 x i32>, ptr %x 
acquire, align 4 + ret <1 x i32> %ret +} + +define <1 x i8> @atomic_vec1_i8(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i8: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzbl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i8: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movb (%rdi), %al +; CHECK0-NEXT:retq + %ret = load atomic <1 x i8>, ptr %x acquire, align 1 + ret <1 x i8> %ret +} + +define <1 x i16> @atomic_vec1_i16(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i16: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i16: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw (%rdi), %ax +; CHECK0-NEXT:retq + %ret = load atomic <1 x i16>, ptr %x acquire, align 2 + ret <1 x i16> %ret +} + +define <1 x i32> @atomic_vec1_i8_zext(ptr %x) { +; CHECK3-LABEL: atomic_ve
[llvm-branch-commits] [libcxx] release/20.x: [libcxx] Provide locale conversions to tests through lit substitution (#105651) (PR #136449)
mstorsjo wrote: I had a look at the test failures locally; this seems to be caused by updates on the macOS CI runners - which has been waived on the main branch via #135202 / 88e15b781506949c56936b8642774125772fdeb2. I guess we could backport that commit as well? We'd probably need to do that anyway if we want to have working macOS CI on the release branch - so that could be a separate individual backport that is done first. https://github.com/llvm/llvm-project/pull/136449 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [X86] Add atomic vector tests for unaligned >1 sizes. (PR #120387)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120387 >From 5ce8ea6a12090d66e6bacceea9837c54ee83 Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:40:32 -0500 Subject: [PATCH] [X86] Add atomic vector tests for unaligned >1 sizes. Unaligned atomic vectors with size >1 are lowered to calls. Adding their tests separately here. commit-id:a06a5cc6 --- llvm/test/CodeGen/X86/atomic-load-store.ll | 253 + 1 file changed, 253 insertions(+) diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 6efcbb80c0ce6..39e9fdfa5e62b 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -146,6 +146,34 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { ret <1 x i64> %ret } +define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_ptr: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movq (%rsp), %rax +; CHECK3-NEXT:popq %rcx +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_ptr: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq (%rsp), %rax +; CHECK0-NEXT:popq %rcx +; CHECK0-NEXT:retq + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} + define <1 x half> @atomic_vec1_half(ptr %x) { ; CHECK3-LABEL: atomic_vec1_half: ; CHECK3: ## %bb.0: @@ -182,3 +210,228 @@ define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { %ret = load atomic <1 x double>, ptr %x acquire, align 8 ret <1 x double> %ret } + +define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_i64: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; 
CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movq (%rsp), %rax +; CHECK3-NEXT:popq %rcx +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i64: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq (%rsp), %rax +; CHECK0-NEXT:popq %rcx +; CHECK0-NEXT:retq + %ret = load atomic <1 x i64>, ptr %x acquire, align 4 + ret <1 x i64> %ret +} + +define <1 x double> @atomic_vec1_double(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_double: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK3-NEXT:popq %rax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_double: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK0-NEXT:popq %rax +; CHECK0-NEXT:retq + %ret = load atomic <1 x double>, ptr %x acquire, align 4 + ret <1 x double> %ret +} + +define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec2_i32: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK3-NEXT:popq %rax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_i32: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq 
%rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq {{.*#+}} xmm0 = mem[0],zero +; CHECK0-NEXT:popq %rax +; CHECK0-NEXT:retq + %ret = load atomic <2 x i32>, ptr %x acquire, align 4 + ret <2 x i32> %ret +} + +define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_float_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16 +; CHECK-NEXT:movq %rdx, %xmm1 +; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[
[llvm-branch-commits] [llvm] [SelectionDAG] Widen <2 x T> vector types for atomic load (PR #120598)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120598 >From 99a560ff2b72ea0dd90c16fedeaac27820398079 Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 11:19:39 -0500 Subject: [PATCH] [SelectionDAG] Widen <2 x T> vector types for atomic load Vector types of 2 elements must be widened. This change does this for vector types of atomic load in SelectionDAG so that it can translate aligned vectors of >1 size. commit-id:2894ccd1 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 96 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 78 +++ 3 files changed, 154 insertions(+), 21 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 89ea7ef4dbe89..bdfa5f7741ad3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1062,6 +1062,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); + SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N); SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 8eee7a4c61fe6..6b3467573a0a2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4625,6 +4625,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +Res = WidenVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); 
break; case ISD::STEP_VECTOR: case ISD::SPLAT_VECTOR: @@ -6014,6 +6017,77 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { N->getOperand(1), N->getOperand(2)); } +/// Either return the same load or provide appropriate casts +/// from the load and return that. +static SDValue loadElement(SDValue LdOp, EVT FirstVT, EVT WidenVT, + TypeSize LdWidth, TypeSize FirstVTWidth, SDLoc dl, + SelectionDAG &DAG) { + assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth)); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + if (!FirstVT.isVector()) { +unsigned NumElts = +WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); +EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts); +SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); +return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); + } else if (FirstVT == WidenVT) +return LdOp; + else { +// TODO: We don't currently have any tests that exercise this code path. +assert(!"Unimplemented"); + } +} + +static std::optional findMemType(SelectionDAG &DAG, + const TargetLowering &TLI, unsigned Width, + EVT WidenVT, unsigned Align, + unsigned WidenEx); + +SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) { + EVT WidenVT = + TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + EVT LdVT = LD->getMemoryVT(); + SDLoc dl(LD); + assert(LdVT.isVector() && WidenVT.isVector() && "Expected vectors"); + assert(LdVT.isScalableVector() == WidenVT.isScalableVector() && + "Must be scalable"); + assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType() && + "Expected equivalent element types"); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); + AAMDNodes AAInfo = LD->getAAInfo(); + + TypeSize LdWidth = LdVT.getSizeInBits(); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + TypeSize WidthDiff = WidenWidth - LdWidth; + + // Find the vector 
type that can load from. + std::optional FirstVT = + findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, /*LdAlign=*/0, + WidthDiff.getKnownMinValue()); + + if (!FirstVT) +return SDValue(); + + SmallVector MemVTs; + TypeSize FirstVTWidth = FirstVT->getSizeInBits(); + + SDValue LdOp = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, *FirstVT, *FirstVT, + Chain, BasePtr, LD->getMemOperand()); + + // Load the element with one instruction. + SDValue Result = + loadElement(LdOp, *FirstVT, WidenVT, LdWidth, FirstVTWidth, dl, DAG); + + // Modified the
[llvm-branch-commits] [clang-tools-extra] [clang-doc] Add helpers for Template config (PR #138062)
Keenuts wrote: Hi, I'm trying to understand the heavy load on the CI we have. I see many force-push/sync of this PR (which triggers a partial build/test). Is this an automated sync? https://github.com/llvm/llvm-project/pull/138062 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP] cancel(lation point) taskgroup LLVMIR (PR #137841)
@@ -3075,9 +3115,6 @@ convertOmpCancel(omp::CancelOp op, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); - if (failed(checkImplementationStatus(*op.getOperation( -return failure(); - skatrak wrote: Nit: Even if all clauses for the operation are already supported, I think we should still call `checkImplementationStatus()`. It succeeds by default for operations that don't have explicit checks, and it makes adding new checks whenever unsupported operands are introduced easier later on. https://github.com/llvm/llvm-project/pull/137841 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP] cancel(lation point) taskgroup LLVMIR (PR #137841)
https://github.com/skatrak edited https://github.com/llvm/llvm-project/pull/137841 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] clang: Fix broken implicit cast to generic address space (PR #138863)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/138863 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] clang/OpenCL: Fix special casing OpenCL in call emission (PR #138864)
llvmbot wrote: @llvm/pr-subscribers-clang-codegen Author: Matt Arsenault (arsenm) Changes This essentially reverts 1bf1a156d673. OpenCL's handling of address spaces has always been a mess, but it's better than it used to be so this hack appears to be unnecessary now. None of the code here should really depend on the language or language address space. The ABI address space to use is already explicit in the ABIArgInfo, so use that instead of guessing it has anything to do with LangAS::Default or getASTAllocaAddressSpace. The below usage of LangAS::Default and getASTAllocaAddressSpace are also suspect, but appears to be a more involved and separate fix. --- Full diff: https://github.com/llvm/llvm-project/pull/138864.diff 1 Files Affected: - (modified) clang/lib/CodeGen/CGCall.cpp (+7-12) ``diff diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 82a24f7c295a2..1404bdfd69647 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5366,7 +5366,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, NeedCopy = true; } else if (I->hasLValue()) { auto LV = I->getKnownLValue(); - auto AS = LV.getAddressSpace(); bool isByValOrRef = ArgInfo.isIndirectAliased() || ArgInfo.getIndirectByVal(); @@ -5375,17 +5374,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, (LV.getAlignment() < getContext().getTypeAlignInChars(I->Ty))) { NeedCopy = true; } - if (!getLangOpts().OpenCL) { -if ((isByValOrRef && (AS != LangAS::Default && - AS != CGM.getASTAllocaAddressSpace( { - NeedCopy = true; -} - } - // For OpenCL even if RV is located in default or alloca address space - // we don't want to perform address space cast for it. 
- else if ((isByValOrRef && Addr.getType()->getAddressSpace() != -IRFuncTy->getParamType(FirstIRArg) -->getPointerAddressSpace())) { + + if (isByValOrRef && Addr.getType()->getAddressSpace() != + ArgInfo.getIndirectAddrSpace()) { NeedCopy = true; } } @@ -5396,6 +5387,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, auto *T = llvm::PointerType::get( CGM.getLLVMContext(), CGM.getDataLayout().getAllocaAddrSpace()); + // FIXME: This should not depend on the language address spaces, and + // only the contextual values. If the address space mismatches, see if + // we can look through a cast to a compatible address space value, + // otherwise emit a copy. llvm::Value *Val = getTargetHooks().performAddrSpaceCast( *this, V, LangAS::Default, CGM.getASTAllocaAddressSpace(), T, true); `` https://github.com/llvm/llvm-project/pull/138864 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] Enable fexec-charset option (PR #138895)
llvmbot wrote: @llvm/pr-subscribers-clang Author: Abhina Sree (abhina-sree) Changes This patch enables the fexec-charset option to control the execution charset of string literals. It sets the default internal charset, system charset, and execution charset for z/OS and UTF-8 for all other platforms. This patch depends on adding the CharSetConverter class https://github.com/llvm/llvm-project/pull/138893 --- Patch is 34.26 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/138895.diff 20 Files Affected: - (modified) clang/docs/LanguageExtensions.rst (+1-2) - (modified) clang/include/clang/Basic/LangOptions.h (+3) - (modified) clang/include/clang/Basic/TokenKinds.h (+7) - (modified) clang/include/clang/Driver/Options.td (+5) - (added) clang/include/clang/Lex/LiteralConverter.h (+36) - (modified) clang/include/clang/Lex/LiteralSupport.h (+8-4) - (modified) clang/include/clang/Lex/Preprocessor.h (+3) - (modified) clang/lib/Driver/ToolChains/Clang.cpp (+13-4) - (modified) clang/lib/Frontend/CompilerInstance.cpp (+4) - (modified) clang/lib/Frontend/InitPreprocessor.cpp (+8-4) - (modified) clang/lib/Lex/CMakeLists.txt (+1) - (added) clang/lib/Lex/LiteralConverter.cpp (+68) - (modified) clang/lib/Lex/LiteralSupport.cpp (+106-22) - (added) clang/test/CodeGen/systemz-charset.c (+35) - (added) clang/test/CodeGen/systemz-charset.cpp (+46) - (modified) clang/test/Driver/cl-options.c (+4-3) - (modified) clang/test/Driver/clang_f_opts.c (+9-3) - (modified) clang/test/Preprocessor/init-s390x.c (+1) - (modified) llvm/include/llvm/TargetParser/Triple.h (+3) - (modified) llvm/lib/TargetParser/Triple.cpp (+7) ``diff diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index ebcad44197ce4..44e20623d4d0b 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -416,8 +416,7 @@ Builtin Macros ``__clang_literal_encoding__`` Defined to a narrow string literal that represents the current 
encoding of narrow string literals, e.g., ``"hello"``. This macro typically expands to - "UTF-8" (but may change in the future if the - ``-fexec-charset="Encoding-Name"`` option is implemented.) + the charset specified by -fexec-charset if specified, or the system charset. ``__clang_wide_literal_encoding__`` Defined to a narrow string literal that represents the current encoding of diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index 491e8bee9fd5c..559a4be70b74c 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -633,6 +633,9 @@ class LangOptions : public LangOptionsBase { bool AtomicFineGrainedMemory = false; bool AtomicIgnoreDenormalMode = false; + /// Name of the exec charset to convert the internal charset to. + std::string ExecCharset; + LangOptions(); /// Set language defaults for the given input language and diff --git a/clang/include/clang/Basic/TokenKinds.h b/clang/include/clang/Basic/TokenKinds.h index 1b133dde89587..34f6133973e71 100644 --- a/clang/include/clang/Basic/TokenKinds.h +++ b/clang/include/clang/Basic/TokenKinds.h @@ -101,6 +101,13 @@ inline bool isLiteral(TokenKind K) { isStringLiteral(K) || K == tok::header_name || K == tok::binary_data; } +/// Return true if this is a utf literal kind. +inline bool isUTFLiteral(TokenKind K) { + return K == tok::utf8_char_constant || K == tok::utf8_string_literal || + K == tok::utf16_char_constant || K == tok::utf16_string_literal || + K == tok::utf32_char_constant || K == tok::utf32_string_literal; +} + /// Return true if this is any of tok::annot_* kinds. 
bool isAnnotation(TokenKind K); diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 30ea75bb108d5..9d352eb1270fe 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -7197,6 +7197,11 @@ let Visibility = [CC1Option, CC1AsOption, FC1Option] in { def tune_cpu : Separate<["-"], "tune-cpu">, HelpText<"Tune for a specific cpu type">, MarshallingInfoString>; +def fexec_charset : Separate<["-"], "fexec-charset">, MetaVarName<"">, + HelpText<"Set the execution for string and character literals. " + "Supported character encodings include ISO8859-1, UTF-8, IBM-1047 " + "and those supported by the host icu or iconv library.">, + MarshallingInfoString>; def target_cpu : Separate<["-"], "target-cpu">, HelpText<"Target a specific cpu type">, MarshallingInfoString>; diff --git a/clang/include/clang/Lex/LiteralConverter.h b/clang/include/clang/Lex/LiteralConverter.h new file mode 100644 index 0..203111255b791 --- /dev/null +++ b/clang/include/clang/Lex/LiteralConverter.h @@ -0,0 +1,36 @@ +//===--- clang/Lex/LiteralConverter.h - Translator for Literals -*- C++ -*-===// +// +// Part of the LLVM Project,
[llvm-branch-commits] [clang] [llvm] Enable fexec-charset option (PR #138895)
https://github.com/abhina-sree created https://github.com/llvm/llvm-project/pull/138895 This patch enables the fexec-charset option to control the execution charset of string literals. It sets the default internal charset, system charset, and execution charset for z/OS and UTF-8 for all other platforms. This patch depends on adding the CharSetConverter class https://github.com/llvm/llvm-project/pull/138893 Rate limit · GitHub body { background-color: #f6f8fa; color: #24292e; font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol; font-size: 14px; line-height: 1.5; margin: 0; } .container { margin: 50px auto; max-width: 600px; text-align: center; padding: 0 24px; } a { color: #0366d6; text-decoration: none; } a:hover { text-decoration: underline; } h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; text-shadow: 0 1px 0 #fff; } p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; } ul { list-style: none; margin: 25px 0; padding: 0; } li { display: table-cell; font-weight: bold; width: 1%; } .logo { display: inline-block; margin-top: 35px; } .logo-img-2x { display: none; } @media only screen and (-webkit-min-device-pixel-ratio: 2), only screen and ( min--moz-device-pixel-ratio: 2), only screen and ( -o-min-device-pixel-ratio: 2/1), only screen and (min-device-pixel-ratio: 2), only screen and (min-resolution: 192dpi), only screen and (min-resolution: 2dppx) { .logo-img-1x { display: none; } .logo-img-2x { display: inline-block; } } #suggestions { margin-top: 35px; color: #ccc; } #suggestions a { color: #66; font-weight: 200; font-size: 14px; margin: 0 10px; } Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour. 
<a href="https://support.github.com/contact">Contact Support</a> — <a href="https://githubstatus.com">GitHub Status</a> — <a href="https://twitter.com/githubstatus">@githubstatus</a> ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] clang/OpenCL: Fix special casing OpenCL in call emission (PR #138864)
https://github.com/svenvh approved this pull request. https://github.com/llvm/llvm-project/pull/138864 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Port ProcessImplicitDefs to NPM (PR #138829)
https://github.com/optimisan edited https://github.com/llvm/llvm-project/pull/138829 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Register Function Passes (PR #138828)
https://github.com/optimisan ready_for_review https://github.com/llvm/llvm-project/pull/138828 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Port ProcessImplicitDefs to NPM (PR #138829)
llvmbot wrote: @llvm/pr-subscribers-backend-x86 Author: Akshat Oke (optimisan) Changes AMDGPU/llc-pipeline-npm.ll test update is because the dummy class name is now changed to its CL name. --- Patch is 22.56 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/138829.diff 8 Files Affected: - (modified) llvm/include/llvm/InitializePasses.h (+1-1) - (modified) llvm/include/llvm/Passes/CodeGenPassBuilder.h (+1) - (modified) llvm/include/llvm/Passes/MachinePassRegistry.def (+1-1) - (modified) llvm/lib/CodeGen/CodeGen.cpp (+1-1) - (modified) llvm/lib/CodeGen/ProcessImplicitDefs.cpp (+39-18) - (modified) llvm/lib/Passes/PassBuilder.cpp (+1) - (modified) llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll (+2-2) - (modified) llvm/test/CodeGen/X86/unreachable-mbb-undef-phi.mir (+1) ``diff diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index e75f9c7a2cfe8..9a380994c2886 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -246,7 +246,7 @@ void initializePostRASchedulerLegacyPass(PassRegistry &); void initializePreISelIntrinsicLoweringLegacyPassPass(PassRegistry &); void initializePrintFunctionPassWrapperPass(PassRegistry &); void initializePrintModulePassWrapperPass(PassRegistry &); -void initializeProcessImplicitDefsPass(PassRegistry &); +void initializeProcessImplicitDefsLegacyPass(PassRegistry &); void initializeProfileSummaryInfoWrapperPassPass(PassRegistry &); void initializePromoteLegacyPassPass(PassRegistry &); void initializeRABasicPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 648437fe12c91..0704ae3b2ec1d 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -71,6 +71,7 @@ #include "llvm/CodeGen/PeepholeOptimizer.h" #include "llvm/CodeGen/PostRASchedulerList.h" #include 
"llvm/CodeGen/PreISelIntrinsicLowering.h" +#include "llvm/CodeGen/ProcessImplicitDefs.h" #include "llvm/CodeGen/RegAllocEvictionAdvisor.h" #include "llvm/CodeGen/RegAllocFast.h" #include "llvm/CodeGen/RegAllocGreedyPass.h" diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 8717b79b26968..9d75e4642db00 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -184,6 +184,7 @@ MACHINE_FUNCTION_PASS("print", MachineUniformityPrinterPass(errs())) MACHINE_FUNCTION_PASS("print", SlotIndexesPrinterPass(errs())) MACHINE_FUNCTION_PASS("print", VirtRegMapPrinterPass(errs())) +MACHINE_FUNCTION_PASS("process-imp-defs", ProcessImplicitDefsPass()) MACHINE_FUNCTION_PASS("prolog-epilog", PrologEpilogInserterPass()) MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass()) MACHINE_FUNCTION_PASS("reg-usage-propagation", RegUsageInfoPropagationPass()) @@ -316,7 +317,6 @@ DUMMY_MACHINE_FUNCTION_PASS("static-data-splitter", StaticDataSplitter) DUMMY_MACHINE_FUNCTION_PASS("machine-function-splitter", MachineFunctionSplitterPass) DUMMY_MACHINE_FUNCTION_PASS("machineinstr-printer", MachineFunctionPrinterPass) DUMMY_MACHINE_FUNCTION_PASS("mirfs-discriminators", MIRAddFSDiscriminatorsPass) -DUMMY_MACHINE_FUNCTION_PASS("processimpdefs", ProcessImplicitDefsPass) DUMMY_MACHINE_FUNCTION_PASS("prologepilog-code", PrologEpilogCodeInserterPass) DUMMY_MACHINE_FUNCTION_PASS("ra-basic", RABasicPass) DUMMY_MACHINE_FUNCTION_PASS("ra-pbqp", RAPBQPPass) diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 065fd4704ccfb..6e12edc3d3a5b 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -110,7 +110,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializePostRAMachineSinkingLegacyPass(Registry); initializePostRASchedulerLegacyPass(Registry); 
initializePreISelIntrinsicLoweringLegacyPassPass(Registry); - initializeProcessImplicitDefsPass(Registry); + initializeProcessImplicitDefsLegacyPass(Registry); initializeRABasicPass(Registry); initializeRAGreedyLegacyPass(Registry); initializeRegAllocFastPass(Registry); diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp index be81ecab9c897..54fd7814ef4f3 100644 --- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp @@ -6,6 +6,7 @@ // //===--===// +#include "llvm/CodeGen/ProcessImplicitDefs.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -26,24 +27,15 @@ using namespace llvm; namespace { /// Process IMPLICIT_DEF instructions and make sure there is one implicit_def /// for each use. Add isUndef marker to implicit_def defs and their uses.
[llvm-branch-commits] [llvm] [CodeGen][NPM] Port ProcessImplicitDefs to NPM (PR #138829)
https://github.com/optimisan ready_for_review https://github.com/llvm/llvm-project/pull/138829 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 137a062 - Revert "[RISCV] Initial codegen support for zvqdotq extension (#137039)"
Author: Philip Reames Date: 2025-05-07T08:18:37-07:00 New Revision: 137a062e2185cdd797c0c009f556a200e8194009 URL: https://github.com/llvm/llvm-project/commit/137a062e2185cdd797c0c009f556a200e8194009 DIFF: https://github.com/llvm/llvm-project/commit/137a062e2185cdd797c0c009f556a200e8194009.diff LOG: Revert "[RISCV] Initial codegen support for zvqdotq extension (#137039)" This reverts commit 1ac489c8e38ecaeccba7d8826273395eaba2db6c. Added: Modified: llvm/lib/Target/RISCV/RISCVISelLowering.cpp llvm/lib/Target/RISCV/RISCVISelLowering.h llvm/lib/Target/RISCV/RISCVInstrInfoZvqdotq.td llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll Removed: diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 698b951ad4928..86f8873c135ef 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -6971,7 +6971,7 @@ static bool hasPassthruOp(unsigned Opcode) { Opcode <= RISCVISD::LAST_STRICTFP_OPCODE && "not a RISC-V target specific op"); static_assert( - RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 139 && + RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 134 && RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 && "adding target specific op should update this function"); if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL) @@ -6995,7 +6995,7 @@ static bool hasMaskOp(unsigned Opcode) { Opcode <= RISCVISD::LAST_STRICTFP_OPCODE && "not a RISC-V target specific op"); static_assert( - RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 139 && + RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 134 && RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 && "adding target specific op should update this function"); if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL) @@ -18101,118 +18101,6 @@ static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, 
DAG.getBuildVector(VT, DL, RHSOps)); } -static SDValue lowerVQDOT(unsigned Opc, SDValue Op0, SDValue Op1, - const SDLoc &DL, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { - assert(RISCVISD::VQDOT_VL == Opc || RISCVISD::VQDOTU_VL == Opc || - RISCVISD::VQDOTSU_VL == Opc); - MVT VT = Op0.getSimpleValueType(); - assert(VT == Op1.getSimpleValueType() && - VT.getVectorElementType() == MVT::i32); - - assert(VT.isFixedLengthVector()); - MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); - SDValue Passthru = convertToScalableVector( - ContainerVT, DAG.getConstant(0, DL, VT), DAG, Subtarget); - Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget); - Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); - - auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); - const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC; - SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT()); - SDValue LocalAccum = DAG.getNode(Opc, DL, ContainerVT, - {Op0, Op1, Passthru, Mask, VL, PolicyOp}); - return convertFromScalableVector(VT, LocalAccum, DAG, Subtarget); -} - -static MVT getQDOTXResultType(MVT OpVT) { - ElementCount OpEC = OpVT.getVectorElementCount(); - assert(OpEC.isKnownMultipleOf(4) && OpVT.getVectorElementType() == MVT::i8); - return MVT::getVectorVT(MVT::i32, OpEC.divideCoefficientBy(4)); -} - -static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL, - SelectionDAG &DAG, - const RISCVSubtarget &Subtarget, - const RISCVTargetLowering &TLI) { - // Note: We intentionally do not check the legality of the reduction type. - // We want to handle the m4/m8 *src* types, and thus need to let illegal - // intermediate types flow through here. - if (InVec.getValueType().getVectorElementType() != MVT::i32 || - !InVec.getValueType().getVectorElementCount().isKnownMultipleOf(4)) -return SDValue(); - - // reduce (zext a) <--> reduce (mul zext a. 
zext 1) - // reduce (sext a) <--> reduce (mul sext a. sext 1) - if (InVec.getOpcode() == ISD::ZERO_EXTEND || - InVec.getOpcode() == ISD::SIGN_EXTEND) { -SDValue A = InVec.getOperand(0); -if (A.getValueType().getVectorElementType() != MVT::i8 || -!TLI.isTypeLegal(A.getValueType())) - return SDValue(); - -MVT ResVT = getQDOTXResultType(A.getSimpleValueType()); -A = DAG.getBitcast(ResVT, A); -SDValue B = DAG.getConstant(0x01010101, DL, ResVT); - -bool IsSigned = InVec.getOpco
[llvm-branch-commits] [clang] clang: Remove dest LangAS argument from performAddrSpaceCast (PR #138866)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/138866?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#138866** https://app.graphite.dev/github/pr/llvm/llvm-project/138866?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/138866?utm_source=stack-comment-view-in-graphite"; target="_blank">(View in Graphite) * **#138865** https://app.graphite.dev/github/pr/llvm/llvm-project/138865?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#138864** https://app.graphite.dev/github/pr/llvm/llvm-project/138864?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#138863** https://app.graphite.dev/github/pr/llvm/llvm-project/138863?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#138862** https://app.graphite.dev/github/pr/llvm/llvm-project/138862?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn more about https://stacking.dev/?utm_source=stack-comment";>stacking. https://github.com/llvm/llvm-project/pull/138866 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] clang/OpenCL: Fix special casing OpenCL in call emission (PR #138864)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/138864?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#138864** https://app.graphite.dev/github/pr/llvm/llvm-project/138864?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/138864?utm_source=stack-comment-view-in-graphite"; target="_blank">(View in Graphite) * **#138863** https://app.graphite.dev/github/pr/llvm/llvm-project/138863?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#138862** https://app.graphite.dev/github/pr/llvm/llvm-project/138862?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn more about https://stacking.dev/?utm_source=stack-comment";>stacking. https://github.com/llvm/llvm-project/pull/138864 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][MemRef] Use specialized index ops to fold expand/collapse_shape (PR #138930)
llvmbot wrote: @llvm/pr-subscribers-mlir Author: Krzysztof Drewniak (krzysz00) Changes This PR updates the FoldMemRefAliasOps to use `affine.linearize_index` and `affine.delinearize_index` to perform the index computations needed to fold a `memref.expand_shape` or `memref.collapse_shape` into its consumers, respectively. This also loosens some limitations of the pass: 1. The existing `output_shape` argument to `memref.expand_shape` is now used, eliminating the need to re-infer this shape or call `memref.dim`. 2. Because we're using `affine.delinearize_index`, the restriction that each group in a `memref.collapse_shape` can only have one dynamic dimension is removed. --- Patch is 31.32 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/138930.diff 3 Files Affected: - (modified) mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td (+10-4) - (modified) mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp (+49-120) - (modified) mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir (+64-65) ``diff diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td index d6d8161d3117b..f34b5b46cab50 100644 --- a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td +++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td @@ -1342,14 +1342,14 @@ def MemRef_ReinterpretCastOp according to specified offsets, sizes, and strides. 
```mlir -%result1 = memref.reinterpret_cast %arg0 to +%result1 = memref.reinterpret_cast %arg0 to offset: [9], sizes: [4, 4], strides: [16, 2] : memref<8x8xf32, strided<[8, 1], offset: 0>> to memref<4x4xf32, strided<[16, 2], offset: 9>> -%result2 = memref.reinterpret_cast %result1 to +%result2 = memref.reinterpret_cast %result1 to offset: [0], sizes: [2, 2], strides: [4, 2] @@ -1755,6 +1755,12 @@ def MemRef_ExpandShapeOp : MemRef_ReassociativeReshapeOp<"expand_shape", [ OpBuilder &b, Location loc, MemRefType expandedType, ArrayRef reassociation, ArrayRef inputShape); + +// Return a vector with all the static and dynamic values in the output shape. +SmallVector getMixedOutputShape() { + OpBuilder builder(getContext()); + return ::mlir::getMixedValues(getStaticOutputShape(), getOutputShape(), builder); +} }]; let hasVerifier = 1; @@ -1873,7 +1879,7 @@ def MemRef_StoreOp : MemRef_Op<"store", let summary = "store operation"; let description = [{ The `store` op stores an element into a memref at the specified indices. - + The number of indices must match the rank of the memref. The indices must be in-bounds: `0 <= idx < dim_size` @@ -2025,7 +2031,7 @@ def SubViewOp : MemRef_OpWithOffsetSizesAndStrides<"subview", [ Unlike the `reinterpret_cast`, the values are relative to the strided memref of the input (`%result1` in this case) and not its underlying memory. 
- + Example 2: ```mlir diff --git a/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp b/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp index e4fb3f9bb87ed..2acb90613e5d1 100644 --- a/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp +++ b/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp @@ -59,92 +59,28 @@ using namespace mlir; /// /// %2 = load %0[6 * i1 + i2, %i3] : /// memref<12x42xf32> -static LogicalResult -resolveSourceIndicesExpandShape(Location loc, PatternRewriter &rewriter, -memref::ExpandShapeOp expandShapeOp, -ValueRange indices, -SmallVectorImpl &sourceIndices) { - // Record the rewriter context for constructing ops later. - MLIRContext *ctx = rewriter.getContext(); - - // Capture expand_shape's input dimensions as `SmallVector`. - // This is done for the purpose of inferring the output shape via - // `inferExpandOutputShape` which will in turn be used for suffix product - // calculation later. - SmallVector srcShape; - MemRefType srcType = expandShapeOp.getSrcType(); - - for (int64_t i = 0, e = srcType.getRank(); i < e; ++i) { -if (srcType.isDynamicDim(i)) { - srcShape.push_back( - rewriter.create(loc, expandShapeOp.getSrc(), i) - .getResult()); -} else { - srcShape.push_back(rewriter.getIndexAttr(srcType.getShape()[i])); -} - } - - auto outputShape = inferExpandShapeOutputShape( - rewriter, loc, expandShapeOp.getResultType(), - expandShapeOp.getReassociationIndices(), srcShape); - if (!outputShape.has_value()) -return failure(); +static LogicalResult resolveSourceIndicesExpandShape( +Location loc, PatternRewriter &rewriter, +memref::ExpandShapeOp expandShapeOp, ValueRange indices, +SmallVectorImpl &sourceIndices, bool startsInbounds) { + SmallVector destShape = expandShapeOp.getMixedOutputSha
[llvm-branch-commits] [mlir] [mlir][MemRef] Use specialized index ops to fold expand/collapse_shape (PR #138930)
https://github.com/krzysz00 created https://github.com/llvm/llvm-project/pull/138930 This PR updates the FoldMemRefAliasOps to use `affine.linearize_index` and `affine.delinearize_index` to perform the index computations needed to fold a `memref.expand_shape` or `memref.collapse_shape` into its consumers, respectively. This also loosens some limitations of the pass: 1. The existing `output_shape` argument to `memref.expand_shape` is now used, eliminating the need to re-infer this shape or call `memref.dim`. 2. Because we're using `affine.delinearize_index`, the restriction that each group in a `memref.collapse_shape` can only have one dynamic dimension is removed. Rate limit · GitHub body { background-color: #f6f8fa; color: #24292e; font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol; font-size: 14px; line-height: 1.5; margin: 0; } .container { margin: 50px auto; max-width: 600px; text-align: center; padding: 0 24px; } a { color: #0366d6; text-decoration: none; } a:hover { text-decoration: underline; } h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; text-shadow: 0 1px 0 #fff; } p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; } ul { list-style: none; margin: 25px 0; padding: 0; } li { display: table-cell; font-weight: bold; width: 1%; } .logo { display: inline-block; margin-top: 35px; } .logo-img-2x { display: none; } @media only screen and (-webkit-min-device-pixel-ratio: 2), only screen and ( min--moz-device-pixel-ratio: 2), only screen and ( -o-min-device-pixel-ratio: 2/1), only screen and (min-device-pixel-ratio: 2), only screen and (min-resolution: 192dpi), only screen and (min-resolution: 2dppx) { .logo-img-1x { display: none; } .logo-img-2x { display: inline-block; } } #suggestions { margin-top: 35px; color: #ccc; } #suggestions a { color: #66; font-weight: 200; font-size: 14px; margin: 0 10px; } Whoa there! You have exceeded a secondary rate limit. 
Please wait a few minutes before you try again; in some cases this may take up to an hour. https://support.github.com/contact";>Contact Support — https://githubstatus.com";>GitHub Status — https://twitter.com/githubstatus";>@githubstatus ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][MemRef] Use specialized index ops to fold expand/collapse_shape (PR #138930)
llvmbot wrote: @llvm/pr-subscribers-mlir-memref Author: Krzysztof Drewniak (krzysz00) Changes This PR updates the FoldMemRefAliasOps to use `affine.linearize_index` and `affine.delinearize_index` to perform the index computations needed to fold a `memref.expand_shape` or `memref.collapse_shape` into its consumers, respectively. This also loosens some limitations of the pass: 1. The existing `output_shape` argument to `memref.expand_shape` is now used, eliminating the need to re-infer this shape or call `memref.dim`. 2. Because we're using `affine.delinearize_index`, the restriction that each group in a `memref.collapse_shape` can only have one dynamic dimension is removed. --- Patch is 31.32 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/138930.diff 3 Files Affected: - (modified) mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td (+10-4) - (modified) mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp (+49-120) - (modified) mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir (+64-65) ``diff diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td index d6d8161d3117b..f34b5b46cab50 100644 --- a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td +++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td @@ -1342,14 +1342,14 @@ def MemRef_ReinterpretCastOp according to specified offsets, sizes, and strides. 
```mlir -%result1 = memref.reinterpret_cast %arg0 to +%result1 = memref.reinterpret_cast %arg0 to offset: [9], sizes: [4, 4], strides: [16, 2] : memref<8x8xf32, strided<[8, 1], offset: 0>> to memref<4x4xf32, strided<[16, 2], offset: 9>> -%result2 = memref.reinterpret_cast %result1 to +%result2 = memref.reinterpret_cast %result1 to offset: [0], sizes: [2, 2], strides: [4, 2] @@ -1755,6 +1755,12 @@ def MemRef_ExpandShapeOp : MemRef_ReassociativeReshapeOp<"expand_shape", [ OpBuilder &b, Location loc, MemRefType expandedType, ArrayRef reassociation, ArrayRef inputShape); + +// Return a vector with all the static and dynamic values in the output shape. +SmallVector getMixedOutputShape() { + OpBuilder builder(getContext()); + return ::mlir::getMixedValues(getStaticOutputShape(), getOutputShape(), builder); +} }]; let hasVerifier = 1; @@ -1873,7 +1879,7 @@ def MemRef_StoreOp : MemRef_Op<"store", let summary = "store operation"; let description = [{ The `store` op stores an element into a memref at the specified indices. - + The number of indices must match the rank of the memref. The indices must be in-bounds: `0 <= idx < dim_size` @@ -2025,7 +2031,7 @@ def SubViewOp : MemRef_OpWithOffsetSizesAndStrides<"subview", [ Unlike the `reinterpret_cast`, the values are relative to the strided memref of the input (`%result1` in this case) and not its underlying memory. 
- + Example 2: ```mlir diff --git a/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp b/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp index e4fb3f9bb87ed..2acb90613e5d1 100644 --- a/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp +++ b/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp @@ -59,92 +59,28 @@ using namespace mlir; /// /// %2 = load %0[6 * i1 + i2, %i3] : /// memref<12x42xf32> -static LogicalResult -resolveSourceIndicesExpandShape(Location loc, PatternRewriter &rewriter, -memref::ExpandShapeOp expandShapeOp, -ValueRange indices, -SmallVectorImpl &sourceIndices) { - // Record the rewriter context for constructing ops later. - MLIRContext *ctx = rewriter.getContext(); - - // Capture expand_shape's input dimensions as `SmallVector`. - // This is done for the purpose of inferring the output shape via - // `inferExpandOutputShape` which will in turn be used for suffix product - // calculation later. - SmallVector srcShape; - MemRefType srcType = expandShapeOp.getSrcType(); - - for (int64_t i = 0, e = srcType.getRank(); i < e; ++i) { -if (srcType.isDynamicDim(i)) { - srcShape.push_back( - rewriter.create(loc, expandShapeOp.getSrc(), i) - .getResult()); -} else { - srcShape.push_back(rewriter.getIndexAttr(srcType.getShape()[i])); -} - } - - auto outputShape = inferExpandShapeOutputShape( - rewriter, loc, expandShapeOp.getResultType(), - expandShapeOp.getReassociationIndices(), srcShape); - if (!outputShape.has_value()) -return failure(); +static LogicalResult resolveSourceIndicesExpandShape( +Location loc, PatternRewriter &rewriter, +memref::ExpandShapeOp expandShapeOp, ValueRange indices, +SmallVectorImpl &sourceIndices, bool startsInbounds) { + SmallVector destShape = expandShapeOp.getMixedOu
[llvm-branch-commits] [mlir] [mlir][MemRef] Use specialized index ops to fold expand/collapse_shape (PR #138930)
krzysz00 wrote: PRs this depends on: * #138929 https://github.com/llvm/llvm-project/pull/138930 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [Hexagon] Add missing patterns to select PFALSE and PTRUE (#138712) (PR #138770)
https://github.com/iajbar approved this pull request. https://github.com/llvm/llvm-project/pull/138770 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm][IR] Treat memcmp and bcmp as libcalls (PR #135706)
ilovepi wrote: > I can join libc monthly meeting, sure. Great. Looking forward to discussing this then. I'll add the topic to the meeting agenda. https://github.com/llvm/llvm-project/pull/135706 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] clang: Fix broken implicit cast to generic address space (PR #138863)
https://github.com/efriedma-quic approved this pull request. https://github.com/llvm/llvm-project/pull/138863 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Account inserted passes for -start/stop options (PR #138830)
https://github.com/optimisan edited https://github.com/llvm/llvm-project/pull/138830 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Update `do concurrent` mapping pass to use `fir.do_concurrent` op (PR #138489)
@@ -548,19 +357,16 @@ class DoConcurrentConversionPass return; } -llvm::DenseSet concurrentLoopsToSkip; +llvm::DenseSet concurrentLoopsToSkip; tblah wrote: My concern was that the walk order might be changed some time because I didn't realize that was a reliable part of the API. However MLIR has so many downstream users, this is probably a theoretical concern only. Ahh I see. This is okay for now then, because I can't think of a better solution off the top of my head either. https://github.com/llvm/llvm-project/pull/138489 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Register Function Passes (PR #138828)
https://github.com/paperchalice edited https://github.com/llvm/llvm-project/pull/138828 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Update `do concurrent` mapping pass to use `fir.do_concurrent` op (PR #138489)
https://github.com/tblah approved this pull request. https://github.com/llvm/llvm-project/pull/138489 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang] Generalize names of delayed privatization CLI flags (PR #138816)
https://github.com/tblah approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/138816 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Port ProcessImplicitDefs to NPM (PR #138829)
optimisan wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/138829). > Learn more: https://graphite.dev/docs/merge-pull-requests

Stack:
* **#138830** (https://app.graphite.dev/github/pr/llvm/llvm-project/138830)
* **#138829** (https://app.graphite.dev/github/pr/llvm/llvm-project/138829) 👈 this PR — view in Graphite
* **#138828** (https://app.graphite.dev/github/pr/llvm/llvm-project/138828)
* **#138670** (https://app.graphite.dev/github/pr/llvm/llvm-project/138670)
* **#138660** (https://app.graphite.dev/github/pr/llvm/llvm-project/138660)
* **#138497** (https://app.graphite.dev/github/pr/llvm/llvm-project/138497)
* **#138496** (https://app.graphite.dev/github/pr/llvm/llvm-project/138496)
* **#138495** (https://app.graphite.dev/github/pr/llvm/llvm-project/138495)
* **#138491** (https://app.graphite.dev/github/pr/llvm/llvm-project/138491)
* **#136818** (https://app.graphite.dev/github/pr/llvm/llvm-project/136818)
* `main`

This stack of pull requests is managed by Graphite (https://graphite.dev). Learn more about stacking: https://stacking.dev/ https://github.com/llvm/llvm-project/pull/138829 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -3235,6 +3263,36 @@ void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent, } #endif +void VPAliasLaneMaskRecipe::execute(VPTransformState &State) { + IRBuilderBase Builder = State.Builder; + Value *SinkValue = State.get(getSinkValue(), true); + Value *SourceValue = State.get(getSourceValue(), true); + + auto *Type = SinkValue->getType(); + Value *AliasMask = Builder.CreateIntrinsic( + Intrinsic::experimental_get_alias_lane_mask, + {VectorType::get(Builder.getInt1Ty(), State.VF), Type, + Builder.getInt64Ty()}, + {SourceValue, SinkValue, Builder.getInt64(getAccessedElementSize()), + Builder.getInt1(WriteAfterRead)}, + nullptr, "alias.lane.mask"); + State.set(this, AliasMask, /*IsScalar=*/false); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPAliasLaneMaskRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "EMIT "; + getVPSingleValue()->printAsOperand(O, SlotTracker); + O << " = alias lane mask "; SamTebbs33 wrote: Done. https://github.com/llvm/llvm-project/pull/100579 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -195,6 +195,13 @@ enum class TailFoldingStyle { DataWithEVL, }; +enum class RTCheckStyle { + /// Branch to scalar loop if checks fails at runtime. + ScalarFallback, + /// Form a mask based on elements which won't be a WAR or RAW hazard SamTebbs33 wrote: Done. https://github.com/llvm/llvm-project/pull/100579 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits