[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2025-11-21 Thread Florian Hahn via llvm-branch-commits


@@ -8974,11 +8982,104 @@ void LoopVectorizationPlanner::attachRuntimeChecks(
 assert((!CM.OptForSize ||
 CM.Hints->getForce() == LoopVectorizeHints::FK_Enabled) &&
"Cannot SCEV check stride or overflow when optimizing for size");
-VPlanTransforms::attachCheckBlock(Plan, SCEVCheckCond, SCEVCheckBlock,
+VPlanTransforms::attachCheckBlock(Plan, Plan.getOrAddLiveIn(SCEVCheckCond),
+  
Plan.createVPIRBasicBlock(SCEVCheckBlock),
   HasBranchWeights);
   }
   const auto &[MemCheckCond, MemCheckBlock] = RTChecks.getMemRuntimeChecks();
   if (MemCheckBlock && MemCheckBlock->hasNPredecessors(0)) {
+VPValue *MemCheckCondVPV = Plan.getOrAddLiveIn(MemCheckCond);
+VPBasicBlock *MemCheckBlockVP = Plan.createVPIRBasicBlock(MemCheckBlock);
+std::optional> ChecksOpt =
+CM.Legal->getRuntimePointerChecking()->getDiffChecks();
+
+// Create a mask enabling safe elements for each iteration.
+if (CM.getRTCheckStyle(TTI) == RTCheckStyle::UseSafeEltsMask &&

fhahn wrote:

would be good to outline to a separate function + document the transform


https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2025-11-21 Thread Florian Hahn via llvm-branch-commits


@@ -8974,11 +8982,104 @@ void LoopVectorizationPlanner::attachRuntimeChecks(
 assert((!CM.OptForSize ||
 CM.Hints->getForce() == LoopVectorizeHints::FK_Enabled) &&
"Cannot SCEV check stride or overflow when optimizing for size");
-VPlanTransforms::attachCheckBlock(Plan, SCEVCheckCond, SCEVCheckBlock,
+VPlanTransforms::attachCheckBlock(Plan, Plan.getOrAddLiveIn(SCEVCheckCond),
+  
Plan.createVPIRBasicBlock(SCEVCheckBlock),
   HasBranchWeights);
   }
   const auto &[MemCheckCond, MemCheckBlock] = RTChecks.getMemRuntimeChecks();
   if (MemCheckBlock && MemCheckBlock->hasNPredecessors(0)) {
+VPValue *MemCheckCondVPV = Plan.getOrAddLiveIn(MemCheckCond);
+VPBasicBlock *MemCheckBlockVP = Plan.createVPIRBasicBlock(MemCheckBlock);
+std::optional> ChecksOpt =
+CM.Legal->getRuntimePointerChecking()->getDiffChecks();
+
+// Create a mask enabling safe elements for each iteration.
+if (CM.getRTCheckStyle(TTI) == RTCheckStyle::UseSafeEltsMask &&
+ChecksOpt.has_value() && ChecksOpt->size() > 0) {
+  ArrayRef Checks = *ChecksOpt;
+  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+  VPBasicBlock *LoopBody = LoopRegion->getEntryBasicBlock();
+  VPBuilder Builder(MemCheckBlockVP);
+
+  /// Create a mask for each possibly-aliasing pointer pair, ANDing them if
+  /// there's more than one pair.
+  VPValue *AliasMask = nullptr;
+  for (PointerDiffInfo Check : Checks) {
+VPValue *Sink =
+vputils::getOrCreateVPValueForSCEVExpr(Plan, Check.SinkStart);
+VPValue *Src =
+vputils::getOrCreateVPValueForSCEVExpr(Plan, Check.SrcStart);
+
+Type *PtrType = PointerType::getUnqual(Plan.getContext());
+Sink = Builder.createScalarCast(Instruction::CastOps::IntToPtr, Sink,
+PtrType, DebugLoc());
+Src = Builder.createScalarCast(Instruction::CastOps::IntToPtr, Src,
+   PtrType, DebugLoc());
+
+SmallVector Ops{
+Src, Sink,
+Plan.getConstantInt(IntegerType::getInt64Ty(Plan.getContext()),
+Check.AccessSize)};
+VPWidenIntrinsicRecipe *M = new VPWidenIntrinsicRecipe(
+Check.WriteAfterRead ? Intrinsic::loop_dependence_war_mask
+ : Intrinsic::loop_dependence_raw_mask,
+Ops, IntegerType::getInt1Ty(Plan.getContext()));
+MemCheckBlockVP->appendRecipe(M);
+if (AliasMask)
+  AliasMask = Builder.createAnd(AliasMask, M);
+else
+  AliasMask = M;
+  }
+  assert(AliasMask && "Expected an alias mask to have been created");
+
+  // Replace uses of the loop body's active lane mask phi with an AND of 
the
+  // phi and the alias mask.
+  for (VPRecipeBase &R : *LoopBody) {
+auto *MaskPhi = dyn_cast(&R);

fhahn wrote:

I don't think we necessarily need an active-lane-mask, as long as either all 
recipes that need predication (memory ops, ops that are immediate UB on poison, 
reduction/recurrences) are already predicated (could be due to tail-folding 
without active-lane-mask) or we could convert them to predicated variants using 
the alias mask.

Also, an active-lane-mask does not necessarily mean all required recipes 
are predicated and use the active-lane-mask (e.g. a transform may convert a 
masked memory access to an unmasked one, if it is guaranteed dereferenceable 
for the whole loop).

So would probably be good to check if all required recipes are masked and make 
sure their masks include AliasMask after the transform

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][OpenMP] Implement loop nest parser (PR #168884)

2025-11-21 Thread Kiran Chandramohan via llvm-branch-commits

kiranchandramohan wrote:

Thanks for the explanation @kparzysz. Please treat my question as a passthrough 
comment. Feel free to go ahead.

https://github.com/llvm/llvm-project/pull/168884
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [dwarf] make dwarf fission compatible with RISCV relaxations 2/2 (PR #164813)

2025-11-21 Thread via llvm-branch-commits

https://github.com/daniilavdeev updated 
https://github.com/llvm/llvm-project/pull/164813

>From f11530834eb375c8003e996ba1385089911170b2 Mon Sep 17 00:00:00 2001
From: Daniil Avdeev 
Date: Thu, 18 Sep 2025 02:05:39 +
Subject: [PATCH 1/4] [dwarf] make dwarf fission compatible with RISCV
 relaxations 2/2

This patch makes DWARF fission compatible with RISC-V relaxations by
using indirect addressing for the DW_AT_high_pc attribute. This
eliminates the remaining relocations in .dwo files.
---
 .../CodeGen/AsmPrinter/DwarfCompileUnit.cpp   |  8 +--
 llvm/test/DebugInfo/RISCV/relax_dwo_ranges.ll | 53 +--
 2 files changed, 41 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp 
b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 751d3735d3b2b..2e4a26ef70bc2 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -493,10 +493,12 @@ void DwarfCompileUnit::attachLowHighPC(DIE &D, const 
MCSymbol *Begin,
   assert(End->isDefined() && "Invalid end label");
 
   addLabelAddress(D, dwarf::DW_AT_low_pc, Begin);
-  if (DD->getDwarfVersion() < 4)
-addLabelAddress(D, dwarf::DW_AT_high_pc, End);
-  else
+  if (DD->getDwarfVersion() >= 4 &&
+  (!isDwoUnit() || !llvm::isRangeRelaxable(Begin, End))) {
 addLabelDelta(D, dwarf::DW_AT_high_pc, End, Begin);
+return;
+  }
+  addLabelAddress(D, dwarf::DW_AT_high_pc, End);
 }
 
 // Add info for Wasm-global-based relocation.
diff --git a/llvm/test/DebugInfo/RISCV/relax_dwo_ranges.ll 
b/llvm/test/DebugInfo/RISCV/relax_dwo_ranges.ll
index ab76ce04dcdb0..40c7bcb6adab6 100644
--- a/llvm/test/DebugInfo/RISCV/relax_dwo_ranges.ll
+++ b/llvm/test/DebugInfo/RISCV/relax_dwo_ranges.ll
@@ -21,24 +21,30 @@
 ; RUN: llc -dwarf-version=5 -split-dwarf-file=foo.dwo -O0 
-mtriple=riscv64-unknown-linux-gnu -filetype=obj relax_dwo_ranges.ll -o %t.o
 ; RUN: llvm-dwarfdump -v %t.o | FileCheck --check-prefix=DWARF5 %s
 ; RUN: llvm-dwarfdump --debug-info %t.o 2> %t.txt
-; RUN: FileCheck --input-file=%t.txt %s --check-prefix=RELOCS 
--implicit-check-not=warning:
+; RUN: FileCheck --input-file=%t.txt %s --check-prefix=RELOCS --allow-empty 
--implicit-check-not=warning:
+; RUN: llvm-objdump -h %t | FileCheck --check-prefix=HDR %s
 
 ; RUN: llc -dwarf-version=4 -split-dwarf-file=foo.dwo -O0 
-mtriple=riscv64-unknown-linux-gnu -filetype=obj relax_dwo_ranges.ll -o %t.o
 ; RUN: llvm-dwarfdump -v %t.o | FileCheck --check-prefix=DWARF4 %s
 ; RUN: llvm-dwarfdump --debug-info %t.o 2> %t.txt
-; RUN: FileCheck --input-file=%t.txt %s --check-prefix=RELOCS 
--implicit-check-not=warning:
+; RUN: FileCheck --input-file=%t.txt %s --check-prefix=RELOCS --allow-empty 
--implicit-check-not=warning:
+; RUN: llvm-objdump -h %t | FileCheck --check-prefix=HDR %s
 
-; Currently, square() still uses an offset to represent the function's end 
address,
-; which requires a relocation here.
-; RELOCS: warning: unexpected relocations for dwo section '.debug_info.dwo'
+; RELOCS-NOT: warning: unexpected relocations for dwo section '.debug_info.dwo'
 
+; Make sure we don't produce any relocations in any .dwo section
+; HDR-NOT: .rela.{{.*}}.dwo
+
+; Ensure that 'square()' function uses indexed start and end addresses
 ; DWARF5: .debug_info.dwo contents:
 ; DWARF5: DW_TAG_subprogram
-; DWARF5-NEXT: DW_AT_low_pc [DW_FORM_addrx](indexed () address = 
0x ".text")
-; DWARF5-NEXT: DW_AT_high_pc [DW_FORM_data4] (0x)
-; DWARF5: DW_AT_name {{.*}} "square")
+; DWARF5-NEXT: DW_AT_low_pc  [DW_FORM_addrx](indexed () address = 
0x ".text")
+; DWARF5-NEXT: DW_AT_high_pc [DW_FORM_addrx](indexed (0001) address = 
0x0044 ".text")
+; DWARF5: DW_AT_name {{.*}} "square") 
 ; DWARF5: DW_TAG_formal_parameter
 
+; HDR-NOT: .rela.{{.*}}.dwo
+
 ; Ensure there is no unnecessary addresses in .o file
 ; DWARF5: .debug_addr contents:
 ; DWARF5: Addrs: [
@@ -48,6 +54,8 @@
 ; DWARF5-NEXT: 0x006e
 ; DWARF5-NEXT: ]
 
+; HDR-NOT: .rela.{{.*}}.dwo
+
 ; Ensure that 'boo()' and 'main()' use DW_RLE_startx_length and 
DW_RLE_startx_endx
 ; entries respectively
 ; DWARF5: .debug_rnglists.dwo contents:
@@ -55,24 +63,29 @@
 ; DWARF5-NEXT: 0x0014: [DW_RLE_startx_length]:  0x0001, 
0x0012 => [0x002c, 0x003e)
 ; DWARF5-NEXT: 0x0017: [DW_RLE_end_of_list  ]
 ; DWARF5-NEXT: 0x0018: [DW_RLE_startx_endx  ]:  0x0002, 
0x0003 => [0x003e, 0x006e)
+; DWARF5-NEXT: 0x0017: [DW_RLE_end_of_list  ]
 ; DWARF5-NEXT: 0x001b: [DW_RLE_end_of_list  ]
 ; DWARF5-EMPTY:
 
+; HDR-NOT: .rela.{{.*}}.dwo
+
 ; DWARF4: .debug_info.dwo contents:
 ; DWARF4: DW_TAG_subprogram
-; DWARF4-NEXT: DW_AT_low_pc [DW_FORM_GNU_addr_index]   (indexed () 
address = 0x ".text")
-; DWARF4-NEXT: DW_AT_high_pc [DW_FORM_data4]   (0x)
-; DWARF4: DW_AT_n

[llvm-branch-commits] [flang] [flang][OpenMP] Implement loop nest parser (PR #168884)

2025-11-21 Thread Michael Kruse via llvm-branch-commits

https://github.com/Meinersbur approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/168884
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang-tools-extra] [clang-doc] `<ul>` must be nested in `<li>` (PR #168972)

2025-11-21 Thread Paul Kirth via llvm-branch-commits

https://github.com/ilovepi approved this pull request.


https://github.com/llvm/llvm-project/pull/168972
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LowerMemIntrinsics] Optimize memset lowering (PR #169040)

2025-11-21 Thread Fabian Ritter via llvm-branch-commits

ritter-x2a wrote:

One thing that's not ideal with this patch is that the lowering for memsets 
with large statically known sizes wastes registers with the SelectionDAG ISel:
In this case, the IR lowering uses a `<64xi32>` store in the main memset loop.  
We correctly legalize this into 16 dwordx4 stores, but the huge `<64xi32>` 
splat value that is stored there (and which consists of 256 times the same 
byte) lives in a different basic block than the stores. SDAG ISel therefore 
doesn't know that those 64 32-bit registers with the same value are not needed 
at the same time and that 4 would be enough (GlobalISel, since it can look 
across BBs, doesn't have this problem).

You can see this for example in `@memset_p0_sz1055_align_4_varsetval` in 
`memset-param-combinations.ll`.

I tried adjusting the IR lowering to put the splat values in the same basic 
block as the accesses, but then they are LICM-ed out again.

I also tried adjusting the lowering to use N(=16) `<4xi32>` stores (with only a 
single `<4xi32>` splat), and while that fixed the register wastage, it made 
code generation worse in a different way, because the SCEV-based strength 
reduction (loop-reduce) then replaces the address computations with new 
computations that don't use `inbounds` and `nuw`, which means that offsets 
cannot be folded into store instructions in various cases. This even happens if 
I change the memset lowering to produce the form that loop-reduce would 
generate: it still re-generates the address computation minus the 
poison-generating flags.

The effect of the register wastage here is probably in practice not very 
dramatic because this only happens for quite large memsets that will take some 
time anyway, but do let me know if you have suggestions on how to avoid it.

https://github.com/llvm/llvm-project/pull/169040
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LowerMemIntrinsics] Optimize memset lowering (PR #169040)

2025-11-21 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Fabian Ritter (ritter-x2a)


Changes

This patch changes the memset lowering to match the optimized memcpy lowering.
The memset lowering now queries TTI.getMemcpyLoopLoweringType for a preferred
memory access type. If that type is larger than a byte, the memset is lowered
into two loops: a main loop that stores a sufficiently wide vector splat of the
SetValue with the preferred memory access type and a residual loop that covers
the remaining bytes individually. If the memset size is statically known, the
residual loop is replaced by a sequence of stores.

This improves memset performance on gfx1030 (AMDGPU) in microbenchmarks by
around 7-20x.

I'm planning similar treatment for memset.pattern as a follow-up PR.

For SWDEV-543208.

---

Patch is 343.27 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/169040.diff


17 Files Affected:

- (modified) llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h (+2-1) 
- (modified) llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp (+4-2) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp (+2-1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (+2-1) 
- (modified) llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp (+1-1) 
- (modified) llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp (+7-4) 
- (modified) llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp (+197-7) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll (+103-11) 
- (modified) llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll 
(+686-90) 
- (modified) llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-mem-transfer.ll 
(+218-116) 
- (modified) llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics-threshold.ll 
(+15-36) 
- (modified) llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll (+55-13) 
- (modified) llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll (+1616) 
- (added) llvm/test/CodeGen/AMDGPU/memset-param-combinations.ll (+1900) 
- (modified) llvm/test/CodeGen/NVPTX/lower-aggr-copies.ll (+4-4) 
- (modified) llvm/test/CodeGen/SPIRV/llvm-intrinsics/memset.ll (+2-2) 
- (modified) 
llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-inline-non-constant-len.ll
 (+12-12) 


``diff
diff --git a/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h 
b/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
index d4e72a60fc1ea..8924b8b1e6e54 100644
--- a/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
+++ b/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
@@ -59,7 +59,8 @@ LLVM_ABI bool expandMemMoveAsLoop(MemMoveInst *MemMove,
   const TargetTransformInfo &TTI);
 
 /// Expand \p MemSet as a loop. \p MemSet is not deleted.
-LLVM_ABI void expandMemSetAsLoop(MemSetInst *MemSet);
+LLVM_ABI void expandMemSetAsLoop(MemSetInst *MemSet,
+ const TargetTransformInfo &TTI);
 
 /// Expand \p MemSetPattern as a loop. \p MemSet is not deleted.
 LLVM_ABI void expandMemSetPatternAsLoop(MemSetPatternInst *MemSet);
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp 
b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index d738dc4eea36d..88e2bb81f9e3b 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -369,7 +369,7 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(
 canEmitLibcall(TM, ParentFunc, RTLIB::MEMSET))
   break;
 
-expandMemSetAsLoop(Memset);
+expandMemSetAsLoop(Memset, TTI);
 Changed = true;
 Memset->eraseFromParent();
   }
@@ -384,7 +384,9 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(
   if (isa(Memset->getLength()))
 break;
 
-  expandMemSetAsLoop(Memset);
+  Function *ParentFunc = Memset->getFunction();
+  const TargetTransformInfo &TTI = LookupTTI(*ParentFunc);
+  expandMemSetAsLoop(Memset, TTI);
   Changed = true;
   Memset->eraseFromParent();
   break;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
index fdff21b6ef8df..76f1e006bbf74 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
@@ -635,7 +635,8 @@ bool 
StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemSetInst(
 MemSetInst &MSI) {
   if (MSI.getDestAddressSpace() != AMDGPUAS::BUFFER_FAT_POINTER)
 return false;
-  llvm::expandMemSetAsLoop(&MSI);
+  llvm::expandMemSetAsLoop(&MSI,
+   TM->getTargetTransformInfo(*MSI.getFunction()));
   MSI.eraseFromParent();
   return true;
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 03d16fdd54c42..5a68dca1b10b7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@

[llvm-branch-commits] [llvm] [LowerMemIntrinsics] Optimize memset lowering (PR #169040)

2025-11-21 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a ready_for_review 
https://github.com/llvm/llvm-project/pull/169040
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Implement CFI for CSR spills (PR #164724)

2025-11-21 Thread Scott Linder via llvm-branch-commits


@@ -2244,17 +2244,49 @@ bool 
SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
   return true;
 }
 
+static bool isLiveIntoMBB(MCRegister Reg, MachineBasicBlock &MBB,

slinder1 wrote:

@arsenm I pushed a version which just walks regunit roots, see 
https://github.com/llvm/llvm-project/pull/164724/files#diff-9aed5b156d320e04c0a60491e5f42e1e9e275052fc148515089bc4796937c0f7R2247

Ignore the rest of the diff for now, I still can't get graphite to update these 
and need to manually go through and clean up the branches

https://github.com/llvm/llvm-project/pull/164724
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [Delinarization] Add test for inferred array size exceeds integer range (PR #169048)

2025-11-21 Thread Ryotaro Kasuga via llvm-branch-commits

https://github.com/kasuga-fj created 
https://github.com/llvm/llvm-project/pull/169048

None

>From 1ced6d1a40418905c97f57126c2b5cbfba136ea6 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga 
Date: Fri, 21 Nov 2025 13:57:06 +
Subject: [PATCH] [Delinarization] Add test for inferred array size exceeds
 integer range

---
 .../validation_parametric_sizes.ll| 87 +++
 1 file changed, 87 insertions(+)
 create mode 100644 
llvm/test/Analysis/Delinearization/validation_parametric_sizes.ll

diff --git a/llvm/test/Analysis/Delinearization/validation_parametric_sizes.ll 
b/llvm/test/Analysis/Delinearization/validation_parametric_sizes.ll
new file mode 100644
index 0..572875def52be
--- /dev/null
+++ b/llvm/test/Analysis/Delinearization/validation_parametric_sizes.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by 
utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes='print' -disable-output 2>&1 | 
FileCheck %s
+;
+; for (i = 0; i < n; i++)
+;   for (j = 0; j < m; j++)
+; for (k = 0; k < o; k++)
+;   if (i < 5 && j < 5 && k < 5)
+; A[i*m*o + j*o + k] = 0;
+;
+; FIXME: The product (%m * %o) can overflow, e.g., (%m, %o) = (2^32 - 1, 
2^32). In this case,
+; the delinearization `A[%i][%j][%k]` with its size `[][%m][%o]` should be
+; considered invalid, because the address calculation will be:
+;
+; A[%i][%j][%k] = %A + %i*%m*%o + %j*%o + %k
+;   = %A - 2^32*%i + %j*2^32 + %k
+;   = %A + 2^32*(%j - %i) + %k
+;
+; It means `&A[0][0][%k]` = `&A[1][1][%k]` = ..., which implies that the
+; mapping from subscripts to an address is not injective. We need to ensure 
that
+; the product of all dimensions (in this case `%m * %o`) doesn't overflow.
+;
+define void @f(i64 %n, i64 %m, i64 %o, ptr %A) {
+; CHECK-LABEL: 'f'
+; CHECK-NEXT:  Inst: store i8 0, ptr %gep, align 1
+; CHECK-NEXT:  AccessFunction: {{\{\{\{}}0,+,(%m * 
%o)}<%for.i.header>,+,%o}<%for.j.header>,+,1}<%for.k.header>
+; CHECK-NEXT:  Base offset: %A
+; CHECK-NEXT:  ArrayDecl[UnknownSize][%m][%o] with elements of 1 bytes.
+; CHECK-NEXT:  
ArrayRef[{0,+,1}<%for.i.header>][{0,+,1}<%for.j.header>][{0,+,1}<%for.k.header>]
+; CHECK-NEXT:  Delinearization validation: Succeeded
+;
+entry:
+  %guard.i = icmp sgt i64 %n, 0
+  %m_o = mul i64 %m, %o
+  br i1 %guard.i, label %for.i.header, label %exit
+
+for.i.header:
+  %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  %i_m_o = mul i64 %i, %m_o
+  br label %for.j.preheader
+
+for.j.preheader:
+  %guard.j = icmp sgt i64 %m, 0
+  br i1 %guard.j, label %for.j.header, label %for.i.latch
+
+for.j.header:
+  %j = phi i64 [ 0, %for.j.preheader ], [ %j.inc, %for.j.latch ]
+  %j_o = mul i64 %j, %o
+  br label %for.k.preheader
+
+for.k.preheader:
+  %guard.k = icmp sgt i64 %o, 0
+  br i1 %guard.k, label %for.k.header, label %for.j.latch
+
+for.k.header:
+  %k = phi i64 [ 0, %for.k.preheader ], [ %k.inc, %for.k.latch ]
+  %cond.i = icmp slt i64 %i, 5
+  %cond.j = icmp slt i64 %j, 5
+  %cond.k = icmp slt i64 %k, 5
+  %cond.ij = and i1 %cond.i, %cond.j
+  %cond = and i1 %cond.ij, %cond.k
+  br i1 %cond, label %if.then, label %for.k.latch
+
+if.then:
+  %offset.tmp = add i64 %i_m_o, %j_o
+  %offset = add i64 %offset.tmp, %k
+  %gep = getelementptr inbounds i8, ptr %A, i64 %offset
+  store i8 0, ptr %gep, align 1
+  br label %for.k.latch
+
+for.k.latch:
+  %k.inc = add nsw i64 %k, 1
+  %ec.k = icmp eq i64 %k.inc, %o
+  br i1 %ec.k, label %for.j.latch, label %for.k.header
+
+for.j.latch:
+  %j.inc = add nsw i64 %j, 1
+  %ec.j = icmp eq i64 %j.inc, %m
+  br i1 %ec.j, label %for.i.latch, label %for.j.header
+
+for.i.latch:
+  %i.inc = add nsw i64 %i, 1
+  %ec.i = icmp eq i64 %i.inc, %n
+  br i1 %ec.i, label %exit, label %for.i.header
+
+exit:
+  ret void
+}

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [Delinarization] Add test for inferred array size exceeds integer range (PR #169048)

2025-11-21 Thread Ryotaro Kasuga via llvm-branch-commits

kasuga-fj wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack on Graphite:
> https://app.graphite.com/github/pr/llvm/llvm-project/169048?utm_source=stack-comment-downstack-mergeability-warning
> Learn more: https://graphite.dev/docs/merge-pull-requests

* **#169048** 👈 (View in Graphite: https://app.graphite.com/github/pr/llvm/llvm-project/169048?utm_source=stack-comment-view-in-graphite)
* **#169047** (Graphite: https://app.graphite.com/github/pr/llvm/llvm-project/169047?utm_source=stack-comment-icon)
* `main`




This stack of pull requests is managed by Graphite (https://graphite.dev?utm-source=stack-comment). Learn 
more about stacking (https://stacking.dev/?utm_source=stack-comment).


https://github.com/llvm/llvm-project/pull/169048
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang-tools-extra] [clang-doc] `<ul>` must be nested in `<li>` (PR #168972)

2025-11-21 Thread Erick Velez via llvm-branch-commits

https://github.com/evelez7 updated 
https://github.com/llvm/llvm-project/pull/168972

>From ea76e818bfa0c6ee1b414b82e7f14841707c532f Mon Sep 17 00:00:00 2001
From: Erick Velez 
Date: Thu, 20 Nov 2025 15:46:21 -0800
Subject: [PATCH] [clang-doc] `<ul>` must be nested in `<li>`

The HTML spec states that only `<li>` can be children of `<ul>`. Nested
`<ul>` tags in an unordered list must be children of `<li>`.
---
 .../clang-doc/assets/class-template.mustache  | 88 +++
 .../assets/namespace-template.mustache| 32 ---
 .../test/clang-doc/mustache-index.cpp | 22 ++---
 3 files changed, 80 insertions(+), 62 deletions(-)

diff --git a/clang-tools-extra/clang-doc/assets/class-template.mustache 
b/clang-tools-extra/clang-doc/assets/class-template.mustache
index 8eb28f33e65f6..c5187026a2399 100644
--- a/clang-tools-extra/clang-doc/assets/class-template.mustache
+++ b/clang-tools-extra/clang-doc/assets/class-template.mustache
@@ -50,18 +50,21 @@
 
 Public 
Members
 
-
-{{#PublicMembers}}
-
-{{Name}}
-
-{{/PublicMembers}}
-
+
+
+{{#PublicMembers}}
+
+{{Name}}
+
+{{/PublicMembers}}
+
+
 {{/HasPublicMembers}}
 {{#ProtectedMembers}}
-
-Protected Members
-
+
+Protected 
Members
+
+
 
 {{#Obj}}
 
@@ -69,42 +72,49 @@
 
 {{/Obj}}
 
+
 {{/ProtectedMembers}}
 {{#HasPublicFunctions}}
 
 Public 
Method
 
-
-{{#PublicFunctions}}
-
-{{Name}}
-
-{{/PublicFunctions}}
-
+
+
+{{#PublicFunctions}}
+
+{{Name}}
+
+{{/PublicFunctions}}
+
+
 {{/HasPublicFunctions}}
 {{#ProtectedFunction}}
 
 Protected Method
 
-
-{{#Obj}}
-
-{{Name}}
-
-{{/Obj}}
-
+
+
+{{#Obj}}
+
+{{Name}}
+
+{{/Obj}}
+
+
 {{/ProtectedFunction}}
 {{#Enums}}
 
 Enums
 
-
-{{#Obj}}
-
-{{EnumName}}
-
-{{/Obj}}
-
+
+
+{{#Obj}}
+
+{{EnumName}}
+
+{{/Obj}}
+
+
 {{/Enums}}
 {{#Typedef}}
 Typedef
@@ -113,13 +123,15 @@
 
 Inner Classes
 
-
-{{#Links}}
-
-{{Name}}
-
-{{/Links}}
-
+
+
+{{#Links}}
+
+{{Name}}
+
+{{/Links}}
+
+
 {{/Record}}
 
 
diff --git a/clang-tools-extra/clang-doc/assets/namespace-template.mustache 
b/clang-tools-extra/clang-doc/assets/namespace-template.mustache
index 614023b9f6723..8a5be46aee28b 100644
--- a/clang-tools-extra/clang-doc/assets/namespace-template.mustache
+++ b/clang-tools-extra/clang-doc/assets/namespace-template.mustache
@@ -50,25 +50,29 @@
 
 Enums
 
-
-{{#Enums}}
-
-{{Name}}
-
-{{/Enums}}
-
+  

[llvm-branch-commits] [llvm] 6195f9e - Revert "[ORC] Tailor ELF debugger support plugin to load-address patching onl…"

2025-11-21 Thread via llvm-branch-commits

Author: Stefan Gränitz
Date: 2025-11-21T19:08:05+01:00
New Revision: 6195f9e528fbc1775c0f8fbfce20d3df18db732a

URL: 
https://github.com/llvm/llvm-project/commit/6195f9e528fbc1775c0f8fbfce20d3df18db732a
DIFF: 
https://github.com/llvm/llvm-project/commit/6195f9e528fbc1775c0f8fbfce20d3df18db732a.diff

LOG: Revert "[ORC] Tailor ELF debugger support plugin to load-address patching 
onl…"

This reverts commit db5eeddbd3f1d5cdb86e365a2a80b036bd66de7f.

Added: 


Modified: 
llvm/include/llvm/ExecutionEngine/Orc/Debugging/ELFDebugObjectPlugin.h
llvm/lib/ExecutionEngine/Orc/Debugging/ELFDebugObjectPlugin.cpp

Removed: 




diff  --git 
a/llvm/include/llvm/ExecutionEngine/Orc/Debugging/ELFDebugObjectPlugin.h 
b/llvm/include/llvm/ExecutionEngine/Orc/Debugging/ELFDebugObjectPlugin.h
index 92dbfe1c79e6e..d946a029fd2ec 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Debugging/ELFDebugObjectPlugin.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Debugging/ELFDebugObjectPlugin.h
@@ -23,6 +23,7 @@
 #include "llvm/Support/MemoryBufferRef.h"
 #include "llvm/TargetParser/Triple.h"
 
+#include 
 #include 
 #include 
 #include 
@@ -32,24 +33,35 @@ namespace orc {
 
 class DebugObject;
 
-/// Debugger support for ELF platforms with the GDB JIT Interface. The plugin
-/// emits and manages a separate debug object allocation in addition to the
-/// LinkGraph's own allocation and it notifies the debugger when necessary.
+/// Creates and manages DebugObjects for JITLink artifacts.
+///
+/// DebugObjects are created when linking for a MaterializationResponsibility
+/// starts. They are pending as long as materialization is in progress.
+///
+/// There can only be one pending DebugObject per 
MaterializationResponsibility.
+/// If materialization fails, pending DebugObjects are discarded.
+///
+/// Once executable code for the MaterializationResponsibility is emitted, the
+/// corresponding DebugObject is finalized to target memory and the provided
+/// DebugObjectRegistrar is notified. Ownership of DebugObjects remains with 
the
+/// plugin.
 ///
 class LLVM_ABI ELFDebugObjectPlugin : public ObjectLinkingLayer::Plugin {
 public:
-  /// Create the plugin for the given session and set additional options
+  /// Create the plugin to submit DebugObjects for JITLink artifacts. For all
+  /// options the recommended setting is true.
   ///
   /// RequireDebugSections:
-  ///   Emit debug objects only if the LinkGraph contains debug info. Turning
-  ///   this off allows minimal debugging based on raw symbol names, but it
-  ///   comes with significant overhead for release configurations.
+  ///   Submit debug objects to the executor only if they contain actual debug
+  ///   info. Turning this off may allow minimal debugging based on raw symbol
+  ///   names. Note that this may cause significant memory and transport
+  ///   overhead for objects built with a release configuration.
   ///
   /// AutoRegisterCode:
   ///   Notify the debugger for each new debug object. This is a good default
   ///   mode, but it may cause significant overhead when adding many modules in
-  ///   sequence. Otherwise the user must call __jit_debug_register_code() in
-  ///   the debug session manually.
+  ///   sequence. When turning this off, the user has to issue the call to
+  ///   __jit_debug_register_code() on the executor side manually.
   ///
   ELFDebugObjectPlugin(ExecutionSession &ES, bool RequireDebugSections,
bool AutoRegisterCode, Error &Err);
@@ -57,7 +69,7 @@ class LLVM_ABI ELFDebugObjectPlugin : public 
ObjectLinkingLayer::Plugin {
 
   void notifyMaterializing(MaterializationResponsibility &MR,
jitlink::LinkGraph &G, jitlink::JITLinkContext &Ctx,
-   MemoryBufferRef InputObj) override;
+   MemoryBufferRef InputObject) override;
 
   Error notifyFailed(MaterializationResponsibility &MR) override;
   Error notifyRemovingResources(JITDylib &JD, ResourceKey K) override;
@@ -82,8 +94,6 @@ class LLVM_ABI ELFDebugObjectPlugin : public 
ObjectLinkingLayer::Plugin {
   ExecutorAddr RegistrationAction;
   bool RequireDebugSections;
   bool AutoRegisterCode;
-
-  DebugObject *getPendingDebugObj(MaterializationResponsibility &MR);
 };
 
 } // namespace orc

diff  --git a/llvm/lib/ExecutionEngine/Orc/Debugging/ELFDebugObjectPlugin.cpp 
b/llvm/lib/ExecutionEngine/Orc/Debugging/ELFDebugObjectPlugin.cpp
index 0e9b9a7ff76d3..653645ff03f15 100644
--- a/llvm/lib/ExecutionEngine/Orc/Debugging/ELFDebugObjectPlugin.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Debugging/ELFDebugObjectPlugin.cpp
@@ -17,17 +17,11 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/ELF.h"
-#include "llvm/ExecutionEngine/JITLink/JITLink.h"
 #include "llvm/ExecutionEngine/JITLink/JITLinkDylib.h"
 #include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManage

[llvm-branch-commits] [clang-tools-extra] [clang-doc] `<ul>` must be nested in `<li>` (PR #168972)

2025-11-21 Thread Erick Velez via llvm-branch-commits

https://github.com/evelez7 updated 
https://github.com/llvm/llvm-project/pull/168972

>From ea76e818bfa0c6ee1b414b82e7f14841707c532f Mon Sep 17 00:00:00 2001
From: Erick Velez 
Date: Thu, 20 Nov 2025 15:46:21 -0800
Subject: [PATCH] [clang-doc] `<ul>` must be nested in `<li>`

The HTML spec states that only `<li>` can be children of `<ul>`. Nested
`<ul>` tags in an unordered list must be children of `<li>`.
---
 .../clang-doc/assets/class-template.mustache  | 88 +++
 .../assets/namespace-template.mustache| 32 ---
 .../test/clang-doc/mustache-index.cpp | 22 ++---
 3 files changed, 80 insertions(+), 62 deletions(-)

diff --git a/clang-tools-extra/clang-doc/assets/class-template.mustache 
b/clang-tools-extra/clang-doc/assets/class-template.mustache
index 8eb28f33e65f6..c5187026a2399 100644
--- a/clang-tools-extra/clang-doc/assets/class-template.mustache
+++ b/clang-tools-extra/clang-doc/assets/class-template.mustache
@@ -50,18 +50,21 @@
 
 Public 
Members
 
-
-{{#PublicMembers}}
-
-{{Name}}
-
-{{/PublicMembers}}
-
+
+
+{{#PublicMembers}}
+
+{{Name}}
+
+{{/PublicMembers}}
+
+
 {{/HasPublicMembers}}
 {{#ProtectedMembers}}
-
-Protected Members
-
+
+Protected 
Members
+
+
 
 {{#Obj}}
 
@@ -69,42 +72,49 @@
 
 {{/Obj}}
 
+
 {{/ProtectedMembers}}
 {{#HasPublicFunctions}}
 
 Public 
Method
 
-
-{{#PublicFunctions}}
-
-{{Name}}
-
-{{/PublicFunctions}}
-
+
+
+{{#PublicFunctions}}
+
+{{Name}}
+
+{{/PublicFunctions}}
+
+
 {{/HasPublicFunctions}}
 {{#ProtectedFunction}}
 
 Protected Method
 
-
-{{#Obj}}
-
-{{Name}}
-
-{{/Obj}}
-
+
+
+{{#Obj}}
+
+{{Name}}
+
+{{/Obj}}
+
+
 {{/ProtectedFunction}}
 {{#Enums}}
 
 Enums
 
-
-{{#Obj}}
-
-{{EnumName}}
-
-{{/Obj}}
-
+
+
+{{#Obj}}
+
+{{EnumName}}
+
+{{/Obj}}
+
+
 {{/Enums}}
 {{#Typedef}}
 Typedef
@@ -113,13 +123,15 @@
 
 Inner Classes
 
-
-{{#Links}}
-
-{{Name}}
-
-{{/Links}}
-
+
+
+{{#Links}}
+
+{{Name}}
+
+{{/Links}}
+
+
 {{/Record}}
 
 
diff --git a/clang-tools-extra/clang-doc/assets/namespace-template.mustache 
b/clang-tools-extra/clang-doc/assets/namespace-template.mustache
index 614023b9f6723..8a5be46aee28b 100644
--- a/clang-tools-extra/clang-doc/assets/namespace-template.mustache
+++ b/clang-tools-extra/clang-doc/assets/namespace-template.mustache
@@ -50,25 +50,29 @@
 
 Enums
 
-
-{{#Enums}}
-
-{{Name}}
-
-{{/Enums}}
-
+  

[llvm-branch-commits] [llvm] AMDGPU: Handle invariant when lowering global loads (PR #168914)

2025-11-21 Thread Shilei Tian via llvm-branch-commits


@@ -11944,7 +11944,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, 
SelectionDAG &DAG) const {
   AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
   (AS == AMDGPUAS::GLOBAL_ADDRESS &&
Subtarget->getScalarizeGlobalBehavior() && Load->isSimple() &&
-   isMemOpHasNoClobberedMemOperand(Load))) {
+   (Load->isInvariant() || isMemOpHasNoClobberedMemOperand(Load {

shiltian wrote:

and there is no test change with this?

https://github.com/llvm/llvm-project/pull/168914
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [Github] Remove use of setup-windows and install-ninja in llvm tests (PR #168987)

2025-11-21 Thread Aiden Grossman via llvm-branch-commits

boomanaiden154 wrote:

> Is llvm-project-tests still used? I wonder if we can just delete the whole 
> file?

In the release branch, yes. We haven't backported the patches to fully remove 
it. Not sure it's worth doing that for just the SPIRV/libclang tests.

https://github.com/llvm/llvm-project/pull/168987
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [Github] Remove install-ninja/setup-windows (PR #168985)

2025-11-21 Thread Tom Stellard via llvm-branch-commits

https://github.com/tstellar approved this pull request.


https://github.com/llvm/llvm-project/pull/168985
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [Github] Remove use of setup-windows and install-ninja in llvm tests (PR #168987)

2025-11-21 Thread Tom Stellard via llvm-branch-commits

tstellar wrote:

Is llvm-project-tests still used?  I wonder if we can just delete the whole 
file?

https://github.com/llvm/llvm-project/pull/168987
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [Github] Remove use of setup-windows and install-ninja in llvm tests (PR #168987)

2025-11-21 Thread Tom Stellard via llvm-branch-commits

https://github.com/tstellar approved this pull request.


https://github.com/llvm/llvm-project/pull/168987
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Implement CFI for CSR spills (PR #164724)

2025-11-21 Thread Scott Linder via llvm-branch-commits

https://github.com/slinder1 edited 
https://github.com/llvm/llvm-project/pull/164724
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [llvm-21][MC] Fix fragments for sections bigger than 4G (PR #169121)

2025-11-21 Thread via llvm-branch-commits

github-actions[bot] wrote:



Thank you for submitting a Pull Request (PR) to the LLVM Project!

This PR will be automatically labeled and the relevant teams will be notified.

If you wish to, you can add reviewers by using the "Reviewers" section on this 
page.

If this is not working for you, it is probably because you do not have write 
permissions for the repository. In which case you can instead tag reviewers by 
name in a comment by using `@` followed by their GitHub username.

If you have received no comments on your PR for a week, you can request a 
review by "ping"ing the PR by adding a comment “Ping”. The common courtesy 
"ping" rate is once a week. Please remember that you are asking for valuable 
time from other developers.

If you have further questions, they may be answered by the [LLVM GitHub User 
Guide](https://llvm.org/docs/GitHub.html).

You can also ask questions in a comment on this PR, on the [LLVM 
Discord](https://discord.com/invite/xS7Z362) or on the 
[forums](https://discourse.llvm.org/).

https://github.com/llvm/llvm-project/pull/169121
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [llvm-21][MC] Fix fragments for sections bigger than 4G (PR #169121)

2025-11-21 Thread Lydia Kim via llvm-branch-commits

https://github.com/lydkim created 
https://github.com/llvm/llvm-project/pull/169121

An OOM situation occurs in llvm-dwp when running big builds where sections are
larger than 4 GB. The problem is described in this issue:
https://github.com/llvm/llvm-project/issues/168923

Problem: 
When `ContentStorage` exceeds 4GB, the `uint32_t ContentEnd` field overflows 
when assigned in `doneAppending()`. Later in `getContentsForAppending()`, the 
calculation `Size = ContentEnd - ContentStart` causes unsigned integer 
underflow, producing large arbitrary memory allocations.

To address the OOM issue while keeping MCEncodedFragment size minimal, this 
solution transitions from a begin/end representation (`uint32_t ContentStart`, 
`uint32_t ContentEnd`) to a begin/size representation (`uint64_t ContentStart`, 
`uint32_t ContentSize`). The new approach changes `ContentStart` to 64-bit to 
support positions beyond 4GB, while `ContentSize` remains 32-bit, limiting 
individual fragments to 4GB.

This problem only exists in the llvm-21 branch. The recent refactoring in main 
branch makes this unnecessary there.

Fixes #168923

>From 233e674a7786f5551317848e9ea783d26494c325 Mon Sep 17 00:00:00 2001
From: Lydia Kim 
Date: Fri, 21 Nov 2025 11:41:23 -0800
Subject: [PATCH] [server-llvm-21][MC] Fixing vector overflow

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:

Differential Revision: https://phabricator.intern.facebook.com/D87662897
---
 llvm/include/llvm/MC/MCSection.h | 25 +
 llvm/lib/MC/MCSection.cpp|  4 ++--
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h
index 64b13972bfca1..9daaebf7e7935 100644
--- a/llvm/include/llvm/MC/MCSection.h
+++ b/llvm/include/llvm/MC/MCSection.h
@@ -298,8 +298,8 @@ class MCFragment {
 /// data.
 class MCEncodedFragment : public MCFragment {
   uint8_t BundlePadding = 0;
-  uint32_t ContentStart = 0;
-  uint32_t ContentEnd = 0;
+  uint32_t ContentSize = 0;
+  uint64_t ContentStart = 0;
   uint32_t FixupStart = 0;
   uint32_t FixupEnd = 0;
 
@@ -360,22 +360,23 @@ class MCEncodedFragment : public MCFragment {
 
   // Content-related functions manage parent's storage using ContentStart and
   // ContentSize.
-  void clearContents() { ContentEnd = ContentStart; }
+  void clearContents() { ContentSize = 0; }
   // Get a SmallVector reference. The caller should call doneAppending to 
update
-  // `ContentEnd`.
+  // `ContentSize`.
   SmallVectorImpl &getContentsForAppending() {
 SmallVectorImpl &S = getParent()->ContentStorage;
-if (LLVM_UNLIKELY(ContentEnd != S.size())) {
+if (LLVM_UNLIKELY(ContentStart + ContentSize != S.size())) {
   // Move the elements to the end. Reserve space to avoid invalidating
   // S.begin()+I for `append`.
-  auto Size = ContentEnd - ContentStart;
   auto I = std::exchange(ContentStart, S.size());
-  S.reserve(S.size() + Size);
-  S.append(S.begin() + I, S.begin() + I + Size);
+  S.reserve(S.size() + ContentSize);
+  S.append(S.begin() + I, S.begin() + I + ContentSize);
 }
 return S;
   }
-  void doneAppending() { ContentEnd = getParent()->ContentStorage.size(); }
+  void doneAppending() {
+ContentSize = getParent()->ContentStorage.size() - ContentStart;
+  }
   void appendContents(ArrayRef Contents) {
 getContentsForAppending().append(Contents.begin(), Contents.end());
 doneAppending();
@@ -387,11 +388,11 @@ class MCEncodedFragment : public MCFragment {
   LLVM_ABI void setContents(ArrayRef Contents);
   MutableArrayRef getContents() {
 return MutableArrayRef(getParent()->ContentStorage)
-.slice(ContentStart, ContentEnd - ContentStart);
+.slice(ContentStart, ContentSize);
   }
   ArrayRef getContents() const {
 return ArrayRef(getParent()->ContentStorage)
-.slice(ContentStart, ContentEnd - ContentStart);
+.slice(ContentStart, ContentSize);
   }
 
   // Fixup-related functions manage parent's storage using FixupStart and
@@ -409,7 +410,7 @@ class MCEncodedFragment : public MCFragment {
 .slice(FixupStart, FixupEnd - FixupStart);
   }
 
-  size_t getSize() const { return ContentEnd - ContentStart; }
+  size_t getSize() const { return ContentSize; }
 };
 
 /// Fragment for data and encoded instructions.
diff --git a/llvm/lib/MC/MCSection.cpp b/llvm/lib/MC/MCSection.cpp
index a7330692571de..97f591fbf0e28 100644
--- a/llvm/lib/MC/MCSection.cpp
+++ b/llvm/lib/MC/MCSection.cpp
@@ -84,11 +84,11 @@ LLVM_DUMP_METHOD void MCSection::dump(
 
 void MCEncodedFragment::setContents(ArrayRef Contents) {
   auto &S = getParent()->ContentStorage;
-  if (ContentStart + Contents.size() > ContentEnd) {
+  if (Contents.size() > ContentSize) {
 ContentStart = S.size();
 S.resize_for_overwrite(S.size() + Contents.size());
   }
-  ContentEnd = ContentStart + Contents.size();
+  ContentSize = Contents.size();
   llvm::copy(Contents, S.begin() + ContentStart)

[llvm-branch-commits] [llvm] [llvm-21][MC] Fix fragments for sections bigger than 4G (PR #169121)

2025-11-21 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-mc

Author: Lydia Kim (lydkim)


Changes

An OOM situation occurs in llvm-dwp when running big builds where sections are
larger than 4 GB. The problem is described in this issue:
https://github.com/llvm/llvm-project/issues/168923

Problem: 
When `ContentStorage` exceeds 4GB, the `uint32_t ContentEnd` field overflows 
when assigned in `doneAppending()`. Later in `getContentsForAppending()`, the 
calculation `Size = ContentEnd - ContentStart` causes unsigned integer 
underflow, producing large arbitrary memory allocations.

To address the OOM issue while keeping MCEncodedFragment size minimal, this 
solution transitions from a begin/end representation (`uint32_t ContentStart`, 
`uint32_t ContentEnd`) to a begin/size representation (`uint64_t ContentStart`, 
`uint32_t ContentSize`). The new approach changes `ContentStart` to 64-bit to 
support positions beyond 4GB, while `ContentSize` remains 32-bit, limiting 
individual fragments to 4GB.

This problem only exists in the llvm-21 branch. The recent refactoring in main 
branch makes this unnecessary there.

Fixes #168923

---
Full diff: https://github.com/llvm/llvm-project/pull/169121.diff


2 Files Affected:

- (modified) llvm/include/llvm/MC/MCSection.h (+13-12) 
- (modified) llvm/lib/MC/MCSection.cpp (+2-2) 


``diff
diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h
index 64b13972bfca1..9daaebf7e7935 100644
--- a/llvm/include/llvm/MC/MCSection.h
+++ b/llvm/include/llvm/MC/MCSection.h
@@ -298,8 +298,8 @@ class MCFragment {
 /// data.
 class MCEncodedFragment : public MCFragment {
   uint8_t BundlePadding = 0;
-  uint32_t ContentStart = 0;
-  uint32_t ContentEnd = 0;
+  uint32_t ContentSize = 0;
+  uint64_t ContentStart = 0;
   uint32_t FixupStart = 0;
   uint32_t FixupEnd = 0;
 
@@ -360,22 +360,23 @@ class MCEncodedFragment : public MCFragment {
 
   // Content-related functions manage parent's storage using ContentStart and
   // ContentSize.
-  void clearContents() { ContentEnd = ContentStart; }
+  void clearContents() { ContentSize = 0; }
   // Get a SmallVector reference. The caller should call doneAppending to 
update
-  // `ContentEnd`.
+  // `ContentSize`.
   SmallVectorImpl &getContentsForAppending() {
 SmallVectorImpl &S = getParent()->ContentStorage;
-if (LLVM_UNLIKELY(ContentEnd != S.size())) {
+if (LLVM_UNLIKELY(ContentStart + ContentSize != S.size())) {
   // Move the elements to the end. Reserve space to avoid invalidating
   // S.begin()+I for `append`.
-  auto Size = ContentEnd - ContentStart;
   auto I = std::exchange(ContentStart, S.size());
-  S.reserve(S.size() + Size);
-  S.append(S.begin() + I, S.begin() + I + Size);
+  S.reserve(S.size() + ContentSize);
+  S.append(S.begin() + I, S.begin() + I + ContentSize);
 }
 return S;
   }
-  void doneAppending() { ContentEnd = getParent()->ContentStorage.size(); }
+  void doneAppending() {
+ContentSize = getParent()->ContentStorage.size() - ContentStart;
+  }
   void appendContents(ArrayRef Contents) {
 getContentsForAppending().append(Contents.begin(), Contents.end());
 doneAppending();
@@ -387,11 +388,11 @@ class MCEncodedFragment : public MCFragment {
   LLVM_ABI void setContents(ArrayRef Contents);
   MutableArrayRef getContents() {
 return MutableArrayRef(getParent()->ContentStorage)
-.slice(ContentStart, ContentEnd - ContentStart);
+.slice(ContentStart, ContentSize);
   }
   ArrayRef getContents() const {
 return ArrayRef(getParent()->ContentStorage)
-.slice(ContentStart, ContentEnd - ContentStart);
+.slice(ContentStart, ContentSize);
   }
 
   // Fixup-related functions manage parent's storage using FixupStart and
@@ -409,7 +410,7 @@ class MCEncodedFragment : public MCFragment {
 .slice(FixupStart, FixupEnd - FixupStart);
   }
 
-  size_t getSize() const { return ContentEnd - ContentStart; }
+  size_t getSize() const { return ContentSize; }
 };
 
 /// Fragment for data and encoded instructions.
diff --git a/llvm/lib/MC/MCSection.cpp b/llvm/lib/MC/MCSection.cpp
index a7330692571de..97f591fbf0e28 100644
--- a/llvm/lib/MC/MCSection.cpp
+++ b/llvm/lib/MC/MCSection.cpp
@@ -84,11 +84,11 @@ LLVM_DUMP_METHOD void MCSection::dump(
 
 void MCEncodedFragment::setContents(ArrayRef Contents) {
   auto &S = getParent()->ContentStorage;
-  if (ContentStart + Contents.size() > ContentEnd) {
+  if (Contents.size() > ContentSize) {
 ContentStart = S.size();
 S.resize_for_overwrite(S.size() + Contents.size());
   }
-  ContentEnd = ContentStart + Contents.size();
+  ContentSize = Contents.size();
   llvm::copy(Contents, S.begin() + ContentStart);
 }
 

``




https://github.com/llvm/llvm-project/pull/169121
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [llvm-21][MC] Fix fragments for sections bigger than 4G (PR #169121)

2025-11-21 Thread Matthias Braun via llvm-branch-commits

https://github.com/MatzeB milestoned 
https://github.com/llvm/llvm-project/pull/169121
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang-tools-extra] [clang-doc] Add definition information to class templates (PR #169109)

2025-11-21 Thread Paul Kirth via llvm-branch-commits

https://github.com/ilovepi approved this pull request.


https://github.com/llvm/llvm-project/pull/169109
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: RegBankLegalize rules for G_FABS and G_FNEG (PR #168411)

2025-11-21 Thread Chinmay Deshpande via llvm-branch-commits

chinmaydd wrote:

Support for `G_STRICT_FADD/SUB/MUL` is blocked by `G_FABS` and `G_FNEG`. I 
would like to see this merged if possible. Thanks !

https://github.com/llvm/llvm-project/pull/168411
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] VectorCombine: Improve the insert/extract fold in the narrowing case (PR #168820)

2025-11-21 Thread Nicolai Hähnle via llvm-branch-commits

https://github.com/nhaehnle updated 
https://github.com/llvm/llvm-project/pull/168820

From e673cdaee95d870dd5e2fa13ab064f6dbd0ba273 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= 
Date: Wed, 19 Nov 2025 18:00:32 -0800
Subject: [PATCH] VectorCombine: Improve the insert/extract fold in the
 narrowing case

Keeping the extracted element in a natural position in the narrowed
vector has two beneficial effects:

1. It makes the narrowing shuffles cheaper (at least on AMDGPU), which
   allows the insert/extract fold to trigger.
2. It makes the narrowing shuffles in a chain of extract/insert
   compatible, which allows foldShufflesOfLengthChangingShuffles to
   successfully recognize a chain that can be folded.

There are minor X86 test changes that look reasonable to me. The IR
change for AVX2 in 
llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
doesn't change the assembly generated by `llc -mtriple=x86_64-- -mattr=AVX2`
at all.

commit-id:c151bb04
---
 .../Transforms/Vectorize/VectorCombine.cpp| 22 +--
 .../VectorCombine/AMDGPU/extract-insert-i8.ll | 17 ++
 .../X86/extract-insert-poison.ll  | 12 ++
 .../VectorCombine/X86/extract-insert.ll   |  8 +++
 .../Transforms/VectorCombine/X86/pr126085.ll  |  4 ++--
 5 files changed, 22 insertions(+), 41 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp 
b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index fc39f4123fac4..9025b93f75458 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -4455,22 +4455,15 @@ bool 
VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
   SmallVector Mask(NumDstElts, PoisonMaskElem);
 
   bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
-  bool IsExtIdxInBounds = ExtIdx < NumDstElts;
   bool NeedDstSrcSwap = isa(DstVec) && !isa(SrcVec);
   if (NeedDstSrcSwap) {
 SK = TargetTransformInfo::SK_PermuteSingleSrc;
-if (!IsExtIdxInBounds && NeedExpOrNarrow)
-  Mask[InsIdx] = 0;
-else
-  Mask[InsIdx] = ExtIdx;
+Mask[InsIdx] = ExtIdx % NumDstElts;
 std::swap(DstVec, SrcVec);
   } else {
 SK = TargetTransformInfo::SK_PermuteTwoSrc;
 std::iota(Mask.begin(), Mask.end(), 0);
-if (!IsExtIdxInBounds && NeedExpOrNarrow)
-  Mask[InsIdx] = NumDstElts;
-else
-  Mask[InsIdx] = ExtIdx + NumDstElts;
+Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
   }
 
   // Cost
@@ -4491,14 +4484,11 @@ bool 
VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
   NewCost += TTI.getShuffleCost(SK, DstVecTy, DstVecTy, Mask, CostKind, 0,
 nullptr, {DstVec, SrcVec});
   } else {
-// When creating length-changing-vector, always create with a Mask whose
-// first element has an ExtIdx, so that the first element of the vector
-// being created is always the target to be extracted.
+// When creating a length-changing-vector, always try to keep the relevant
+// element in an equivalent position, so that bulk shuffles are more likely
+// to be useful.
 ExtToVecMask.assign(NumDstElts, PoisonMaskElem);
-if (IsExtIdxInBounds)
-  ExtToVecMask[ExtIdx] = ExtIdx;
-else
-  ExtToVecMask[0] = ExtIdx;
+ExtToVecMask[ExtIdx % NumDstElts] = ExtIdx;
 // Add cost for expanding or narrowing
 NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
  DstVecTy, SrcVecTy, ExtToVecMask, CostKind);
diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll 
b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll
index eaab7199a3cf3..442a93689a791 100644
--- a/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll
+++ b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll
@@ -91,21 +91,8 @@ entry:
 define <8 x i8> @extract_insert_chain_shortening(<32 x i8> %in) {
 ; OPT-LABEL: define <8 x i8> @extract_insert_chain_shortening(
 ; OPT-SAME: <32 x i8> [[IN:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:[[I_1:%.*]] = extractelement <32 x i8> [[IN]], i64 17
-; OPT-NEXT:[[I_2:%.*]] = extractelement <32 x i8> [[IN]], i64 18
-; OPT-NEXT:[[I_3:%.*]] = extractelement <32 x i8> [[IN]], i64 19
-; OPT-NEXT:[[I_5:%.*]] = extractelement <32 x i8> [[IN]], i64 21
-; OPT-NEXT:[[I_6:%.*]] = extractelement <32 x i8> [[IN]], i64 22
-; OPT-NEXT:[[I_7:%.*]] = extractelement <32 x i8> [[IN]], i64 23
-; OPT-NEXT:[[O_0:%.*]] = shufflevector <32 x i8> [[IN]], <32 x i8> poison, 
<8 x i32> 
-; OPT-NEXT:[[O_1:%.*]] = insertelement <8 x i8> [[O_0]], i8 [[I_1]], i32 1
-; OPT-NEXT:[[O_2:%.*]] = insertelement <8 x i8> [[O_1]], i8 [[I_2]], i32 2
-; OPT-NEXT:[[O_3:%.*]] = insertelement <8 x i8> [[O_2]], i8 [[I_3]], i32 3
-; OPT-NEXT:[[TMP1:%.*]] = shufflevector <32 x i8> [[IN]], <32 x i8> 
poison, <8 x i32> 
-; OPT-NEXT:[[O_4:%.*]] = shufflevector <8 x i8> [[O_3]], <8 x i8> 
[[TMP1]], <8

[llvm-branch-commits] [llvm] VectorCombine: Fold chains of shuffles fed by length-changing shuffles (PR #168819)

2025-11-21 Thread Nicolai Hähnle via llvm-branch-commits

https://github.com/nhaehnle updated 
https://github.com/llvm/llvm-project/pull/168819

From 459939f82086d02c39f5d6eeae141c25f9932d40 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= 
Date: Wed, 19 Nov 2025 17:59:11 -0800
Subject: [PATCH] VectorCombine: Fold chains of shuffles fed by length-changing
 shuffles

Such chains can arise from folding insert/extract chains.

commit-id:a960175d
---
 .../Transforms/Vectorize/VectorCombine.cpp| 168 ++
 .../VectorCombine/AMDGPU/extract-insert-i8.ll |  41 +
 2 files changed, 176 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp 
b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index f1890e4f5fb95..fc39f4123fac4 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -139,6 +139,7 @@ class VectorCombine {
   bool foldShuffleOfSelects(Instruction &I);
   bool foldShuffleOfCastops(Instruction &I);
   bool foldShuffleOfShuffles(Instruction &I);
+  bool foldShufflesOfLengthChangingShuffles(Instruction &I);
   bool foldShuffleOfIntrinsics(Instruction &I);
   bool foldShuffleToIdentity(Instruction &I);
   bool foldShuffleFromReductions(Instruction &I);
@@ -2877,6 +2878,171 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction 
&I) {
   return true;
 }
 
+/// Try to convert a chain of length-preserving shuffles that are fed by
+/// length-changing shuffles from the same source, e.g. a chain of length 3:
+///
+///   "shuffle (shuffle (shuffle x, (shuffle y, undef)),
+/// (shuffle y, undef)),
+//  (shuffle y, undef)"
+///
+/// into a single shuffle fed by a length-changing shuffle:
+///
+///   "shuffle x, (shuffle y, undef)"
+///
+/// Such chains arise e.g. from folding extract/insert sequences.
+bool VectorCombine::foldShufflesOfLengthChangingShuffles(Instruction &I) {
+  unsigned ChainLength = 0;
+  SmallVector Mask;
+  SmallVector YMask;
+  InstructionCost OldCost = 0;
+  InstructionCost NewCost = 0;
+  FixedVectorType *TrunkType = cast(I.getType());
+  Value *Trunk = &I;
+  unsigned NumTrunkElts = TrunkType->getNumElements();
+  FixedVectorType *YType = nullptr;
+  Value *Y = nullptr;
+
+  for (;;) {
+// Match the current trunk against (commutations of) the pattern
+// "shuffle trunk', (shuffle y, undef)"
+ArrayRef OuterMask;
+Value *OuterV0, *OuterV1;
+if (ChainLength != 0 && !Trunk->hasOneUse())
+  break;
+if (!match(Trunk, m_Shuffle(m_Value(OuterV0), m_Value(OuterV1),
+m_Mask(OuterMask
+  break;
+if (OuterV0->getType() != TrunkType) {
+  // This shuffle is not length-preserving, so it cannot be part of the
+  // chain.
+  break;
+}
+
+ArrayRef InnerMask0, InnerMask1;
+Value *A0, *A1, *B0, *B1;
+bool Match0 =
+match(OuterV0, m_Shuffle(m_Value(A0), m_Value(B0), 
m_Mask(InnerMask0)));
+bool Match1 =
+match(OuterV1, m_Shuffle(m_Value(A1), m_Value(B1), 
m_Mask(InnerMask1)));
+bool Match0Leaf = Match0 && A0->getType() != I.getType();
+bool Match1Leaf = Match1 && A1->getType() != I.getType();
+if (Match0Leaf == Match1Leaf) {
+  // Only handle the case of exactly one leaf in each step. The "two 
leaves"
+  // case is handled by foldShuffleOfShuffles.
+  break;
+}
+
+SmallVector CommutedOuterMask;
+if (Match0Leaf) {
+  std::swap(OuterV0, OuterV1);
+  std::swap(InnerMask0, InnerMask1);
+  std::swap(A0, A1);
+  std::swap(B0, B1);
+  llvm::append_range(CommutedOuterMask, OuterMask);
+  for (int &M : CommutedOuterMask) {
+if (M == PoisonMaskElem)
+  continue;
+if (M < (int)NumTrunkElts)
+  M += NumTrunkElts;
+else
+  M -= NumTrunkElts;
+  }
+  OuterMask = CommutedOuterMask;
+}
+if (!OuterV1->hasOneUse())
+  break;
+
+if (!isa(A1)) {
+  if (!Y)
+Y = A1;
+  else if (Y != A1)
+break;
+}
+if (!isa(B1)) {
+  if (!Y)
+Y = B1;
+  else if (Y != B1)
+break;
+}
+
+InstructionCost LocalOldCost =
+TTI.getInstructionCost(cast(Trunk), CostKind) +
+TTI.getInstructionCost(cast(OuterV1), CostKind);
+
+// Handle the initial (start of chain) case.
+if (!ChainLength) {
+  YType = cast(A1->getType());
+  Mask.assign(OuterMask);
+  YMask.assign(InnerMask1);
+  OldCost = NewCost = LocalOldCost;
+  Trunk = OuterV0;
+  ChainLength++;
+  continue;
+}
+
+// For the non-root case, first attempt to combine masks.
+SmallVector NewYMask(YMask);
+bool Valid = true;
+for (auto [CombinedM, LeafM] : llvm::zip(NewYMask, InnerMask1)) {
+  if (LeafM == -1 || CombinedM == LeafM)
+continue;
+  if (CombinedM == -1) {
+CombinedM = LeafM;
+  } else {
+Valid = false;
+break;
+  }
+}
+  

[llvm-branch-commits] [llvm] VectorCombine: Improve the insert/extract fold in the narrowing case (PR #168820)

2025-11-21 Thread Nicolai Hähnle via llvm-branch-commits

https://github.com/nhaehnle updated 
https://github.com/llvm/llvm-project/pull/168820

From e673cdaee95d870dd5e2fa13ab064f6dbd0ba273 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= 
Date: Wed, 19 Nov 2025 18:00:32 -0800
Subject: [PATCH] VectorCombine: Improve the insert/extract fold in the
 narrowing case

Keeping the extracted element in a natural position in the narrowed
vector has two beneficial effects:

1. It makes the narrowing shuffles cheaper (at least on AMDGPU), which
   allows the insert/extract fold to trigger.
2. It makes the narrowing shuffles in a chain of extract/insert
   compatible, which allows foldShufflesOfLengthChangingShuffles to
   successfully recognize a chain that can be folded.

There are minor X86 test changes that look reasonable to me. The IR
change for AVX2 in 
llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
doesn't change the assembly generated by `llc -mtriple=x86_64-- -mattr=AVX2`
at all.

commit-id:c151bb04
---
 .../Transforms/Vectorize/VectorCombine.cpp| 22 +--
 .../VectorCombine/AMDGPU/extract-insert-i8.ll | 17 ++
 .../X86/extract-insert-poison.ll  | 12 ++
 .../VectorCombine/X86/extract-insert.ll   |  8 +++
 .../Transforms/VectorCombine/X86/pr126085.ll  |  4 ++--
 5 files changed, 22 insertions(+), 41 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp 
b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index fc39f4123fac4..9025b93f75458 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -4455,22 +4455,15 @@ bool 
VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
   SmallVector Mask(NumDstElts, PoisonMaskElem);
 
   bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
-  bool IsExtIdxInBounds = ExtIdx < NumDstElts;
   bool NeedDstSrcSwap = isa(DstVec) && !isa(SrcVec);
   if (NeedDstSrcSwap) {
 SK = TargetTransformInfo::SK_PermuteSingleSrc;
-if (!IsExtIdxInBounds && NeedExpOrNarrow)
-  Mask[InsIdx] = 0;
-else
-  Mask[InsIdx] = ExtIdx;
+Mask[InsIdx] = ExtIdx % NumDstElts;
 std::swap(DstVec, SrcVec);
   } else {
 SK = TargetTransformInfo::SK_PermuteTwoSrc;
 std::iota(Mask.begin(), Mask.end(), 0);
-if (!IsExtIdxInBounds && NeedExpOrNarrow)
-  Mask[InsIdx] = NumDstElts;
-else
-  Mask[InsIdx] = ExtIdx + NumDstElts;
+Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
   }
 
   // Cost
@@ -4491,14 +4484,11 @@ bool 
VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
   NewCost += TTI.getShuffleCost(SK, DstVecTy, DstVecTy, Mask, CostKind, 0,
 nullptr, {DstVec, SrcVec});
   } else {
-// When creating length-changing-vector, always create with a Mask whose
-// first element has an ExtIdx, so that the first element of the vector
-// being created is always the target to be extracted.
+// When creating a length-changing-vector, always try to keep the relevant
+// element in an equivalent position, so that bulk shuffles are more likely
+// to be useful.
 ExtToVecMask.assign(NumDstElts, PoisonMaskElem);
-if (IsExtIdxInBounds)
-  ExtToVecMask[ExtIdx] = ExtIdx;
-else
-  ExtToVecMask[0] = ExtIdx;
+ExtToVecMask[ExtIdx % NumDstElts] = ExtIdx;
 // Add cost for expanding or narrowing
 NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
  DstVecTy, SrcVecTy, ExtToVecMask, CostKind);
diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll 
b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll
index eaab7199a3cf3..442a93689a791 100644
--- a/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll
+++ b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll
@@ -91,21 +91,8 @@ entry:
 define <8 x i8> @extract_insert_chain_shortening(<32 x i8> %in) {
 ; OPT-LABEL: define <8 x i8> @extract_insert_chain_shortening(
 ; OPT-SAME: <32 x i8> [[IN:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:[[I_1:%.*]] = extractelement <32 x i8> [[IN]], i64 17
-; OPT-NEXT:[[I_2:%.*]] = extractelement <32 x i8> [[IN]], i64 18
-; OPT-NEXT:[[I_3:%.*]] = extractelement <32 x i8> [[IN]], i64 19
-; OPT-NEXT:[[I_5:%.*]] = extractelement <32 x i8> [[IN]], i64 21
-; OPT-NEXT:[[I_6:%.*]] = extractelement <32 x i8> [[IN]], i64 22
-; OPT-NEXT:[[I_7:%.*]] = extractelement <32 x i8> [[IN]], i64 23
-; OPT-NEXT:[[O_0:%.*]] = shufflevector <32 x i8> [[IN]], <32 x i8> poison, 
<8 x i32> 
-; OPT-NEXT:[[O_1:%.*]] = insertelement <8 x i8> [[O_0]], i8 [[I_1]], i32 1
-; OPT-NEXT:[[O_2:%.*]] = insertelement <8 x i8> [[O_1]], i8 [[I_2]], i32 2
-; OPT-NEXT:[[O_3:%.*]] = insertelement <8 x i8> [[O_2]], i8 [[I_3]], i32 3
-; OPT-NEXT:[[TMP1:%.*]] = shufflevector <32 x i8> [[IN]], <32 x i8> 
poison, <8 x i32> 
-; OPT-NEXT:[[O_4:%.*]] = shufflevector <8 x i8> [[O_3]], <8 x i8> 
[[TMP1]], <8

[llvm-branch-commits] [llvm] VectorCombine: Fold chains of shuffles fed by length-changing shuffles (PR #168819)

2025-11-21 Thread Nicolai Hähnle via llvm-branch-commits

https://github.com/nhaehnle updated 
https://github.com/llvm/llvm-project/pull/168819

From 459939f82086d02c39f5d6eeae141c25f9932d40 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= 
Date: Wed, 19 Nov 2025 17:59:11 -0800
Subject: [PATCH] VectorCombine: Fold chains of shuffles fed by length-changing
 shuffles

Such chains can arise from folding insert/extract chains.

commit-id:a960175d
---
 .../Transforms/Vectorize/VectorCombine.cpp| 168 ++
 .../VectorCombine/AMDGPU/extract-insert-i8.ll |  41 +
 2 files changed, 176 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp 
b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index f1890e4f5fb95..fc39f4123fac4 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -139,6 +139,7 @@ class VectorCombine {
   bool foldShuffleOfSelects(Instruction &I);
   bool foldShuffleOfCastops(Instruction &I);
   bool foldShuffleOfShuffles(Instruction &I);
+  bool foldShufflesOfLengthChangingShuffles(Instruction &I);
   bool foldShuffleOfIntrinsics(Instruction &I);
   bool foldShuffleToIdentity(Instruction &I);
   bool foldShuffleFromReductions(Instruction &I);
@@ -2877,6 +2878,171 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction 
&I) {
   return true;
 }
 
+/// Try to convert a chain of length-preserving shuffles that are fed by
+/// length-changing shuffles from the same source, e.g. a chain of length 3:
+///
+///   "shuffle (shuffle (shuffle x, (shuffle y, undef)),
+/// (shuffle y, undef)),
+///  (shuffle y, undef)"
+///
+/// into a single shuffle fed by a length-changing shuffle:
+///
+///   "shuffle x, (shuffle y, undef)"
+///
+/// Such chains arise e.g. from folding extract/insert sequences.
+bool VectorCombine::foldShufflesOfLengthChangingShuffles(Instruction &I) {
+  unsigned ChainLength = 0;
+  SmallVector Mask;
+  SmallVector YMask;
+  InstructionCost OldCost = 0;
+  InstructionCost NewCost = 0;
+  FixedVectorType *TrunkType = cast(I.getType());
+  Value *Trunk = &I;
+  unsigned NumTrunkElts = TrunkType->getNumElements();
+  FixedVectorType *YType = nullptr;
+  Value *Y = nullptr;
+
+  for (;;) {
+// Match the current trunk against (commutations of) the pattern
+// "shuffle trunk', (shuffle y, undef)"
+ArrayRef OuterMask;
+Value *OuterV0, *OuterV1;
+if (ChainLength != 0 && !Trunk->hasOneUse())
+  break;
+if (!match(Trunk, m_Shuffle(m_Value(OuterV0), m_Value(OuterV1),
+m_Mask(OuterMask
+  break;
+if (OuterV0->getType() != TrunkType) {
+  // This shuffle is not length-preserving, so it cannot be part of the
+  // chain.
+  break;
+}
+
+ArrayRef InnerMask0, InnerMask1;
+Value *A0, *A1, *B0, *B1;
+bool Match0 =
+match(OuterV0, m_Shuffle(m_Value(A0), m_Value(B0), 
m_Mask(InnerMask0)));
+bool Match1 =
+match(OuterV1, m_Shuffle(m_Value(A1), m_Value(B1), 
m_Mask(InnerMask1)));
+bool Match0Leaf = Match0 && A0->getType() != I.getType();
+bool Match1Leaf = Match1 && A1->getType() != I.getType();
+if (Match0Leaf == Match1Leaf) {
+  // Only handle the case of exactly one leaf in each step. The "two 
leaves"
+  // case is handled by foldShuffleOfShuffles.
+  break;
+}
+
+SmallVector CommutedOuterMask;
+if (Match0Leaf) {
+  std::swap(OuterV0, OuterV1);
+  std::swap(InnerMask0, InnerMask1);
+  std::swap(A0, A1);
+  std::swap(B0, B1);
+  llvm::append_range(CommutedOuterMask, OuterMask);
+  for (int &M : CommutedOuterMask) {
+if (M == PoisonMaskElem)
+  continue;
+if (M < (int)NumTrunkElts)
+  M += NumTrunkElts;
+else
+  M -= NumTrunkElts;
+  }
+  OuterMask = CommutedOuterMask;
+}
+if (!OuterV1->hasOneUse())
+  break;
+
+if (!isa(A1)) {
+  if (!Y)
+Y = A1;
+  else if (Y != A1)
+break;
+}
+if (!isa(B1)) {
+  if (!Y)
+Y = B1;
+  else if (Y != B1)
+break;
+}
+
+InstructionCost LocalOldCost =
+TTI.getInstructionCost(cast(Trunk), CostKind) +
+TTI.getInstructionCost(cast(OuterV1), CostKind);
+
+// Handle the initial (start of chain) case.
+if (!ChainLength) {
+  YType = cast(A1->getType());
+  Mask.assign(OuterMask);
+  YMask.assign(InnerMask1);
+  OldCost = NewCost = LocalOldCost;
+  Trunk = OuterV0;
+  ChainLength++;
+  continue;
+}
+
+// For the non-root case, first attempt to combine masks.
+SmallVector NewYMask(YMask);
+bool Valid = true;
+for (auto [CombinedM, LeafM] : llvm::zip(NewYMask, InnerMask1)) {
+  if (LeafM == -1 || CombinedM == LeafM)
+continue;
+  if (CombinedM == -1) {
+CombinedM = LeafM;
+  } else {
+Valid = false;
+break;
+  }
+}
+  

[llvm-branch-commits] [llvm] VectorCombine: Improve the insert/extract fold in the narrowing case (PR #168820)

2025-11-21 Thread Nicolai Hähnle via llvm-branch-commits

https://github.com/nhaehnle updated 
https://github.com/llvm/llvm-project/pull/168820

From e673cdaee95d870dd5e2fa13ab064f6dbd0ba273 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= 
Date: Wed, 19 Nov 2025 18:00:32 -0800
Subject: [PATCH] VectorCombine: Improve the insert/extract fold in the
 narrowing case

Keeping the extracted element in a natural position in the narrowed
vector has two beneficial effects:

1. It makes the narrowing shuffles cheaper (at least on AMDGPU), which
   allows the insert/extract fold to trigger.
2. It makes the narrowing shuffles in a chain of extract/insert
   compatible, which allows foldShufflesOfLengthChangingShuffles to
   successfully recognize a chain that can be folded.

There are minor X86 test changes that look reasonable to me. The IR
change for AVX2 in 
llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
doesn't change the assembly generated by `llc -mtriple=x86_64-- -mattr=AVX2`
at all.

commit-id:c151bb04
---
 .../Transforms/Vectorize/VectorCombine.cpp| 22 +--
 .../VectorCombine/AMDGPU/extract-insert-i8.ll | 17 ++
 .../X86/extract-insert-poison.ll  | 12 ++
 .../VectorCombine/X86/extract-insert.ll   |  8 +++
 .../Transforms/VectorCombine/X86/pr126085.ll  |  4 ++--
 5 files changed, 22 insertions(+), 41 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp 
b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index fc39f4123fac4..9025b93f75458 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -4455,22 +4455,15 @@ bool 
VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
   SmallVector Mask(NumDstElts, PoisonMaskElem);
 
   bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
-  bool IsExtIdxInBounds = ExtIdx < NumDstElts;
   bool NeedDstSrcSwap = isa(DstVec) && !isa(SrcVec);
   if (NeedDstSrcSwap) {
 SK = TargetTransformInfo::SK_PermuteSingleSrc;
-if (!IsExtIdxInBounds && NeedExpOrNarrow)
-  Mask[InsIdx] = 0;
-else
-  Mask[InsIdx] = ExtIdx;
+Mask[InsIdx] = ExtIdx % NumDstElts;
 std::swap(DstVec, SrcVec);
   } else {
 SK = TargetTransformInfo::SK_PermuteTwoSrc;
 std::iota(Mask.begin(), Mask.end(), 0);
-if (!IsExtIdxInBounds && NeedExpOrNarrow)
-  Mask[InsIdx] = NumDstElts;
-else
-  Mask[InsIdx] = ExtIdx + NumDstElts;
+Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
   }
 
   // Cost
@@ -4491,14 +4484,11 @@ bool 
VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
   NewCost += TTI.getShuffleCost(SK, DstVecTy, DstVecTy, Mask, CostKind, 0,
 nullptr, {DstVec, SrcVec});
   } else {
-// When creating length-changing-vector, always create with a Mask whose
-// first element has an ExtIdx, so that the first element of the vector
-// being created is always the target to be extracted.
+// When creating a length-changing-vector, always try to keep the relevant
+// element in an equivalent position, so that bulk shuffles are more likely
+// to be useful.
 ExtToVecMask.assign(NumDstElts, PoisonMaskElem);
-if (IsExtIdxInBounds)
-  ExtToVecMask[ExtIdx] = ExtIdx;
-else
-  ExtToVecMask[0] = ExtIdx;
+ExtToVecMask[ExtIdx % NumDstElts] = ExtIdx;
 // Add cost for expanding or narrowing
 NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
  DstVecTy, SrcVecTy, ExtToVecMask, CostKind);
diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll 
b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll
index eaab7199a3cf3..442a93689a791 100644
--- a/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll
+++ b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll
@@ -91,21 +91,8 @@ entry:
 define <8 x i8> @extract_insert_chain_shortening(<32 x i8> %in) {
 ; OPT-LABEL: define <8 x i8> @extract_insert_chain_shortening(
 ; OPT-SAME: <32 x i8> [[IN:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:[[I_1:%.*]] = extractelement <32 x i8> [[IN]], i64 17
-; OPT-NEXT:[[I_2:%.*]] = extractelement <32 x i8> [[IN]], i64 18
-; OPT-NEXT:[[I_3:%.*]] = extractelement <32 x i8> [[IN]], i64 19
-; OPT-NEXT:[[I_5:%.*]] = extractelement <32 x i8> [[IN]], i64 21
-; OPT-NEXT:[[I_6:%.*]] = extractelement <32 x i8> [[IN]], i64 22
-; OPT-NEXT:[[I_7:%.*]] = extractelement <32 x i8> [[IN]], i64 23
-; OPT-NEXT:[[O_0:%.*]] = shufflevector <32 x i8> [[IN]], <32 x i8> poison, 
<8 x i32> 
-; OPT-NEXT:[[O_1:%.*]] = insertelement <8 x i8> [[O_0]], i8 [[I_1]], i32 1
-; OPT-NEXT:[[O_2:%.*]] = insertelement <8 x i8> [[O_1]], i8 [[I_2]], i32 2
-; OPT-NEXT:[[O_3:%.*]] = insertelement <8 x i8> [[O_2]], i8 [[I_3]], i32 3
-; OPT-NEXT:[[TMP1:%.*]] = shufflevector <32 x i8> [[IN]], <32 x i8> 
poison, <8 x i32> 
-; OPT-NEXT:[[O_4:%.*]] = shufflevector <8 x i8> [[O_3]], <8 x i8> 
[[TMP1]], <8

[llvm-branch-commits] [clang-tools-extra] [clang-doc] `<ul>` must be nested in `<li>` (PR #168972)

2025-11-21 Thread Erick Velez via llvm-branch-commits

https://github.com/evelez7 updated 
https://github.com/llvm/llvm-project/pull/168972

>From 63446511664b2bb0ad8d9adb335a704880c67591 Mon Sep 17 00:00:00 2001
From: Erick Velez 
Date: Thu, 20 Nov 2025 15:46:21 -0800
Subject: [PATCH] [clang-doc] `<ul>` must be nested in `<li>`

The HTML spec states that only `<li>` can be children of `<ul>`. Nested
`<ul>` tags in an unordered list must be children of `<li>`.
---
 .../clang-doc/assets/class-template.mustache  | 88 +++
 .../assets/namespace-template.mustache| 32 ---
 .../test/clang-doc/mustache-index.cpp | 22 ++---
 3 files changed, 80 insertions(+), 62 deletions(-)

diff --git a/clang-tools-extra/clang-doc/assets/class-template.mustache 
b/clang-tools-extra/clang-doc/assets/class-template.mustache
index 8eb28f33e65f6..c5187026a2399 100644
--- a/clang-tools-extra/clang-doc/assets/class-template.mustache
+++ b/clang-tools-extra/clang-doc/assets/class-template.mustache
@@ -50,18 +50,21 @@
 
 Public 
Members
 
-
-{{#PublicMembers}}
-
-{{Name}}
-
-{{/PublicMembers}}
-
+
+
+{{#PublicMembers}}
+
+{{Name}}
+
+{{/PublicMembers}}
+
+
 {{/HasPublicMembers}}
 {{#ProtectedMembers}}
-
-Protected Members
-
+
+Protected 
Members
+
+
 
 {{#Obj}}
 
@@ -69,42 +72,49 @@
 
 {{/Obj}}
 
+
 {{/ProtectedMembers}}
 {{#HasPublicFunctions}}
 
 Public 
Method
 
-
-{{#PublicFunctions}}
-
-{{Name}}
-
-{{/PublicFunctions}}
-
+
+
+{{#PublicFunctions}}
+
+{{Name}}
+
+{{/PublicFunctions}}
+
+
 {{/HasPublicFunctions}}
 {{#ProtectedFunction}}
 
 Protected Method
 
-
-{{#Obj}}
-
-{{Name}}
-
-{{/Obj}}
-
+
+
+{{#Obj}}
+
+{{Name}}
+
+{{/Obj}}
+
+
 {{/ProtectedFunction}}
 {{#Enums}}
 
 Enums
 
-
-{{#Obj}}
-
-{{EnumName}}
-
-{{/Obj}}
-
+
+
+{{#Obj}}
+
+{{EnumName}}
+
+{{/Obj}}
+
+
 {{/Enums}}
 {{#Typedef}}
 Typedef
@@ -113,13 +123,15 @@
 
 Inner Classes
 
-
-{{#Links}}
-
-{{Name}}
-
-{{/Links}}
-
+
+
+{{#Links}}
+
+{{Name}}
+
+{{/Links}}
+
+
 {{/Record}}
 
 
diff --git a/clang-tools-extra/clang-doc/assets/namespace-template.mustache 
b/clang-tools-extra/clang-doc/assets/namespace-template.mustache
index 614023b9f6723..8a5be46aee28b 100644
--- a/clang-tools-extra/clang-doc/assets/namespace-template.mustache
+++ b/clang-tools-extra/clang-doc/assets/namespace-template.mustache
@@ -50,25 +50,29 @@
 
 Enums
 
-
-{{#Enums}}
-
-{{Name}}
-
-{{/Enums}}
-
+  

[llvm-branch-commits] [clang-tools-extra] [clang-doc] `<ul>` must be nested in `<li>` (PR #168972)

2025-11-21 Thread Erick Velez via llvm-branch-commits

https://github.com/evelez7 updated 
https://github.com/llvm/llvm-project/pull/168972

>From 63446511664b2bb0ad8d9adb335a704880c67591 Mon Sep 17 00:00:00 2001
From: Erick Velez 
Date: Thu, 20 Nov 2025 15:46:21 -0800
Subject: [PATCH] [clang-doc] `<ul>` must be nested in `<li>`

The HTML spec states that only `<li>` can be children of `<ul>`. Nested
`<ul>` tags in an unordered list must be children of `<li>`.
---
 .../clang-doc/assets/class-template.mustache  | 88 +++
 .../assets/namespace-template.mustache| 32 ---
 .../test/clang-doc/mustache-index.cpp | 22 ++---
 3 files changed, 80 insertions(+), 62 deletions(-)

diff --git a/clang-tools-extra/clang-doc/assets/class-template.mustache 
b/clang-tools-extra/clang-doc/assets/class-template.mustache
index 8eb28f33e65f6..c5187026a2399 100644
--- a/clang-tools-extra/clang-doc/assets/class-template.mustache
+++ b/clang-tools-extra/clang-doc/assets/class-template.mustache
@@ -50,18 +50,21 @@
 
 Public 
Members
 
-
-{{#PublicMembers}}
-
-{{Name}}
-
-{{/PublicMembers}}
-
+
+
+{{#PublicMembers}}
+
+{{Name}}
+
+{{/PublicMembers}}
+
+
 {{/HasPublicMembers}}
 {{#ProtectedMembers}}
-
-Protected Members
-
+
+Protected 
Members
+
+
 
 {{#Obj}}
 
@@ -69,42 +72,49 @@
 
 {{/Obj}}
 
+
 {{/ProtectedMembers}}
 {{#HasPublicFunctions}}
 
 Public 
Method
 
-
-{{#PublicFunctions}}
-
-{{Name}}
-
-{{/PublicFunctions}}
-
+
+
+{{#PublicFunctions}}
+
+{{Name}}
+
+{{/PublicFunctions}}
+
+
 {{/HasPublicFunctions}}
 {{#ProtectedFunction}}
 
 Protected Method
 
-
-{{#Obj}}
-
-{{Name}}
-
-{{/Obj}}
-
+
+
+{{#Obj}}
+
+{{Name}}
+
+{{/Obj}}
+
+
 {{/ProtectedFunction}}
 {{#Enums}}
 
 Enums
 
-
-{{#Obj}}
-
-{{EnumName}}
-
-{{/Obj}}
-
+
+
+{{#Obj}}
+
+{{EnumName}}
+
+{{/Obj}}
+
+
 {{/Enums}}
 {{#Typedef}}
 Typedef
@@ -113,13 +123,15 @@
 
 Inner Classes
 
-
-{{#Links}}
-
-{{Name}}
-
-{{/Links}}
-
+
+
+{{#Links}}
+
+{{Name}}
+
+{{/Links}}
+
+
 {{/Record}}
 
 
diff --git a/clang-tools-extra/clang-doc/assets/namespace-template.mustache 
b/clang-tools-extra/clang-doc/assets/namespace-template.mustache
index 614023b9f6723..8a5be46aee28b 100644
--- a/clang-tools-extra/clang-doc/assets/namespace-template.mustache
+++ b/clang-tools-extra/clang-doc/assets/namespace-template.mustache
@@ -50,25 +50,29 @@
 
 Enums
 
-
-{{#Enums}}
-
-{{Name}}
-
-{{/Enums}}
-
+  

[llvm-branch-commits] [llvm] [LowerMemIntrinsics] Optimize memset lowering (PR #169040)

2025-11-21 Thread via llvm-branch-commits

github-actions[bot] wrote:


# :penguin: Linux x64 Test Results

* 186433 tests passed
* 4868 tests skipped

https://github.com/llvm/llvm-project/pull/169040
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Make SIShrinkInstructions pass return valid changed state (PR #168833)

2025-11-21 Thread Matt Arsenault via llvm-branch-commits


@@ -580,6 +591,7 @@ bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr 
&MI) const {
 if (Dest->getReg().isVirtual() && SrcReg->isReg()) {
   MRI->setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
   MRI->setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
+  MoveIterator = true;

arsenm wrote:

I don't understand why this requires "MoveIterator"?

https://github.com/llvm/llvm-project/pull/168833
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] RuntimeLibcalls: Add mustprogress to common function attributes (PR #167080)

2025-11-21 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

ping 

https://github.com/llvm/llvm-project/pull/167080
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] Use register pair for PC spill (PR #169098)

2025-11-21 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Scott Linder (slinder1)


Changes



---

Patch is 1.26 MiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/169098.diff


65 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp (+20) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll 
(+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll 
(+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll (+2831-2831) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.320bit.ll (+22-22) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll (+998-998) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.768bit.ll (+16-16) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.832bit.ll (+36-36) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.896bit.ll (+88-88) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll (+164-164) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll 
(+140-140) 
- (modified) 
llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior2.ll 
(+7-6) 
- (modified) llvm/test/CodeGen/AMDGPU/bf16.ll (+92-74) 
- (modified) llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll (+78-78) 
- (modified) llvm/test/CodeGen/AMDGPU/call-args-inreg.ll (+48-48) 
- (modified) llvm/test/CodeGen/AMDGPU/call-argument-types.ll (+20-20) 
- (modified) llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll (+58-58) 
- (modified) llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll (+53-53) 
- (modified) llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll 
(+7-7) 
- (modified) llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll (+7-7) 
- (modified) llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/debug-frame.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll 
(+32-32) 
- (modified) llvm/test/CodeGen/AMDGPU/dynamic-vgpr-reserve-stack-for-cwsr.ll 
(+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/fix-frame-reg-in-custom-csr-spills.ll 
(+1-1) 
- (modified) 
llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll (+5-20) 
- (modified) llvm/test/CodeGen/AMDGPU/function-args-inreg.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll (+72-72) 
- (modified) llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll (+616-618) 
- (modified) llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll 
(+36-36) 
- (modified) llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll (+21-21) 
- (modified) llvm/test/CodeGen/AMDGPU/global-alias.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll 
(+46-46) 
- (modified) llvm/test/CodeGen/AMDGPU/indirect-call.ll (+552-552) 
- (modified) llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll (+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll 
(+818-816) 
- (modified) llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll (+10-10) 
- (modified) llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll (+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/nested-calls.ll (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll (+5-20) 
- (modified) llvm/test/CodeGen/AMDGPU/s-getpc-b64-remat.ll (+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir 
(+95-95) 
- (modified) llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll (+6-21) 
- (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll (+160-139) 
- (modified) llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir 
(+9-9) 
- (modified) llvm/test/CodeGen/AMDGPU/sibling-call.ll (+111-111) 
- (modified) llvm/test/CodeGen/AMDGPU/stack-realign.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll (+5-5) 
- (modified) llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll (+7-7) 
- (modified) llvm/test/CodeGen/AMDGPU/swdev504645-global-fold.ll (+2-1) 
- (modified) llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.error.ll (+2-2) 
- (modified) 
llvm/test/CodeGen/AMDGPU/unfold-masked-merge-scalar-variablemask.ll (+20-18) 
- (modified) llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll 
(+84-84) 
- (modified) llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll (+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/wave32.ll (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll (+10-10) 
- (modified) llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll (+1-1

[llvm-branch-commits] [clang-tools-extra] [clang-doc] Add definition information to class templates (PR #169109)

2025-11-21 Thread Erick Velez via llvm-branch-commits

evelez7 wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.com/github/pr/llvm/llvm-project/169109?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#169109** <a href="https://app.graphite.com/github/pr/llvm/llvm-project/169109?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.com/github/pr/llvm/llvm-project/169109?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a>
* **#169107** <a href="https://app.graphite.com/github/pr/llvm/llvm-project/169107?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`




This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn 
more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>.


https://github.com/llvm/llvm-project/pull/169109
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang-tools-extra] [clang-doc] Add definition information to class templates (PR #169109)

2025-11-21 Thread Erick Velez via llvm-branch-commits

https://github.com/evelez7 created 
https://github.com/llvm/llvm-project/pull/169109

None

>From dc51cf9e809bf3b90eb633152a0af6066bada997 Mon Sep 17 00:00:00 2001
From: Erick Velez 
Date: Fri, 21 Nov 2025 14:12:42 -0800
Subject: [PATCH] [clang-doc] Add definition information to class templates

---
 .../clang-doc/assets/class-template.mustache  | 1 +
 clang-tools-extra/test/clang-doc/namespace.cpp| 8 
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/clang-tools-extra/clang-doc/assets/class-template.mustache 
b/clang-tools-extra/clang-doc/assets/class-template.mustache
index 8eb28f33e65f6..8e221227bd9d9 100644
--- a/clang-tools-extra/clang-doc/assets/class-template.mustache
+++ b/clang-tools-extra/clang-doc/assets/class-template.mustache
@@ -128,6 +128,7 @@
 
 
 {{TagType}} {{Name}}
+Defined at line {{Location.LineNumber}} of file 
{{Location.Filename}}
 {{#Description}}
 
 {{>Comments}}
diff --git a/clang-tools-extra/test/clang-doc/namespace.cpp 
b/clang-tools-extra/test/clang-doc/namespace.cpp
index 96ea5bc52b0be..e68cea3c1d539 100644
--- a/clang-tools-extra/test/clang-doc/namespace.cpp
+++ b/clang-tools-extra/test/clang-doc/namespace.cpp
@@ -70,7 +70,7 @@ void anonFunction() {}
 class AnonClass {};
 // MD-ANON-CLASS-LINE: *Defined at 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp#[[@LINE-1]]*
 // HTML-ANON-CLASS-LINE: Defined at line [[@LINE-2]] of file 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp
-// MUSTACHE-ANON-CLASS-LINE-NOT: Defined at line [[@LINE-3]] of file 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp
+// MUSTACHE-ANON-CLASS-LINE: Defined at line [[@LINE-3]] of file 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp
 
 // MD-ANON-CLASS: # class AnonClass
 // HTML-ANON-CLASS: class AnonClass
@@ -117,7 +117,7 @@ void functionInPrimaryNamespace() {}
 class ClassInPrimaryNamespace {};
 // MD-PRIMARY-CLASS-LINE: *Defined at 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp#[[@LINE-1]]*
 // HTML-PRIMARY-CLASS-LINE: Defined at line [[@LINE-2]] of file 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp
-// MUSTACHE-PRIMARY-CLASS-LINE-NOT: Defined at line [[@LINE-3]] of file 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp
+// MUSTACHE-PRIMARY-CLASS-LINE: Defined at line [[@LINE-3]] of file 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp
 
 // MD-PRIMARY-CLASS: # class ClassInPrimaryNamespace
 // MD-PRIMARY-CLASS: Class in PrimaryNamespace
@@ -139,7 +139,7 @@ void functionInNestedNamespace() {}
 class ClassInNestedNamespace {};
 // MD-NESTED-CLASS-LINE: *Defined at 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp#[[@LINE-1]]*
 // HTML-NESTED-CLASS-LINE: Defined at line [[@LINE-2]] of file 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp
-// MUSTACHE-NESTED-CLASS-LINE-NOT: Defined at line [[@LINE-3]] of file 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp
+// MUSTACHE-NESTED-CLASS-LINE: Defined at line [[@LINE-3]] of file 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp
 
 // MD-NESTED-CLASS: # class ClassInNestedNamespace
 // MD-NESTED-CLASS: Class in NestedNamespace
@@ -233,7 +233,7 @@ void functionInAnotherNamespace() {}
 class ClassInAnotherNamespace {};
 // MD-ANOTHER-CLASS-LINE: *Defined at 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp#[[@LINE-1]]*
 // HTML-ANOTHER-CLASS-LINE: Defined at line [[@LINE-2]] of file 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp
-// MUSTACHE-ANOTHER-CLASS-LINE-NOT: Defined at line [[@LINE-3]] of file 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp
+// MUSTACHE-ANOTHER-CLASS-LINE: Defined at line [[@LINE-3]] of file 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp
 
 // MD-ANOTHER-CLASS: # class ClassInAnotherNamespace
 // MD-ANOTHER-CLASS:  Class in AnotherNamespace

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang-tools-extra] [clang-doc] Add definition information to class templates (PR #169109)

2025-11-21 Thread Erick Velez via llvm-branch-commits

https://github.com/evelez7 ready_for_review 
https://github.com/llvm/llvm-project/pull/169109
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang-tools-extra] [clang-doc] Add definition information to class templates (PR #169109)

2025-11-21 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang-tools-extra

Author: Erick Velez (evelez7)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/169109.diff


2 Files Affected:

- (modified) clang-tools-extra/clang-doc/assets/class-template.mustache (+1) 
- (modified) clang-tools-extra/test/clang-doc/namespace.cpp (+4-4) 


``diff
diff --git a/clang-tools-extra/clang-doc/assets/class-template.mustache 
b/clang-tools-extra/clang-doc/assets/class-template.mustache
index 8eb28f33e65f6..8e221227bd9d9 100644
--- a/clang-tools-extra/clang-doc/assets/class-template.mustache
+++ b/clang-tools-extra/clang-doc/assets/class-template.mustache
@@ -128,6 +128,7 @@
 
 
 {{TagType}} {{Name}}
+Defined at line {{Location.LineNumber}} of file 
{{Location.Filename}}
 {{#Description}}
 
 {{>Comments}}
diff --git a/clang-tools-extra/test/clang-doc/namespace.cpp 
b/clang-tools-extra/test/clang-doc/namespace.cpp
index 96ea5bc52b0be..e68cea3c1d539 100644
--- a/clang-tools-extra/test/clang-doc/namespace.cpp
+++ b/clang-tools-extra/test/clang-doc/namespace.cpp
@@ -70,7 +70,7 @@ void anonFunction() {}
 class AnonClass {};
 // MD-ANON-CLASS-LINE: *Defined at 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp#[[@LINE-1]]*
 // HTML-ANON-CLASS-LINE: Defined at line [[@LINE-2]] of file 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp
-// MUSTACHE-ANON-CLASS-LINE-NOT: Defined at line [[@LINE-3]] of file 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp
+// MUSTACHE-ANON-CLASS-LINE: Defined at line [[@LINE-3]] of file 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp
 
 // MD-ANON-CLASS: # class AnonClass
 // HTML-ANON-CLASS: class AnonClass
@@ -117,7 +117,7 @@ void functionInPrimaryNamespace() {}
 class ClassInPrimaryNamespace {};
 // MD-PRIMARY-CLASS-LINE: *Defined at 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp#[[@LINE-1]]*
 // HTML-PRIMARY-CLASS-LINE: Defined at line [[@LINE-2]] of file 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp
-// MUSTACHE-PRIMARY-CLASS-LINE-NOT: Defined at line [[@LINE-3]] of file 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp
+// MUSTACHE-PRIMARY-CLASS-LINE: Defined at line [[@LINE-3]] of file 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp
 
 // MD-PRIMARY-CLASS: # class ClassInPrimaryNamespace
 // MD-PRIMARY-CLASS: Class in PrimaryNamespace
@@ -139,7 +139,7 @@ void functionInNestedNamespace() {}
 class ClassInNestedNamespace {};
 // MD-NESTED-CLASS-LINE: *Defined at 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp#[[@LINE-1]]*
 // HTML-NESTED-CLASS-LINE: Defined at line [[@LINE-2]] of file 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp
-// MUSTACHE-NESTED-CLASS-LINE-NOT: Defined at line [[@LINE-3]] of file 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp
+// MUSTACHE-NESTED-CLASS-LINE: Defined at line [[@LINE-3]] of file 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp
 
 // MD-NESTED-CLASS: # class ClassInNestedNamespace
 // MD-NESTED-CLASS: Class in NestedNamespace
@@ -233,7 +233,7 @@ void functionInAnotherNamespace() {}
 class ClassInAnotherNamespace {};
 // MD-ANOTHER-CLASS-LINE: *Defined at 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp#[[@LINE-1]]*
 // HTML-ANOTHER-CLASS-LINE: Defined at line [[@LINE-2]] of file 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp
-// MUSTACHE-ANOTHER-CLASS-LINE-NOT: Defined at line [[@LINE-3]] of file 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp
+// MUSTACHE-ANOTHER-CLASS-LINE: Defined at line [[@LINE-3]] of file 
{{.*}}clang-tools-extra{{[\/]}}test{{[\/]}}clang-doc{{[\/]}}namespace.cpp
 
 // MD-ANOTHER-CLASS: # class ClassInAnotherNamespace
 // MD-ANOTHER-CLASS:  Class in AnotherNamespace

``




https://github.com/llvm/llvm-project/pull/169109
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang-tools-extra] [clang-doc] Add definition information to class templates (PR #169109)

2025-11-21 Thread Erick Velez via llvm-branch-commits

evelez7 wrote:

Definition line can be seen in the basic project: 
https://erickvelez.com/clang-doc-mustache-output/pr169109/GlobalNamespace/_ZTV5Shape.html

https://github.com/llvm/llvm-project/pull/169109
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang-tools-extra] [clang-doc] Add definition information to class templates (PR #169109)

2025-11-21 Thread via llvm-branch-commits

github-actions[bot] wrote:


# :penguin: Linux x64 Test Results

* 3053 tests passed
* 7 tests skipped

https://github.com/llvm/llvm-project/pull/169109
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] 8d821c3 - Revert "[MLIR][GPU] subgroup_mma fp64 extension (#165873)"

2025-11-21 Thread via llvm-branch-commits

Author: Fabian Mora
Date: 2025-11-21T10:00:01-05:00
New Revision: 8d821c3cdda1758a7391cd1b3afd8dfceed82095

URL: 
https://github.com/llvm/llvm-project/commit/8d821c3cdda1758a7391cd1b3afd8dfceed82095
DIFF: 
https://github.com/llvm/llvm-project/commit/8d821c3cdda1758a7391cd1b3afd8dfceed82095.diff

LOG: Revert "[MLIR][GPU] subgroup_mma fp64 extension (#165873)"

This reverts commit 49995b2af0abbec1095031dfe9eb049945b6d4f7.

Added: 


Modified: 
mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h
mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp
mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir
mlir/test/Dialect/GPU/invalid.mlir

Removed: 
mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f64.mlir



diff  --git a/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h 
b/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h
index 48982ac6efe7c..4c8abea680b66 100644
--- a/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h
+++ b/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h
@@ -27,7 +27,7 @@ class MMAMatrixType;
 #define GEN_PASS_DECL_CONVERTGPUOPSTONVVMOPS
 #include "mlir/Conversion/Passes.h.inc"
 
-Type convertMMAToLLVMType(gpu::MMAMatrixType type);
+LLVM::LLVMStructType convertMMAToLLVMType(gpu::MMAMatrixType type);
 
 /// Configure target to convert from the GPU dialect to NVVM.
 void configureGpuToNVVMConversionLegality(ConversionTarget &target);

diff  --git a/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td 
b/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
index 2c29bb8a01a41..860f893367203 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
@@ -114,7 +114,7 @@ def GPU_MMAMatrix : DialectType<
   GPU_Dialect, IsMMAMatrixTypePred, "MMAMatrix type">;
 
 // Memref type acceptable to gpu.subgroup_mma_{load|store}_matrix ops.
-def GPU_MMAMemRef : MemRefOf<[I8, I32, F16, F32, F64, VectorOfRankAndType<[1], 
[I8, I32, F16, F32, F64]>]>;
+def GPU_MMAMemRef : MemRefOf<[I8, I32, F16, F32, VectorOfRankAndType<[1], [I8, 
I32, F16, F32]>]>;
 
 class MMAMatrixOf allowedTypes> :
   ContainerType, IsMMAMatrixTypePred,

diff  --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td 
b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 5c7df25c58cde..a6c6038e1e224 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -1872,7 +1872,7 @@ def GPU_SubgroupMmaStoreMatrixOp : 
GPU_Op<"subgroup_mma_store_matrix",
 ```
   }];
 
-  let arguments = (ins Arg>:$src,
+  let arguments = (ins Arg>:$src,
   Arg]>:$dstMemref,
   Variadic:$indices,
   IndexAttr:$leadDimension,
@@ -1919,9 +1919,9 @@ def GPU_SubgroupMmaComputeOp
 ```
   }];
 
-  let arguments = (ins Arg>:$opA,
-  Arg>:$opB,
-  Arg>:$opC,
+  let arguments = (ins Arg>:$opA,
+  Arg>:$opB,
+  Arg>:$opC,
   OptionalAttr:$a_transpose,
   OptionalAttr:$b_transpose);
 

diff  --git a/mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp 
b/mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp
index 6254de81780f5..99c059cb03299 100644
--- a/mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp
@@ -17,7 +17,6 @@
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/LLVMIR/NVVMDialect.h"
 #include "mlir/IR/TypeUtilities.h"
-#include "mlir/IR/Types.h"
 
 using namespace mlir;
 
@@ -58,8 +57,7 @@ static NVVM::MMATypes getElementType(gpu::MMAMatrixType type) 
{
   if (type.getElementType().isF32())
 return type.getOperand() == "COp" ? NVVM::MMATypes::f32
   : NVVM::MMATypes::tf32;
-  if (type.getElementType().isF64())
-return NVVM::MMATypes::f64;
+
   if (type.getElementType().isSignedInteger(8))
 return NVVM::MMATypes::s8;
   if (type.getElementType().isUnsignedInteger(8))
@@ -214,13 +212,8 @@ struct WmmaMmaOpToNVVMLowering
 // then passed on to the intrinsic call. Emit llvm ops to extract 
individual
 // values form lowered memrefs.
 SmallVector unpackedOps;
+
 auto unpackOp = [&](Value operand) {
-  // f64 a and b fragments are not structs but scalars.
-  if (!isa(operand.getType())) {
-unpackedOps.push_back(operand);
-return;
-  }
-  // every other type is lowered to an LLVM struct, extract the values.
   auto structType = cast(operand.getType());
   for (size_t i = 0, e = structType.getBody().size(); i < e; ++i) {
 Value toUse = LLVM::ExtractValueOp::create(rewriter, loc, operand, i);
@@ -283,16 +276,10 @@ struct WmmaConstantOpToNVVMLowering
   return failure();
 Location loc = subgroupMmaConstantOp.getLoc();
 Va

[llvm-branch-commits] [llvm] [LowerMemIntrinsics] Optimize memset lowering (PR #169040)

2025-11-21 Thread Fabian Ritter via llvm-branch-commits

ritter-x2a wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack
> <a href="https://app.graphite.com/github/pr/llvm/llvm-project/169040?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#169040** <a href="https://app.graphite.com/github/pr/llvm/llvm-project/169040?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.com/github/pr/llvm/llvm-project/169040?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a>
* **#169039** <a href="https://app.graphite.com/github/pr/llvm/llvm-project/169039?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`




This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn
more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>.


https://github.com/llvm/llvm-project/pull/169040
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LowerMemIntrinsics] Optimize memset lowering (PR #169040)

2025-11-21 Thread via llvm-branch-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


```bash
git-clang-format --diff origin/main HEAD --extensions h,cpp -- \
  llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h \
  llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp \
  llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp \
  llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp \
  llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp \
  llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp \
  llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp --diff_from_common_commit
```

:warning:
The reproduction instructions above might return results for more than one PR
in a stack if you are using a stacked PR workflow. You can limit the results by
changing `origin/main` to the base branch/commit you want to compare against.
:warning:





View the diff from clang-format here.


``diff
diff --git a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp 
b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index c0d7dbf00..07bfceb99 100644
--- a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -933,11 +933,11 @@ static void createMemMoveLoopKnownSize(Instruction 
*InsertBefore,
 /// Create a Value of \p DstType that consists of a sequence of copies of
 /// \p SetValue, using bitcasts and a vector splat.
 static Value *createMemSetSplat(const DataLayout &DL, IRBuilderBase &B,
-Value *SetValue, Type *DstType) {
+Value *SetValue, Type *DstType) {
   unsigned DstSize = DL.getTypeStoreSize(DstType);
   Type *SetValueType = SetValue->getType();
   unsigned SetValueSize = DL.getTypeStoreSize(SetValueType);
-  assert(SetValueSize == DL.getTypeAllocSize(SetValueType) && 
+  assert(SetValueSize == DL.getTypeAllocSize(SetValueType) &&
  "Store size and alloc size of SetValue's type must match");
   assert(SetValueSize != 0 && DstSize % SetValueSize == 0 &&
  "DstType size must be a multiple of SetValue size");

``




https://github.com/llvm/llvm-project/pull/169040
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NPM] Schedule PhysicalRegisterUsageAnalysis before RegUsageInfoCollectorPass (PR #168832)

2025-11-21 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/168832
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Make SIShrinkInstructions pass return valid changed state (PR #168833)

2025-11-21 Thread Vikram Hegde via llvm-branch-commits


@@ -580,6 +591,7 @@ bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr 
&MI) const {
 if (Dest->getReg().isVirtual() && SrcReg->isReg()) {
   MRI->setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
   MRI->setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
+  MoveIterator = true;

vikramRH wrote:

IIUC this was introduced to skip looking for further shrinking and move on with 
next instruction.

https://github.com/llvm/llvm-project/pull/168833
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [Delinarization] Add test for inferred array size exceeds integer range (PR #169048)

2025-11-21 Thread via llvm-branch-commits

github-actions[bot] wrote:


# :penguin: Linux x64 Test Results

* 186451 tests passed
* 4869 tests skipped

https://github.com/llvm/llvm-project/pull/169048
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lldb] 43d637e - Merge branch 'main' into revert-167879-fix-openmp-firstprivate-pointer

2025-11-21 Thread via llvm-branch-commits

Author: dpalermo
Date: 2025-11-21T21:03:14-06:00
New Revision: 43d637e0c44228d7769308c32b67d8ed5a988c1f

URL: 
https://github.com/llvm/llvm-project/commit/43d637e0c44228d7769308c32b67d8ed5a988c1f
DIFF: 
https://github.com/llvm/llvm-project/commit/43d637e0c44228d7769308c32b67d8ed5a988c1f.diff

LOG: Merge branch 'main' into revert-167879-fix-openmp-firstprivate-pointer

Added: 


Modified: 
lldb/packages/Python/lldbsuite/test/decorators.py
lldb/test/Shell/helper/toolchain.py
llvm/utils/lit/lit/llvm/config.py

Removed: 




diff  --git a/lldb/packages/Python/lldbsuite/test/decorators.py 
b/lldb/packages/Python/lldbsuite/test/decorators.py
index 23d2165e07f7e..7311b17f97e01 100644
--- a/lldb/packages/Python/lldbsuite/test/decorators.py
+++ b/lldb/packages/Python/lldbsuite/test/decorators.py
@@ -1059,6 +1059,16 @@ def is_compiler_with_address_sanitizer():
 return skipTestIfFn(is_compiler_with_address_sanitizer)(func)
 
 
+def skipUnlessBoundsSafety(func):
+"""Decorate the item to skip test unless Clang -fbounds-safety is 
supported."""
+
+def is_compiler_with_bounds_safety():
+if not _compiler_supports(lldbplatformutil.getCompiler(), 
"-fbounds-safety"):
+return "Compiler cannot compile with -fbounds-safety"
+return None
+
+return skipTestIfFn(is_compiler_with_bounds_safety)(func)
+
 def skipIfAsan(func):
 """Skip this test if the environment is set up to run LLDB *itself* under 
ASAN."""
 return skipTestIfFn(is_running_under_asan)(func)

diff  --git a/lldb/test/Shell/helper/toolchain.py 
b/lldb/test/Shell/helper/toolchain.py
index faa29d23387cc..b9e7dd7c196ab 100644
--- a/lldb/test/Shell/helper/toolchain.py
+++ b/lldb/test/Shell/helper/toolchain.py
@@ -277,6 +277,9 @@ def use_support_substitutions(config):
 required=True,
 use_installed=True,
 )
+if llvm_config.clang_has_bounds_safety():
+llvm_config.lit_config.note("clang has -fbounds-safety support")
+config.available_features.add("clang-bounds-safety")
 
 if sys.platform == "win32":
 _use_msvc_substitutions(config)

diff  --git a/llvm/utils/lit/lit/llvm/config.py 
b/llvm/utils/lit/lit/llvm/config.py
index 59982c94b787c..f212928caee1b 100644
--- a/llvm/utils/lit/lit/llvm/config.py
+++ b/llvm/utils/lit/lit/llvm/config.py
@@ -293,6 +293,17 @@ def get_process_output(self, command):
 except OSError:
 self.lit_config.fatal("Could not run process %s" % command)
 
+def check_process_success(self, command):
+cp = subprocess.run(
+command,
+stdout=subprocess.DEVNULL,
+stderr=subprocess.DEVNULL,
+env=self.config.environment,
+)
+if cp.returncode == 0:
+return True
+return False
+
 def feature_config(self, features):
 # Ask llvm-config about the specified feature.
 arguments = [x for (x, _) in features]
@@ -334,6 +345,25 @@ def get_clang_builtin_include_dir(self, clang):
 # Ensure the result is an ascii string, across Python2.5+ - Python3.
 return clang_dir
 
+def clang_has_bounds_safety(self, additional_flags=None):
+"""
+Return True iff `self.config.clang` supports -fbounds-safety
+"""
+if not self.config.clang:
+return False
+if not os.path.exists(self.config.clang):
+return False
+if additional_flags is None:
+additional_flags = []
+# Invoke the clang driver to see if it supports the `-fbounds-safety`
+# flag. Only the downstream implementation has this flag so this is
+# a simple way to check if the full implementation is available or not.
+cmd = [self.config.clang] + additional_flags
+cmd += ["-fbounds-safety", "-###"]
+if self.check_process_success(cmd):
+return True
+return False
+
 # On macOS, LSan is only supported on clang versions 5 and higher
 def get_clang_has_lsan(self, clang, triple):
 if not clang:



___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lldb] 8bdbc57 - [NFC][LLDB] Make it possible to detect if the compiler used in tests supports -fbounds-safety (#169112)

2025-11-21 Thread via llvm-branch-commits

Author: Dan Liew
Date: 2025-11-21T19:01:31-08:00
New Revision: 8bdbc57b8975d77da88562392299ee5d9c2b6cbb

URL: 
https://github.com/llvm/llvm-project/commit/8bdbc57b8975d77da88562392299ee5d9c2b6cbb
DIFF: 
https://github.com/llvm/llvm-project/commit/8bdbc57b8975d77da88562392299ee5d9c2b6cbb.diff

LOG: [NFC][LLDB] Make it possible to detect if the compiler used in tests 
supports -fbounds-safety (#169112)

This patch makes it possible to detect in LLDB shell and API tests if
`-fbounds-safety` is supported by the compiler used for testing. The
motivation behind this is to allow upstreaming
https://github.com/swiftlang/llvm-project/pull/11835 but with the tests
disabled in upstream because the full implementation of -fbounds-safety
isn't available in Clang yet.

For shell tests when -fbounds-safety is available the
`clang-bounds-safety` feature is available which means tests can be
annotated with `# REQUIRES: clang-bounds-safety`.

API tests that need -fbounds-safety support in the compiler can use the
new `@skipUnlessBoundsSafety` decorator.

rdar://165225507

Added: 


Modified: 
lldb/packages/Python/lldbsuite/test/decorators.py
lldb/test/Shell/helper/toolchain.py
llvm/utils/lit/lit/llvm/config.py

Removed: 




diff  --git a/lldb/packages/Python/lldbsuite/test/decorators.py 
b/lldb/packages/Python/lldbsuite/test/decorators.py
index 23d2165e07f7e..7311b17f97e01 100644
--- a/lldb/packages/Python/lldbsuite/test/decorators.py
+++ b/lldb/packages/Python/lldbsuite/test/decorators.py
@@ -1059,6 +1059,16 @@ def is_compiler_with_address_sanitizer():
 return skipTestIfFn(is_compiler_with_address_sanitizer)(func)
 
 
+def skipUnlessBoundsSafety(func):
+"""Decorate the item to skip test unless Clang -fbounds-safety is 
supported."""
+
+def is_compiler_with_bounds_safety():
+if not _compiler_supports(lldbplatformutil.getCompiler(), 
"-fbounds-safety"):
+return "Compiler cannot compile with -fbounds-safety"
+return None
+
+return skipTestIfFn(is_compiler_with_bounds_safety)(func)
+
 def skipIfAsan(func):
 """Skip this test if the environment is set up to run LLDB *itself* under 
ASAN."""
 return skipTestIfFn(is_running_under_asan)(func)

diff  --git a/lldb/test/Shell/helper/toolchain.py 
b/lldb/test/Shell/helper/toolchain.py
index faa29d23387cc..b9e7dd7c196ab 100644
--- a/lldb/test/Shell/helper/toolchain.py
+++ b/lldb/test/Shell/helper/toolchain.py
@@ -277,6 +277,9 @@ def use_support_substitutions(config):
 required=True,
 use_installed=True,
 )
+if llvm_config.clang_has_bounds_safety():
+llvm_config.lit_config.note("clang has -fbounds-safety support")
+config.available_features.add("clang-bounds-safety")
 
 if sys.platform == "win32":
 _use_msvc_substitutions(config)

diff  --git a/llvm/utils/lit/lit/llvm/config.py 
b/llvm/utils/lit/lit/llvm/config.py
index 59982c94b787c..f212928caee1b 100644
--- a/llvm/utils/lit/lit/llvm/config.py
+++ b/llvm/utils/lit/lit/llvm/config.py
@@ -293,6 +293,17 @@ def get_process_output(self, command):
 except OSError:
 self.lit_config.fatal("Could not run process %s" % command)
 
+def check_process_success(self, command):
+cp = subprocess.run(
+command,
+stdout=subprocess.DEVNULL,
+stderr=subprocess.DEVNULL,
+env=self.config.environment,
+)
+if cp.returncode == 0:
+return True
+return False
+
 def feature_config(self, features):
 # Ask llvm-config about the specified feature.
 arguments = [x for (x, _) in features]
@@ -334,6 +345,25 @@ def get_clang_builtin_include_dir(self, clang):
 # Ensure the result is an ascii string, across Python2.5+ - Python3.
 return clang_dir
 
+def clang_has_bounds_safety(self, additional_flags=None):
+"""
+Return True iff `self.config.clang` supports -fbounds-safety
+"""
+if not self.config.clang:
+return False
+if not os.path.exists(self.config.clang):
+return False
+if additional_flags is None:
+additional_flags = []
+# Invoke the clang driver to see if it supports the `-fbounds-safety`
+# flag. Only the downstream implementation has this flag so this is
+# a simple way to check if the full implementation is available or not.
+cmd = [self.config.clang] + additional_flags
+cmd += ["-fbounds-safety", "-###"]
+if self.check_process_success(cmd):
+return True
+return False
+
 # On macOS, LSan is only supported on clang versions 5 and higher
 def get_clang_has_lsan(self, clang, triple):
 if not clang:



___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] cefbc48 - Revert "[OpenMP] Fix firstprivate pointer handling in target regions (#167879)"

2025-11-21 Thread via llvm-branch-commits

Author: dpalermo
Date: 2025-11-21T20:57:27-06:00
New Revision: cefbc487d482c607b3292d4356f2eeac8783c719

URL: 
https://github.com/llvm/llvm-project/commit/cefbc487d482c607b3292d4356f2eeac8783c719
DIFF: 
https://github.com/llvm/llvm-project/commit/cefbc487d482c607b3292d4356f2eeac8783c719.diff

LOG: Revert "[OpenMP] Fix firstprivate pointer handling in target regions 
(#167879)"

This reverts commit 622f72f4bef8b177e1e4f318465260fbdb7711ef.

Added: 


Modified: 
clang/lib/CodeGen/CGOpenMPRuntime.cpp
clang/test/OpenMP/target_codegen.cpp
clang/test/OpenMP/target_defaultmap_codegen_01.cpp
clang/test/OpenMP/target_depend_codegen.cpp
clang/test/OpenMP/target_map_codegen_01.cpp
clang/test/OpenMP/target_map_codegen_09.cpp
clang/test/OpenMP/target_map_codegen_10.cpp
clang/test/OpenMP/target_map_codegen_26.cpp
clang/test/OpenMP/target_parallel_depend_codegen.cpp
clang/test/OpenMP/target_parallel_for_depend_codegen.cpp
clang/test/OpenMP/target_parallel_for_simd_depend_codegen.cpp
clang/test/OpenMP/target_simd_depend_codegen.cpp
clang/test/OpenMP/target_teams_depend_codegen.cpp
clang/test/OpenMP/target_teams_distribute_depend_codegen.cpp
clang/test/OpenMP/target_teams_distribute_parallel_for_depend_codegen.cpp

clang/test/OpenMP/target_teams_distribute_parallel_for_simd_depend_codegen.cpp
clang/test/OpenMP/target_teams_distribute_simd_depend_codegen.cpp

Removed: 
clang/test/OpenMP/target_firstprivate_pointer_codegen.cpp



diff  --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 65a7daca9fcf1..a8255ac74cfcf 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -28,7 +28,6 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/CodeGen/ConstantInitBuilder.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Bitcode/BitcodeReader.h"
@@ -7212,9 +7211,6 @@ class MappableExprsHandler {
   /// firstprivate, false otherwise.
   llvm::DenseMap, bool> FirstPrivateDecls;
 
-  /// Set of defaultmap clause kinds that use firstprivate behavior.
-  llvm::SmallSet DefaultmapFirstprivateKinds;
-
   /// Map between device pointer declarations and their expression components.
   /// The key value for declarations in 'this' is null.
   llvm::DenseMap<
@@ -8993,10 +8989,6 @@ class MappableExprsHandler {
   FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
   }
 }
-// Extract defaultmap clause information.
-for (const auto *C : Dir.getClausesOfKind())
-  if (C->getDefaultmapModifier() == OMPC_DEFAULTMAP_MODIFIER_firstprivate)
-DefaultmapFirstprivateKinds.insert(C->getDefaultmapKind());
 // Extract device pointer clause information.
 for (const auto *C : Dir.getClausesOfKind())
   for (auto L : C->component_lists())
@@ -9574,35 +9566,6 @@ class MappableExprsHandler {
 }
   }
 
-  /// Check if a variable should be treated as firstprivate due to explicit
-  /// firstprivate clause or defaultmap(firstprivate:...).
-  bool isEffectivelyFirstprivate(const VarDecl *VD, QualType Type) const {
-// Check explicit firstprivate clauses
-if (FirstPrivateDecls.count(VD))
-  return true;
-
-// Check defaultmap(firstprivate:scalar) for scalar types
-if (DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_scalar)) {
-  if (Type->isScalarType())
-return true;
-}
-
-// Check defaultmap(firstprivate:pointer) for pointer types
-if (DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_pointer)) {
-  if (Type->isAnyPointerType())
-return true;
-}
-
-// Check defaultmap(firstprivate:aggregate) for aggregate types
-if (DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_aggregate)) {
-  if (Type->isAggregateType())
-return true;
-}
-
-// Check defaultmap(firstprivate:all) for all types
-return DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_all);
-  }
-
   /// Generate the default map information for a given capture \a CI,
   /// record field declaration \a RI and captured value \a CV.
   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
@@ -9630,9 +9593,6 @@ class MappableExprsHandler {
   CombinedInfo.DevicePtrDecls.push_back(nullptr);
   CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
   CombinedInfo.Pointers.push_back(CV);
-  bool IsFirstprivate =
-  isEffectivelyFirstprivate(VD, RI.getType().getNonReferenceType());
-
   if (!RI.getType()->isAnyPointerType()) {
 // We have to signal to the runtime captures passed by value that are
 // not pointers.
@@ -9640,13 +9600,6 @@ class MappableExprsHandler {
 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
 CombinedInfo.Sizes.push_back(CGF.Builder.CreateI